From e26c10bcb81e9ee502c8d8e8838572d539223e72 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 2 Jan 2024 17:42:28 -0500 Subject: [PATCH 001/140] Fixing CI pipeline. --- ...njectionWithAvailabilityStrategyTests.java | 1598 ++++++++--------- 1 file changed, 799 insertions(+), 799 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index e4081ecc4a27..b9cd56b3c018 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -2547,500 +2547,500 @@ public Object[][] testConfigs_queryAfterCreation() { // }, // Plain vanilla single partition query. No failure injection and all records will fit into a single page - new Object[] { - "DefaultPageSize_SinglePartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - noFailureInjection, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - null, - validateExactlyOneRecordReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple cross partition query. No failure injection and all records returned for a partition will fit - // into a single page. But there will be one page per partition - new Object[] { - "DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - noFailureInjection, - validateStatusCodeIs200Ok, - PHYSICAL_PARTITION_COUNT, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - validateAllRecordsSameIdReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple single partition query. No failure injection but page size set to 1 - so, multiple pages will - // be returned from the PagedFlux - for each document one page - and the expectation is that there - // will be as many CosmosDiagnosticsContext instances as pages. - new Object[] { - "PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOne, - noFailureInjection, - validateStatusCodeIs200Ok, - 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - validateAllRecordsSamePartitionReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE - }, - - // Simple cross partition query. No failure injection but page size set to 1 - so, multiple pages will - // be returned from the PagedFlux per physical partition - for each document one page - and the - // expectation is that there will be as many CosmosDiagnosticsContext instances as pages. - new Object[] { - "PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOne, - noFailureInjection, - validateStatusCodeIs200Ok, - 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - validateAllRecordsSameIdReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple single partition query intended to not return any results. No failure injection and only - // one empty page expected - with exactly one CosmosDiagnostics instance - new Object[] { - "EmptyResults_SinglePartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - singlePartitionEmptyResultQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOne, - noFailureInjection, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan - ), - null, - validateEmptyResults, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple cross partition query intended to not return any results. No failures injected. - // Empty pages should be skipped (except for the last one) - so, exactly one empty page expected - - // with exactly one CosmosDiagnostics instance - even when this is a cross-partition query touching all - // partitions - new Object[] { - "EmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionEmptyResultQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOne, - noFailureInjection, - validateStatusCodeIs200Ok, - // empty pages are skipped except for the last one - 1, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[1].getClientSideRequestStatistics().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - } - ), - null, - validateEmptyResults, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple cross partition query intended to not return any results. No failures injected. - // Empty pages should be returned - so, exactly one page per partition expected - - // with exactly one CosmosDiagnostics instance (plus query plan on very first one) - new Object[] { - "EmptyResults_EnableEmptyPageRetrieval_CrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionEmptyResultQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOneAndEmptyPagesEnabled, - noFailureInjection, - validateStatusCodeIs200Ok, - // empty pages are bubbled up - PHYSICAL_PARTITION_COUNT, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[1].getClientSideRequestStatistics().size()) - .isEqualTo(1); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(1); - } - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[0].getClientSideRequestStatistics().size()) - .isEqualTo(1); - assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(1); - } - ), - validateEmptyResults, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple cross partition query intended to not return any results except on one partition. - // No failures injected. Empty pages of all but one partition will be skipped, but - // query metrics and client side request statistics are captured in the merged diagnostics. - new Object[] { - "AllButOnePartitionEmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - noFailureInjection, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[1].getClientSideRequestStatistics().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - } - ), - null, - validateExactlyOneRecordReturned, - NO_OTHER_DOCS_WITH_SAME_ID, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Single partition query with DISTINCT and ORDER BY. No failures injected - // Expect to get as many pages and diagnostics contexts as there are documents for this PK-value - new Object[] { - "AggregatesAndOrderBy_PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - singlePartitionWithAggregatesAndOrderByQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOne, - noFailureInjection, - validateStatusCodeIs200Ok, - 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - validateAllRecordsSamePartitionReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE - }, - - // Single partition query with DISTINCT and ORDER BY. No failures injected - // Only a single document matches the where condition - but this is a cross partition query. Because - // the single page returned in the CosmosPagedFlux had to peek into all physical partitions to be - // able to achieve global ordering in the query pipeline a single CosmosDiagnosticsContext instance - // is returned - but with query metrics and client request statistics for all partitions - new Object[] { - "AggregatesAndOrderBy_PageSizeOne_CrossPartitionSingleRecord_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionWithAggregatesAndOrderByQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOne, - noFailureInjection, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[1].getClientSideRequestStatistics().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - } - ), - null, - validateExactlyOneRecordReturned, - NO_OTHER_DOCS_WITH_SAME_PK, - NO_OTHER_DOCS_WITH_SAME_ID - }, - - // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where - // condition but the distinct id value is identical - so, to the application only a single record is - // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances - // as there are documents with the same id-value. - new Object[] { - "AggregatesAndOrderBy_PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionWithAggregatesAndOrderByQueryGenerator, - queryReturnsTotalRecordCountWithPageSizeOne, - noFailureInjection, - validateStatusCodeIs200Ok, - 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxOnlyFeedResponsesExceptQueryPlan - ), - validateExactlyOneRecordReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where - // condition but the distinct id value is identical - so, to the application only a single record is - // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances - // as there are documents with the same id-value. - new Object[] { - "AggregatesAndOrderBy_DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionWithAggregatesAndOrderByQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - noFailureInjection, - validateStatusCodeIs200Ok, - PHYSICAL_PARTITION_COUNT, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[1].getClientSideRequestStatistics().size()) - .isEqualTo(1); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(1); - } - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[0].getClientSideRequestStatistics().size()) - .isEqualTo(1); - assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(1); - } - ), - validateExactlyOneRecordReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Cross partition query with DISTINCT and ORDER BY. Single document meets the where - // condition, but queries against all partitions need to be executed. Expect to see a single - // page and CosmosDiagnosticsContext - but including three request statistics and query metrics. - new Object[] { - "AggregatesAndOrderBy_DefaultPageSize_SingleRecordCrossPartition_AllGood_NoAvailabilityStrategy", - ONE_SECOND_DURATION, - noAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - crossPartitionWithAggregatesAndOrderByQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - noFailureInjection, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxSingleRegion, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[1].getClientSideRequestStatistics().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) - .isEqualTo(PHYSICAL_PARTITION_COUNT); - } - ), - null, - validateExactlyOneRecordReturned, - NO_OTHER_DOCS_WITH_SAME_ID, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple single partition query - 404/1002 injected into all partition of the first region - // RegionSwitchHint is local - with eager availability strategy - so, the expectation is that the - // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext - // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for - // the attempt in the first region and third one for hedging returning successful response. - new Object[] { - "DefaultPageSize_SinglePartition_404-1002_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", - Duration.ofSeconds(10), - eagerThresholdAvailabilityStrategy, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectReadSessionNotAvailableIntoFirstRegionOnly, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxTwoRegions, - validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isEqualTo(3); - - // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region - // (possibly also fail-over to secondary region) - assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); - assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) - .isEqualTo(true); - - // Ensure second FeedResponse CosmoDiagnostics has only requests to second region - assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) - .isEqualTo(true); - } - ), - null, - validateExactlyOneRecordReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple cross partition query - 404/1002 injected into all partition of the first region - // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the - // retry on the first region will provide a successful response and no hedging is happening. - // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have - // a single CosmosDiagnostics instance contacting both regions. - new Object[] { - "DefaultPageSize_CrossPartition_404-1002_OnlyFirstRegion_AllPartitions_RemotePreferred_ReluctantAvailabilityStrategy", - THREE_SECOND_DURATION, - reluctantThresholdAvailabilityStrategy, - CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED, - ConnectionMode.DIRECT, - crossPartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectReadSessionNotAvailableIntoFirstRegionOnly, - validateStatusCodeIs200Ok, - PHYSICAL_PARTITION_COUNT, - ArrayUtils.toArray( - validateCtxTwoRegions, - validateCtxQueryPlan, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isEqualTo(2); - - // Ensure fail-over happened - assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); - assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) - .isEqualTo(true); - assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) - .isEqualTo(true); - } - ), - ArrayUtils.toArray( - validateCtxTwoRegions, - validateCtxOnlyFeedResponsesExceptQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isEqualTo(1); - - // Ensure fail-over happened - assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(2); - assertThat(diagnostics[0].getContactedRegionNames().contains(FIRST_REGION_NAME)) - .isEqualTo(true); - assertThat(diagnostics[0].getContactedRegionNames().contains(SECOND_REGION_NAME)) - .isEqualTo(true); - } - ), - validateAllRecordsSameIdReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, +// new Object[] { +// "DefaultPageSize_SinglePartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// null, +// validateExactlyOneRecordReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple cross partition query. No failure injection and all records returned for a partition will fit +// // into a single page. But there will be one page per partition +// new Object[] { +// "DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// PHYSICAL_PARTITION_COUNT, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// validateAllRecordsSameIdReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple single partition query. No failure injection but page size set to 1 - so, multiple pages will +// // be returned from the PagedFlux - for each document one page - and the expectation is that there +// // will be as many CosmosDiagnosticsContext instances as pages. +// new Object[] { +// "PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOne, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// validateAllRecordsSamePartitionReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE +// }, +// +// // Simple cross partition query. No failure injection but page size set to 1 - so, multiple pages will +// // be returned from the PagedFlux per physical partition - for each document one page - and the +// // expectation is that there will be as many CosmosDiagnosticsContext instances as pages. +// new Object[] { +// "PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOne, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// validateAllRecordsSameIdReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple single partition query intended to not return any results. No failure injection and only +// // one empty page expected - with exactly one CosmosDiagnostics instance +// new Object[] { +// "EmptyResults_SinglePartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// singlePartitionEmptyResultQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOne, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan +// ), +// null, +// validateEmptyResults, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple cross partition query intended to not return any results. No failures injected. +// // Empty pages should be skipped (except for the last one) - so, exactly one empty page expected - +// // with exactly one CosmosDiagnostics instance - even when this is a cross-partition query touching all +// // partitions +// new Object[] { +// "EmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionEmptyResultQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOne, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// // empty pages are skipped except for the last one +// 1, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// } +// ), +// null, +// validateEmptyResults, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple cross partition query intended to not return any results. No failures injected. +// // Empty pages should be returned - so, exactly one page per partition expected - +// // with exactly one CosmosDiagnostics instance (plus query plan on very first one) +// new Object[] { +// "EmptyResults_EnableEmptyPageRetrieval_CrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionEmptyResultQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOneAndEmptyPagesEnabled, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// // empty pages are bubbled up +// PHYSICAL_PARTITION_COUNT, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) +// .isEqualTo(1); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(1); +// } +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[0].getClientSideRequestStatistics().size()) +// .isEqualTo(1); +// assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(1); +// } +// ), +// validateEmptyResults, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple cross partition query intended to not return any results except on one partition. +// // No failures injected. Empty pages of all but one partition will be skipped, but +// // query metrics and client side request statistics are captured in the merged diagnostics. +// new Object[] { +// "AllButOnePartitionEmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// } +// ), +// null, +// validateExactlyOneRecordReturned, +// NO_OTHER_DOCS_WITH_SAME_ID, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Single partition query with DISTINCT and ORDER BY. No failures injected +// // Expect to get as many pages and diagnostics contexts as there are documents for this PK-value +// new Object[] { +// "AggregatesAndOrderBy_PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// singlePartitionWithAggregatesAndOrderByQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOne, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// validateAllRecordsSamePartitionReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE +// }, +// +// // Single partition query with DISTINCT and ORDER BY. No failures injected +// // Only a single document matches the where condition - but this is a cross partition query. Because +// // the single page returned in the CosmosPagedFlux had to peek into all physical partitions to be +// // able to achieve global ordering in the query pipeline a single CosmosDiagnosticsContext instance +// // is returned - but with query metrics and client request statistics for all partitions +// new Object[] { +// "AggregatesAndOrderBy_PageSizeOne_CrossPartitionSingleRecord_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionWithAggregatesAndOrderByQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOne, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// } +// ), +// null, +// validateExactlyOneRecordReturned, +// NO_OTHER_DOCS_WITH_SAME_PK, +// NO_OTHER_DOCS_WITH_SAME_ID +// }, +// +// // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where +// // condition but the distinct id value is identical - so, to the application only a single record is +// // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances +// // as there are documents with the same id-value. +// new Object[] { +// "AggregatesAndOrderBy_PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionWithAggregatesAndOrderByQueryGenerator, +// queryReturnsTotalRecordCountWithPageSizeOne, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxOnlyFeedResponsesExceptQueryPlan +// ), +// validateExactlyOneRecordReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where +// // condition but the distinct id value is identical - so, to the application only a single record is +// // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances +// // as there are documents with the same id-value. +// new Object[] { +// "AggregatesAndOrderBy_DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionWithAggregatesAndOrderByQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// PHYSICAL_PARTITION_COUNT, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) +// .isEqualTo(1); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(1); +// } +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[0].getClientSideRequestStatistics().size()) +// .isEqualTo(1); +// assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(1); +// } +// ), +// validateExactlyOneRecordReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Cross partition query with DISTINCT and ORDER BY. Single document meets the where +// // condition, but queries against all partitions need to be executed. Expect to see a single +// // page and CosmosDiagnosticsContext - but including three request statistics and query metrics. +// new Object[] { +// "AggregatesAndOrderBy_DefaultPageSize_SingleRecordCrossPartition_AllGood_NoAvailabilityStrategy", +// ONE_SECOND_DURATION, +// noAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// crossPartitionWithAggregatesAndOrderByQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// noFailureInjection, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) +// .isEqualTo(PHYSICAL_PARTITION_COUNT); +// } +// ), +// null, +// validateExactlyOneRecordReturned, +// NO_OTHER_DOCS_WITH_SAME_ID, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple single partition query - 404/1002 injected into all partition of the first region +// // RegionSwitchHint is local - with eager availability strategy - so, the expectation is that the +// // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext +// // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for +// // the attempt in the first region and third one for hedging returning successful response. +// new Object[] { +// "DefaultPageSize_SinglePartition_404-1002_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", +// Duration.ofSeconds(10), +// eagerThresholdAvailabilityStrategy, +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectReadSessionNotAvailableIntoFirstRegionOnly, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxTwoRegions, +// validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isEqualTo(3); +// +// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region +// // (possibly also fail-over to secondary region) +// assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); +// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) +// .isEqualTo(true); +// +// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region +// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) +// .isEqualTo(true); +// } +// ), +// null, +// validateExactlyOneRecordReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple cross partition query - 404/1002 injected into all partition of the first region +// // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the +// // retry on the first region will provide a successful response and no hedging is happening. +// // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have +// // a single CosmosDiagnostics instance contacting both regions. +// new Object[] { +// "DefaultPageSize_CrossPartition_404-1002_OnlyFirstRegion_AllPartitions_RemotePreferred_ReluctantAvailabilityStrategy", +// THREE_SECOND_DURATION, +// reluctantThresholdAvailabilityStrategy, +// CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// crossPartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectReadSessionNotAvailableIntoFirstRegionOnly, +// validateStatusCodeIs200Ok, +// PHYSICAL_PARTITION_COUNT, +// ArrayUtils.toArray( +// validateCtxTwoRegions, +// validateCtxQueryPlan, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isEqualTo(2); +// +// // Ensure fail-over happened +// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); +// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) +// .isEqualTo(true); +// assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) +// .isEqualTo(true); +// } +// ), +// ArrayUtils.toArray( +// validateCtxTwoRegions, +// validateCtxOnlyFeedResponsesExceptQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isEqualTo(1); +// +// // Ensure fail-over happened +// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(2); +// assertThat(diagnostics[0].getContactedRegionNames().contains(FIRST_REGION_NAME)) +// .isEqualTo(true); +// assertThat(diagnostics[0].getContactedRegionNames().contains(SECOND_REGION_NAME)) +// .isEqualTo(true); +// } +// ), +// validateAllRecordsSameIdReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, // Simple cross partition query - 404/1002 injected into only a single partition of the first region // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the @@ -3049,7 +3049,7 @@ public Object[][] testConfigs_queryAfterCreation() { // a single CosmosDiagnostics instance contacting both regions. new Object[] { "DefaultPageSize_CrossPartition_404-1002_OnlyFirstRegion_SinglePartition_RemotePreferred_ReluctantAvailabilityStrategy", - ONE_SECOND_DURATION, + Duration.ofSeconds(50), reluctantThresholdAvailabilityStrategy, CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED, ConnectionMode.DIRECT, @@ -3097,313 +3097,313 @@ public Object[][] testConfigs_queryAfterCreation() { // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for // the attempt in the first region and third one for hedging returning successful response. - new Object[] { - "DefaultPageSize_SinglePartition_503_AllRegions_EagerAvailabilityStrategy", - Duration.ofSeconds(10), - eagerThresholdAvailabilityStrategy, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectServiceUnavailableIntoAllRegions, - validateStatusCodeIsServiceUnavailable, - 1, - ArrayUtils.toArray( - validateCtxTwoRegions, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isEqualTo(3); - - // Ensure first FeedResponse reaches both regions since Clinet Retry - // policy should kick in and retry in remote region - assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); - assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) - .isEqualTo(true); - assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) - .isEqualTo(true); - - // Ensure second FeedResponse CosmoDiagnostics has only requests to second region - assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) - .isEqualTo(true); - } - ), - null, - null, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple single partition query. Gateway timeout for query plan retrieval in first region injected. - // This test case validates that the availability strategy and hedging is also applied for the - // query plan request. The expectation is that the query plan request in the first region won't finish, - // the query plan will then be retrieved from the second region but the actual query is executed against the - // first region. - new Object[] { - "DefaultPageSize_SinglePartition_QueryPLanHighLatency_EagerAvailabilityStrategy", - THREE_SECOND_DURATION, - reluctantThresholdAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectQueryPlanTransitTimeoutIntoFirstRegionOnly, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxTwoRegions, - validateCtxQueryPlan, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isGreaterThanOrEqualTo(3); - - // Ensure that the query plan has been retrieved from the second region - assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); - assertThat(diagnostics[0].getClientSideRequestStatistics()).isNotNull(); - assertThat(diagnostics[0].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); - ClientSideRequestStatistics requestStats = diagnostics[0].getClientSideRequestStatistics().iterator().next(); - assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); - assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); - assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); - assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(408); - - // Ensure that the query plan has been retrieved from the second region - assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); - assertThat(diagnostics[1].getClientSideRequestStatistics()).isNotNull(); - assertThat(diagnostics[1].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); - requestStats = diagnostics[1].getClientSideRequestStatistics().iterator().next(); - assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); - assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); - assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); - assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(200); - - - // There possibly is an incomplete diagnostics for the failed query plan retrieval in the first region - // Last Diagnostics should be for processed request against the first region with the - // query plan retrieved from the second region - boolean found = false; - for (int i = 2; i < diagnostics.length; i++) { - if (diagnostics[i].getFeedResponseDiagnostics() != null && - diagnostics[i].getFeedResponseDiagnostics().getQueryMetricsMap() != null) { - - found = true; - assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); - assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); - assertThat(diagnostics[i].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[i].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); - } - } - - assertThat(found).isEqualTo(true); - } - ), - null, - validateExactlyOneRecordReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - // Simple single partition query - 429/3200 injected into all partition of the first region - // Eager availability strategy - so, the expectation is that the - // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext - // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for - // the attempt in the first region and third one for hedging returning successful response. - new Object[] { - "DefaultPageSize_SinglePartition_429-3200_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", - TWO_SECOND_DURATION, - eagerThresholdAvailabilityStrategy, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectRequestRateTooLargeIntoFirstRegionOnly, - validateStatusCodeIs200Ok, - 1, - ArrayUtils.toArray( - validateCtxTwoRegions, - validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isEqualTo(3); - - // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region - // (possibly also fail-over to secondary region) - assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); - assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) - .isEqualTo(true); - - // Ensure second FeedResponse CosmoDiagnostics has only requests to second region - assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) - .isEqualTo(true); - } - ), - null, - validateExactlyOneRecordReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - // Simple single partition query - 429/3200 injected into all regions - // Eager availability strategy - the expectation is that even with hedging, the request will time out - new Object[] { - "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_EagerAvailabilityStrategy", - TWO_SECOND_DURATION, - eagerThresholdAvailabilityStrategy, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectRequestRateTooLargeIntoAllRegions, - validateStatusCodeIsOperationCancelled, - 1, - ArrayUtils.toArray( - validateCtxTwoRegions, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isEqualTo(3); - - // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region - // (possibly also fail-over to secondary region) - assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); - assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) - .isEqualTo(true); - - // Ensure second FeedResponse CosmoDiagnostics has only requests to second region - assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) - .isEqualTo(true); - } - ), - null, - null, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - - // Simple single partition query - 429/3200 injected into first region only - // no availability strategy - the expectation is that no hedging will happen, the request will time out - new Object[] { - "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_noAvailabilityStrategy", - TWO_SECOND_DURATION, - noAvailabilityStrategy, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - ConnectionMode.DIRECT, - singlePartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectRequestRateTooLargeIntoAllRegions, - validateStatusCodeIsOperationCancelled, - 1, - ArrayUtils.toArray( - validateCtxSingleRegion, - (ctx) -> { - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics.length).isEqualTo(2); - - // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region - // (possibly also fail-over to secondary region) - assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) - .isEqualTo(true); - assertThat(diagnostics[1].clientSideRequestStatistics().getResponseStatisticsList().size()).isGreaterThan(1); - } - ), - null, - null, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - }, - // GATEWAY MODE - // ------------ - - // Simple cross partition query - 404/1002 injected into all partition of the first region - // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the - // retry on the first region will provide a successful response and no hedging is happening. - // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have - // a single CosmosDiagnostics instance contacting both regions. - new Object[] { - "GW_DefaultPageSize_CrossPartition_GW408_EagerAvailabilityStrategy", - THREE_SECOND_DURATION, - eagerThresholdAvailabilityStrategy, - noRegionSwitchHint, - ConnectionMode.GATEWAY, - crossPartitionQueryGenerator, - queryReturnsTotalRecordCountWithDefaultPageSize, - injectGatewayTransitTimeoutIntoFirstRegionOnly, - validateStatusCodeIs200Ok, - PHYSICAL_PARTITION_COUNT, - ArrayUtils.toArray( - validateCtxTwoRegions, // query plan 1st region, all queries 2nd region - validateCtxQueryPlan, - (ctx) -> { - assertThat(ctx.getDiagnostics()).isNotNull(); - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - - // Diagnostics of query attempt in first region not even available yet - assertThat(diagnostics.length).isEqualTo(2); - - // query plan on first region - assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); - }, - (ctx) -> { - assertThat(ctx.getDiagnostics()).isNotNull(); - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); - assertThat(diagnostics[1].getFeedResponseDiagnostics()).isNotNull(); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); - assertThat(diagnostics[1].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); - ClientSideRequestStatistics[] clientStats = - diagnostics[1] - .getFeedResponseDiagnostics() - .getClientSideRequestStatistics() - .toArray(new ClientSideRequestStatistics[0]); - assertThat(clientStats.length).isEqualTo(1); - for (int i = 0; i < clientStats.length; i++) { - assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); - assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); - assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); - assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); - assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); - assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); - } - } - ), - ArrayUtils.toArray( - validateCtxSingleRegion, - (ctx) -> { - assertThat(ctx.getDiagnostics()).isNotNull(); - CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); - assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); - assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); - assertThat(diagnostics[0].getFeedResponseDiagnostics()).isNotNull(); - assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); - assertThat(diagnostics[0].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); - ClientSideRequestStatistics[] clientStats = - diagnostics[0] - .getFeedResponseDiagnostics() - .getClientSideRequestStatistics() - .toArray(new ClientSideRequestStatistics[0]); - assertThat(clientStats.length).isEqualTo(1); - for (int i = 0; i < clientStats.length; i++) { - assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); - assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); - assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); - assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); - assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); - assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); - } - } - ), - validateAllRecordsSameIdReturned, - ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, - NO_OTHER_DOCS_WITH_SAME_PK - } +// new Object[] { +// "DefaultPageSize_SinglePartition_503_AllRegions_EagerAvailabilityStrategy", +// Duration.ofSeconds(10), +// eagerThresholdAvailabilityStrategy, +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectServiceUnavailableIntoAllRegions, +// validateStatusCodeIsServiceUnavailable, +// 1, +// ArrayUtils.toArray( +// validateCtxTwoRegions, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isEqualTo(3); +// +// // Ensure first FeedResponse reaches both regions since Clinet Retry +// // policy should kick in and retry in remote region +// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); +// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) +// .isEqualTo(true); +// assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) +// .isEqualTo(true); +// +// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region +// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) +// .isEqualTo(true); +// } +// ), +// null, +// null, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple single partition query. Gateway timeout for query plan retrieval in first region injected. +// // This test case validates that the availability strategy and hedging is also applied for the +// // query plan request. The expectation is that the query plan request in the first region won't finish, +// // the query plan will then be retrieved from the second region but the actual query is executed against the +// // first region. +// new Object[] { +// "DefaultPageSize_SinglePartition_QueryPLanHighLatency_EagerAvailabilityStrategy", +// THREE_SECOND_DURATION, +// reluctantThresholdAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectQueryPlanTransitTimeoutIntoFirstRegionOnly, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxTwoRegions, +// validateCtxQueryPlan, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isGreaterThanOrEqualTo(3); +// +// // Ensure that the query plan has been retrieved from the second region +// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); +// assertThat(diagnostics[0].getClientSideRequestStatistics()).isNotNull(); +// assertThat(diagnostics[0].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); +// ClientSideRequestStatistics requestStats = diagnostics[0].getClientSideRequestStatistics().iterator().next(); +// assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); +// assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); +// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); +// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(408); +// +// // Ensure that the query plan has been retrieved from the second region +// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); +// assertThat(diagnostics[1].getClientSideRequestStatistics()).isNotNull(); +// assertThat(diagnostics[1].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); +// requestStats = diagnostics[1].getClientSideRequestStatistics().iterator().next(); +// assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); +// assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); +// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); +// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(200); +// +// +// // There possibly is an incomplete diagnostics for the failed query plan retrieval in the first region +// // Last Diagnostics should be for processed request against the first region with the +// // query plan retrieved from the second region +// boolean found = false; +// for (int i = 2; i < diagnostics.length; i++) { +// if (diagnostics[i].getFeedResponseDiagnostics() != null && +// diagnostics[i].getFeedResponseDiagnostics().getQueryMetricsMap() != null) { +// +// found = true; +// assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); +// assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); +// assertThat(diagnostics[i].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[i].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); +// } +// } +// +// assertThat(found).isEqualTo(true); +// } +// ), +// null, +// validateExactlyOneRecordReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// // Simple single partition query - 429/3200 injected into all partition of the first region +// // Eager availability strategy - so, the expectation is that the +// // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext +// // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for +// // the attempt in the first region and third one for hedging returning successful response. +// new Object[] { +// "DefaultPageSize_SinglePartition_429-3200_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", +// TWO_SECOND_DURATION, +// eagerThresholdAvailabilityStrategy, +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectRequestRateTooLargeIntoFirstRegionOnly, +// validateStatusCodeIs200Ok, +// 1, +// ArrayUtils.toArray( +// validateCtxTwoRegions, +// validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isEqualTo(3); +// +// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region +// // (possibly also fail-over to secondary region) +// assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); +// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) +// .isEqualTo(true); +// +// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region +// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) +// .isEqualTo(true); +// } +// ), +// null, +// validateExactlyOneRecordReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// // Simple single partition query - 429/3200 injected into all regions +// // Eager availability strategy - the expectation is that even with hedging, the request will time out +// new Object[] { +// "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_EagerAvailabilityStrategy", +// TWO_SECOND_DURATION, +// eagerThresholdAvailabilityStrategy, +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectRequestRateTooLargeIntoAllRegions, +// validateStatusCodeIsOperationCancelled, +// 1, +// ArrayUtils.toArray( +// validateCtxTwoRegions, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isEqualTo(3); +// +// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region +// // (possibly also fail-over to secondary region) +// assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); +// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) +// .isEqualTo(true); +// +// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region +// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) +// .isEqualTo(true); +// } +// ), +// null, +// null, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// +// // Simple single partition query - 429/3200 injected into first region only +// // no availability strategy - the expectation is that no hedging will happen, the request will time out +// new Object[] { +// "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_noAvailabilityStrategy", +// TWO_SECOND_DURATION, +// noAvailabilityStrategy, +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// ConnectionMode.DIRECT, +// singlePartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectRequestRateTooLargeIntoAllRegions, +// validateStatusCodeIsOperationCancelled, +// 1, +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// (ctx) -> { +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics.length).isEqualTo(2); +// +// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region +// // (possibly also fail-over to secondary region) +// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) +// .isEqualTo(true); +// assertThat(diagnostics[1].clientSideRequestStatistics().getResponseStatisticsList().size()).isGreaterThan(1); +// } +// ), +// null, +// null, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// }, +// // GATEWAY MODE +// // ------------ +// +// // Simple cross partition query - 404/1002 injected into all partition of the first region +// // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the +// // retry on the first region will provide a successful response and no hedging is happening. +// // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have +// // a single CosmosDiagnostics instance contacting both regions. +// new Object[] { +// "GW_DefaultPageSize_CrossPartition_GW408_EagerAvailabilityStrategy", +// THREE_SECOND_DURATION, +// eagerThresholdAvailabilityStrategy, +// noRegionSwitchHint, +// ConnectionMode.GATEWAY, +// crossPartitionQueryGenerator, +// queryReturnsTotalRecordCountWithDefaultPageSize, +// injectGatewayTransitTimeoutIntoFirstRegionOnly, +// validateStatusCodeIs200Ok, +// PHYSICAL_PARTITION_COUNT, +// ArrayUtils.toArray( +// validateCtxTwoRegions, // query plan 1st region, all queries 2nd region +// validateCtxQueryPlan, +// (ctx) -> { +// assertThat(ctx.getDiagnostics()).isNotNull(); +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// +// // Diagnostics of query attempt in first region not even available yet +// assertThat(diagnostics.length).isEqualTo(2); +// +// // query plan on first region +// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); +// }, +// (ctx) -> { +// assertThat(ctx.getDiagnostics()).isNotNull(); +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); +// assertThat(diagnostics[1].getFeedResponseDiagnostics()).isNotNull(); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); +// assertThat(diagnostics[1].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); +// ClientSideRequestStatistics[] clientStats = +// diagnostics[1] +// .getFeedResponseDiagnostics() +// .getClientSideRequestStatistics() +// .toArray(new ClientSideRequestStatistics[0]); +// assertThat(clientStats.length).isEqualTo(1); +// for (int i = 0; i < clientStats.length; i++) { +// assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); +// assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); +// assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); +// assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); +// assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); +// } +// } +// ), +// ArrayUtils.toArray( +// validateCtxSingleRegion, +// (ctx) -> { +// assertThat(ctx.getDiagnostics()).isNotNull(); +// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); +// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); +// assertThat(diagnostics[0].getFeedResponseDiagnostics()).isNotNull(); +// assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); +// assertThat(diagnostics[0].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); +// ClientSideRequestStatistics[] clientStats = +// diagnostics[0] +// .getFeedResponseDiagnostics() +// .getClientSideRequestStatistics() +// .toArray(new ClientSideRequestStatistics[0]); +// assertThat(clientStats.length).isEqualTo(1); +// for (int i = 0; i < clientStats.length; i++) { +// assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); +// assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); +// assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); +// assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); +// assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); +// assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); +// } +// } +// ), +// validateAllRecordsSameIdReturned, +// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, +// NO_OTHER_DOCS_WITH_SAME_PK +// } }; } - @Test(groups = {"multi-master"}, dataProvider = "testConfigs_queryAfterCreation") + @Test(groups = {"multi-master"}, dataProvider = "testConfigs_queryAfterCreation", invocationCount = 5) public void queryAfterCreation( String testCaseId, Duration endToEndTimeout, From 552580c27734d557534fef2889967254fc8ffc88 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 13 Mar 2024 19:24:34 -0400 Subject: [PATCH 002/140] Added skeletal classes. --- .../implementation/GlobalEndpointManager.java | 3 ++ ...itionEndpointManagerForCircuitBreaker.java | 31 +++++++++++++++++++ .../IGlobalPartitionEndpointManager.java | 11 +++++++ .../implementation/routing/LocationCache.java | 12 +++++++ 4 files changed, 57 insertions(+) create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index c0d261986c2b..7febab3dc07f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -41,6 +41,7 @@ public class GlobalEndpointManager implements AutoCloseable { private final AtomicBoolean isRefreshing; private final AtomicBoolean refreshInBackground; private final Scheduler scheduler = Schedulers.newSingle(theadFactory); + private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; private volatile boolean isClosed; private AtomicBoolean firstTimeDatabaseAccountInitialization = new AtomicBoolean(true); private volatile DatabaseAccount latestDatabaseAccount; @@ -57,6 +58,8 @@ public Throwable getLatestDatabaseRefreshError() { public GlobalEndpointManager(DatabaseAccountManagerInternal owner, ConnectionPolicy connectionPolicy, Configs configs) { this.backgroundRefreshLocationTimeIntervalInMS = configs.getUnavailableLocationsExpirationTimeInSeconds() * 1000; this.maxInitializationTime = Duration.ofSeconds(configs.getGlobalEndpointManagerMaxInitializationTimeInSeconds()); + this.globalPartitionEndpointManager = new GlobalPartitionEndpointManagerForCircuitBreaker(); + try { this.locationCache = new LocationCache( connectionPolicy, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java new file mode 100644 index 000000000000..23a021262633 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -0,0 +1,31 @@ +package com.azure.cosmos.implementation; + +import java.time.Instant; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +public class GlobalPartitionEndpointManagerForCircuitBreaker implements IGlobalPartitionEndpointManager { + + private final ConcurrentHashMap pkRangeToFailover; + + public GlobalPartitionEndpointManagerForCircuitBreaker() { + this.pkRangeToFailover = new ConcurrentHashMap<>(); + } + + @Override + public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request) { + return false; + } + + // what is the point of an inner class? + // at a high-level, the below class needs: + // 1. consecutive failure count tracker + // 2. unavailable since + // 3. regions unavailable in + // 4. failure type + static class PartitionLevelFailoverInfoForCircuitBreaker { + private final AtomicInteger consecutiveFailureCount = new AtomicInteger(); + private final AtomicReference unavailableSince = new AtomicReference<>(Instant.now()); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java new file mode 100644 index 000000000000..ec67c6f32d16 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +public interface IGlobalPartitionEndpointManager { + + boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request); + + +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index a7bbbfc70855..f6e226498f7e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -132,6 +132,14 @@ public List getAvailableWriteEndpoints() { return this.locationInfo.availableWriteEndpointByLocation.values().stream().collect(Collectors.toList()); } + public List getAvailableWriteEndpointsForPartitionKeyRange(RxDocumentServiceRequest request) { + return new ArrayList<>(); + } + + public List getAvailableReadEndpointsForPartitionKeyRange(RxDocumentServiceRequest request) { + return new ArrayList<>(); + } + /** * Marks the current location unavailable for read */ @@ -146,6 +154,10 @@ public void markEndpointUnavailableForWrite(URI endpoint) { this.markEndpointUnavailable(endpoint, OperationType.Write); } + public void markEndpointUnavailableForWriteAtPartitionKeyRangeScope(RxDocumentServiceRequest request) {} + + public void markEndpointUnavailableForReadAtPartitionKeyRangeScope(RxDocumentServiceRequest request) {} + /** * Invoked when {@link DatabaseAccount} is read * @param databaseAccount READ DatabaseAccount From 097f7a56844d2856e57ca524dacbfc78def8656b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 14 Mar 2024 15:06:48 -0400 Subject: [PATCH 003/140] Added skeletal classes and method calls. --- .../implementation/ClientRetryPolicy.java | 10 ++++ .../implementation/GlobalEndpointManager.java | 8 +++ ...itionEndpointManagerForCircuitBreaker.java | 53 +++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index cd540f2fe580..4beaf48255cd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -317,6 +317,16 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( boolean nonIdempotentWriteRetriesEnabled, CosmosException cosmosException) { + // if partition-level circuit breaker is enabled + if (false) { + + if (isReadRequest) { + this.globalEndpointManager.markPartitionKeyRangeAsUnavailableForRead(this.request); + } else { + this.globalEndpointManager.markPartitionKeyRangeAsUnavailableForWrite(this.request); + } + } + // The request has failed with 503, SDK need to decide whether it is safe to retry for write operations // For server generated retries, it is safe to retry // For SDK generated 503, it will be more tricky as we have to decide the cause of it. For any causes that SDK not sure whether the request diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index 7febab3dc07f..26ae88380334 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -169,6 +169,14 @@ public void markEndpointUnavailableForWrite(URI endpoint) { this.locationCache.markEndpointUnavailableForWrite(endpoint); } + public void markPartitionKeyRangeAsUnavailableForWrite(RxDocumentServiceRequest request) { + this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); + } + + public void markPartitionKeyRangeAsUnavailableForRead(RxDocumentServiceRequest request) { + this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); + } + public boolean canUseMultipleWriteLocations() { return this.locationCache.canUseMultipleWriteLocations(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 23a021262633..88052d68d5d5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -1,6 +1,11 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.azure.cosmos.implementation; +import java.net.URI; import java.time.Instant; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -25,6 +30,54 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re // 3. regions unavailable in // 4. failure type static class PartitionLevelFailoverInfoForCircuitBreaker { + + private final AtomicReference partitionLevelFailureMetadata; + private final Set failedLocations = ConcurrentHashMap.newKeySet(); + private final Object failedRegionLock = new Object(); + // points to the current location a request will be routed to + private URI current; + + PartitionLevelFailoverInfoForCircuitBreaker(URI current) { + this.partitionLevelFailureMetadata = new AtomicReference<>(new PartitionLevelFailureMetadata()); + this.current = current; + } + + // method purpose - choose the next possible region for this partition + public boolean tryMoveNextLocation(Set locations, URI failedLocation) { + + if (failedLocation != this.current) { + // a different thread has moved it to the next location + return true; + } + + synchronized (failedRegionLock) { + + if (failedLocation != this.current) { + // a different thread has moved it to the next location + return true; + } + + for (URI location : locations) { + + if (this.current == location) { + continue; + } + + if (this.failedLocations.contains(location)) { + continue; + } + + this.failedLocations.add(failedLocation); + this.current = location; + return true; + } + } + + return false; + } + } + + static class PartitionLevelFailureMetadata { private final AtomicInteger consecutiveFailureCount = new AtomicInteger(); private final AtomicReference unavailableSince = new AtomicReference<>(Instant.now()); } From e97f50b3abfccc17402c7b178858c6930934983e Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 15 Mar 2024 10:42:55 -0400 Subject: [PATCH 004/140] Added skeletal classes and method calls. --- .../AddressResolverTest.java | 6 +- .../implementation/GlobalEndpointManager.java | 4 + ...itionEndpointManagerForCircuitBreaker.java | 174 +++++++++++++++++- .../IGlobalPartitionEndpointManager.java | 5 +- .../directconnectivity/AddressResolver.java | 10 +- .../GlobalAddressResolver.java | 2 +- 6 files changed, 188 insertions(+), 13 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java index ad8ae1693e52..6cd5b4d68224 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java @@ -6,6 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentCollection; +import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.InvalidPartitionException; @@ -65,14 +66,15 @@ public class AddressResolverTest { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache fabricAddressCache; - + private GlobalEndpointManager globalEndpointManager; private int collectionCacheRefreshedCount; private Map routingMapRefreshCount; private Map addressesRefreshCount; @BeforeClass(groups = "unit") public void before_AddressResolverTest() throws Exception { - this.addressResolver = new AddressResolver(); + this.globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + this.addressResolver = new AddressResolver(this.globalEndpointManager); this.collectionCache = Mockito.mock(RxCollectionCache.class); this.collectionRoutingMapCache = Mockito.mock(ICollectionRoutingMapCache.class); this.fabricAddressCache = Mockito.mock(IAddressCache.class); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index 26ae88380334..8968e78643a3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -177,6 +177,10 @@ public void markPartitionKeyRangeAsUnavailableForRead(RxDocumentServiceRequest r this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); } + public boolean tryAddPartitionLevelOverride(RxDocumentServiceRequest request) { + return this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); + } + public boolean canUseMultipleWriteLocations() { return this.locationCache.canUseMultipleWriteLocations(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 88052d68d5d5..227565df6728 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -3,8 +3,12 @@ package com.azure.cosmos.implementation; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.net.URI; import java.time.Instant; +import java.util.HashSet; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -12,14 +16,131 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker implements IGlobalPartitionEndpointManager { - private final ConcurrentHashMap pkRangeToFailover; + private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); + + private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; public GlobalPartitionEndpointManagerForCircuitBreaker() { - this.pkRangeToFailover = new ConcurrentHashMap<>(); + this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); } @Override public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request) { + + if (request == null) { + throw new IllegalArgumentException("request cannot be null!"); + } + + if (request.requestContext == null) { + + if (logger.isDebugEnabled()) { + logger.warn("requestContext is null!"); + } + + return false; + } + + PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + URI failedLocation = request.requestContext.locationEndpointToRoute; + + if (partitionKeyRange == null) { + return false; + } + + PartitionLevelFailoverInfoForCircuitBreaker partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + + if (partitionKeyRangeFailoverInfoAsVal == null) { + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfoForCircuitBreaker(); + } + + return partitionKeyRangeFailoverInfoAsVal; + }); + + if (partitionLevelFailoverInfo.tryMoveNextLocation(new HashSet<>(), failedLocation)) { + return true; + } + + this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); + return false; + } + + @Override + public boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request) { + if (request == null) { + throw new IllegalArgumentException("request cannot be null!"); + } + + if (request.requestContext == null) { + + if (logger.isDebugEnabled()) { + logger.warn("requestContext is null!"); + } + + return false; + } + + PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + URI failedLocation = request.requestContext.locationEndpointToRoute; + + if (partitionKeyRange == null) { + return false; + } + + PartitionLevelFailoverInfoForCircuitBreaker partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + + if (partitionKeyRangeFailoverInfoAsVal == null) { + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfoForCircuitBreaker(); + } + + return partitionKeyRangeFailoverInfoAsVal; + }); + + partitionLevelFailoverInfo.bookmarkFailure(failedLocation); + + if (partitionLevelFailoverInfo.tryMoveNextLocation(new HashSet<>(), failedLocation)) { + return true; + } + + this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); + return false; + } + + @Override + public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request) { + + if (request == null) { + throw new IllegalArgumentException("request cannot be null!"); + } + + if (request.requestContext == null) { + + if (logger.isDebugEnabled()) { + logger.warn("requestContext is null!"); + } + + return false; + } + + PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + + if (partitionKeyRange == null) { + return false; + } + + if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { + PartitionLevelFailoverInfoForCircuitBreaker partitionLevelFailoverInfoForCircuitBreaker = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + + URI current = partitionLevelFailoverInfoForCircuitBreaker.current; + + if (logger.isDebugEnabled()) { + logger.debug("Moving request to location : {}", current.getPath()); + } + + request.requestContext.routeToLocation(current); + + return true; + } + return false; } @@ -31,20 +152,57 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re // 4. failure type static class PartitionLevelFailoverInfoForCircuitBreaker { - private final AtomicReference partitionLevelFailureMetadata; + private final ConcurrentHashMap partitionLevelFailureMetadata; private final Set failedLocations = ConcurrentHashMap.newKeySet(); private final Object failedRegionLock = new Object(); // points to the current location a request will be routed to private URI current; - PartitionLevelFailoverInfoForCircuitBreaker(URI current) { - this.partitionLevelFailureMetadata = new AtomicReference<>(new PartitionLevelFailureMetadata()); - this.current = current; + PartitionLevelFailoverInfoForCircuitBreaker() { + this.partitionLevelFailureMetadata = new ConcurrentHashMap<>(); + } + + // bookmark failure + public void bookmarkFailure(URI failedLocation) { + this.partitionLevelFailureMetadata.compute(failedLocation, (locationAsKey, partitionLevelFailureMetadataAsVal) -> { + + if (partitionLevelFailureMetadataAsVal == null) { + partitionLevelFailureMetadataAsVal = new PartitionLevelFailureMetadata(); + } + + // todo : make threshold for marking a location as failed more comprehensive + if (partitionLevelFailureMetadataAsVal.consecutiveFailureCount.incrementAndGet() > 5) { + partitionLevelFailureMetadataAsVal.unavailableSince.set(Instant.now()); + this.failedLocations.add(failedLocation); + } + + return partitionLevelFailureMetadataAsVal; + }); + } + + // bookmark success + public void bookmarkSuccess(URI succeededLocation) { + this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, partitionLevelFailureMetadataAsVal) -> { + + if (partitionLevelFailureMetadataAsVal == null) { + return new PartitionLevelFailureMetadata(); + } + + if (partitionLevelFailureMetadataAsVal.consecutiveFailureCount.get() > 1) { + partitionLevelFailureMetadataAsVal.consecutiveFailureCount.decrementAndGet(); + } + + return partitionLevelFailureMetadataAsVal; + }); } // method purpose - choose the next possible region for this partition public boolean tryMoveNextLocation(Set locations, URI failedLocation) { + if (partitionLevelFailureMetadata.get().consecutiveFailureCount.incrementAndGet() < 5) { + return false; + } + if (failedLocation != this.current) { // a different thread has moved it to the next location return true; @@ -75,6 +233,10 @@ public boolean tryMoveNextLocation(Set locations, URI failedLocation) { return false; } + + public boolean tryMarkLocationAsAvailable(URI previouslyFailedLocation) { + + } } static class PartitionLevelFailureMetadata { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java index ec67c6f32d16..f8b51462bec3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java @@ -4,8 +4,7 @@ package com.azure.cosmos.implementation; public interface IGlobalPartitionEndpointManager { - boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request); - - + boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request); + boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index 33e55ff5c83e..f04315511df1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.BadRequestException; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.Exceptions; +import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.InternalServerErrorException; @@ -54,8 +55,10 @@ public class AddressResolver implements IAddressResolver { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache addressCache; + private GlobalEndpointManager globalEndpointManager; - public AddressResolver() { + public AddressResolver(GlobalEndpointManager globalEndpointManager) { + this.globalEndpointManager = globalEndpointManager; } public void initializeCaches( @@ -83,6 +86,11 @@ public Mono resolveAsync( request.requestContext.resolvedPartitionKeyRange = result.TargetPartitionKeyRange; + // TODO: use GlobalPartitionEndpointManager to add a partition-level request override + if (!this.globalEndpointManager.tryAddPartitionLevelOverride(request)) { + return this.resolveAsync(request, forceRefreshPartitionAddresses); + } + return Mono.just(result.Addresses); }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index fed1cbde61a8..4ec19035891a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -290,7 +290,7 @@ private EndpointCache getOrAddEndpoint(URI endpoint) { this.connectionPolicy, this.proactiveOpenConnectionsProcessor, this.gatewayServerErrorInjector); - AddressResolver addressResolver = new AddressResolver(); + AddressResolver addressResolver = new AddressResolver(this.endpointManager); addressResolver.initializeCaches(this.collectionCache, this.routingMapProvider, gatewayAddressCache); EndpointCache cache = new EndpointCache(); cache.addressCache = gatewayAddressCache; From 7bac0f7dc2584623ce4b595cefe32bb5353deaa1 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 16 Mar 2024 19:08:01 -0400 Subject: [PATCH 005/140] Added skeletal flow for marking a partition as unavailable for read / write. --- ...eCollectionAwareClientRetryPolicyTest.java | 14 +- .../query/DocumentProducerTest.java | 5 +- .../implementation/ClientRetryPolicy.java | 14 +- .../azure/cosmos/implementation/Configs.java | 5 + .../implementation/GlobalEndpointManager.java | 14 -- ...itionEndpointManagerForCircuitBreaker.java | 176 +++++++++--------- .../cosmos/implementation/RetryPolicy.java | 18 +- .../implementation/RxDocumentClientImpl.java | 8 +- 8 files changed, 139 insertions(+), 115 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java index d5576e4cf44e..d20c623b01f8 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java @@ -23,9 +23,10 @@ public class RenameCollectionAwareClientRetryPolicyTest { @Test(groups = "unit", timeOut = TIMEOUT) public void onBeforeSendRequestNotInvoked() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); @@ -52,8 +53,9 @@ public void onBeforeSendRequestNotInvoked() { @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithNotFoundStatusCode() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); @@ -78,8 +80,10 @@ public void shouldRetryWithNotFoundStatusCode() { @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithNotFoundStatusCodeAndReadSessionNotAvailableSubStatusCode() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); @@ -115,8 +119,10 @@ public void shouldRetryWithNotFoundStatusCodeAndReadSessionNotAvailableSubStatus @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithGenericException() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); - IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy()); + IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); ISessionContainer sessionContainer = Mockito.mock(ISessionContainer.class); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index bee53a8ad198..25fc6a0c66a9 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -11,6 +11,7 @@ import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.HttpConstants; +import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.IRetryPolicyFactory; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RetryPolicy; @@ -118,9 +119,11 @@ private IRetryPolicyFactory mockDocumentClientIRetryPolicyFactory() { } GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(url).when(globalEndpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); doReturn(false).when(globalEndpointManager).isClosed(); - return new RetryPolicy(mockDiagnosticsClientContext(), globalEndpointManager, ConnectionPolicy.getDefaultPolicy()); + return new RetryPolicy(mockDiagnosticsClientContext(), globalEndpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); } @Test(groups = {"unit"}, dataProvider = "splitParamProvider", timeOut = TIMEOUT) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 4beaf48255cd..c1baea4de3a3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -52,12 +52,14 @@ public class ClientRetryPolicy extends DocumentClientRetryPolicy { private RxDocumentServiceRequest request; private RxCollectionCache rxCollectionCache; private final FaultInjectionRequestContext faultInjectionRequestContext; + private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, GlobalEndpointManager globalEndpointManager, boolean enableEndpointDiscovery, ThrottlingRetryOptions throttlingRetryOptions, - RxCollectionCache rxCollectionCache) { + RxCollectionCache rxCollectionCache, + IGlobalPartitionEndpointManager globalPartitionEndpointManager) { this.globalEndpointManager = globalEndpointManager; this.failoverRetryCount = 0; @@ -73,6 +75,7 @@ public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, false); this.rxCollectionCache = rxCollectionCache; this.faultInjectionRequestContext = new FaultInjectionRequestContext(); + this.globalPartitionEndpointManager = globalPartitionEndpointManager; } @Override @@ -318,13 +321,8 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( CosmosException cosmosException) { // if partition-level circuit breaker is enabled - if (false) { - - if (isReadRequest) { - this.globalEndpointManager.markPartitionKeyRangeAsUnavailableForRead(this.request); - } else { - this.globalEndpointManager.markPartitionKeyRangeAsUnavailableForWrite(this.request); - } + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(this.request); } // The request has failed with 503, SDK need to decide whether it is safe to retry for write operations diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 611762ff2b1a..f7d8b3c9c359 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -170,6 +170,7 @@ public class Configs { public static final int MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED = 1; public static final String TCP_CONNECTION_ACQUISITION_TIMEOUT_IN_MS = "COSMOS.TCP_CONNECTION_ACQUISITION_TIMEOUT_IN_MS"; + private static final boolean DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED = false; public Configs() { this.sslContext = sslContextInit(); @@ -494,4 +495,8 @@ public static Duration getTcpConnectionAcquisitionTimeout(int defaultValueInMs) ) ); } + + public static boolean isPartitionLevelCircuitBreakerEnabled() { + return DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index 8968e78643a3..22a5725f40b4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -41,7 +41,6 @@ public class GlobalEndpointManager implements AutoCloseable { private final AtomicBoolean isRefreshing; private final AtomicBoolean refreshInBackground; private final Scheduler scheduler = Schedulers.newSingle(theadFactory); - private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; private volatile boolean isClosed; private AtomicBoolean firstTimeDatabaseAccountInitialization = new AtomicBoolean(true); private volatile DatabaseAccount latestDatabaseAccount; @@ -58,7 +57,6 @@ public Throwable getLatestDatabaseRefreshError() { public GlobalEndpointManager(DatabaseAccountManagerInternal owner, ConnectionPolicy connectionPolicy, Configs configs) { this.backgroundRefreshLocationTimeIntervalInMS = configs.getUnavailableLocationsExpirationTimeInSeconds() * 1000; this.maxInitializationTime = Duration.ofSeconds(configs.getGlobalEndpointManagerMaxInitializationTimeInSeconds()); - this.globalPartitionEndpointManager = new GlobalPartitionEndpointManagerForCircuitBreaker(); try { this.locationCache = new LocationCache( @@ -169,18 +167,6 @@ public void markEndpointUnavailableForWrite(URI endpoint) { this.locationCache.markEndpointUnavailableForWrite(endpoint); } - public void markPartitionKeyRangeAsUnavailableForWrite(RxDocumentServiceRequest request) { - this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); - } - - public void markPartitionKeyRangeAsUnavailableForRead(RxDocumentServiceRequest request) { - this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); - } - - public boolean tryAddPartitionLevelOverride(RxDocumentServiceRequest request) { - return this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); - } - public boolean canUseMultipleWriteLocations() { return this.locationCache.canUseMultipleWriteLocations(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 227565df6728..42217da2be33 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -3,14 +3,17 @@ package com.azure.cosmos.implementation; +import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.net.URI; import java.time.Instant; import java.util.HashSet; +import java.util.List; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -18,10 +21,12 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker implements IGlobalP private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); - private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; + private final GlobalEndpointManager globalEndpointManager; + private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; - public GlobalPartitionEndpointManagerForCircuitBreaker() { + public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); + this.globalEndpointManager = globalEndpointManager; } @Override @@ -47,57 +52,34 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re return false; } - PartitionLevelFailoverInfoForCircuitBreaker partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + AtomicBoolean isFailoverPossible = new AtomicBoolean(true); + AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfoForCircuitBreaker(); - } - - return partitionKeyRangeFailoverInfoAsVal; - }); - - if (partitionLevelFailoverInfo.tryMoveNextLocation(new HashSet<>(), failedLocation)) { - return true; - } - - this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); - return false; - } - - @Override - public boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request) { - if (request == null) { - throw new IllegalArgumentException("request cannot be null!"); - } - - if (request.requestContext == null) { + this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { - if (logger.isDebugEnabled()) { - logger.warn("requestContext is null!"); + if (partitionKeyRangeFailoverInfoAsVal == null) { + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfo(); } - return false; - } - - PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; - URI failedLocation = request.requestContext.locationEndpointToRoute; + isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(failedLocation, request.isReadOnlyRequest())); - if (partitionKeyRange == null) { - return false; - } + if (isFailureThresholdBreached.get()) { - PartitionLevelFailoverInfoForCircuitBreaker partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + UnmodifiableList applicableEndpoints = request.isReadOnly() ? + this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : + this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); - if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfoForCircuitBreaker(); + isFailoverPossible.set(partitionKeyRangeFailoverInfoAsVal + .tryMoveNextLocation(applicableEndpoints, failedLocation, request.isReadOnlyRequest())); } return partitionKeyRangeFailoverInfoAsVal; }); - partitionLevelFailoverInfo.bookmarkFailure(failedLocation); - - if (partitionLevelFailoverInfo.tryMoveNextLocation(new HashSet<>(), failedLocation)) { + // set to true if and only if failure threshold exceeded for the region + // and if failover is possible + // a failover is only possible when there are available regions left to failover to + if (isFailoverPossible.get()) { return true; } @@ -105,6 +87,11 @@ public boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest requ return false; } + @Override + public boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request) { + return false; + } + @Override public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request) { @@ -128,15 +115,20 @@ public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest req } if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { - PartitionLevelFailoverInfoForCircuitBreaker partitionLevelFailoverInfoForCircuitBreaker = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); - URI current = partitionLevelFailoverInfoForCircuitBreaker.current; + // is it possible for this instance to go stale? + PartitionLevelFailoverInfo partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + + // it could be possible that this currentLocationSnapshot is stale since the ConcurrentHashMap.get + // thread won over ConcurrentHashMap.compute (can mark a location as failed), in that case, the request + // could hit possible unavailability issues again + URI currentLocationSnapshot = partitionLevelFailoverInfo.current; if (logger.isDebugEnabled()) { - logger.debug("Moving request to location : {}", current.getPath()); + logger.debug("Moving request to location : {}", currentLocationSnapshot.getPath()); } - request.requestContext.routeToLocation(current); + request.requestContext.routeToLocation(currentLocationSnapshot); return true; } @@ -150,20 +142,25 @@ public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest req // 2. unavailable since // 3. regions unavailable in // 4. failure type - static class PartitionLevelFailoverInfoForCircuitBreaker { + static class PartitionLevelFailoverInfo { private final ConcurrentHashMap partitionLevelFailureMetadata; - private final Set failedLocations = ConcurrentHashMap.newKeySet(); - private final Object failedRegionLock = new Object(); + private final Set failedLocations = ConcurrentHashMap.newKeySet(); // points to the current location a request will be routed to private URI current; - PartitionLevelFailoverInfoForCircuitBreaker() { + PartitionLevelFailoverInfo() { this.partitionLevelFailureMetadata = new ConcurrentHashMap<>(); } // bookmark failure - public void bookmarkFailure(URI failedLocation) { + // method purpose: + // 1. increment consecutive failure count + // 2. if failure count crosses threshold for + public boolean isFailureThresholdBreachedForLocation(URI failedLocation, boolean isReadRequest) { + + AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); + this.partitionLevelFailureMetadata.compute(failedLocation, (locationAsKey, partitionLevelFailureMetadataAsVal) -> { if (partitionLevelFailureMetadataAsVal == null) { @@ -171,13 +168,25 @@ public void bookmarkFailure(URI failedLocation) { } // todo : make threshold for marking a location as failed more comprehensive - if (partitionLevelFailureMetadataAsVal.consecutiveFailureCount.incrementAndGet() > 5) { - partitionLevelFailureMetadataAsVal.unavailableSince.set(Instant.now()); - this.failedLocations.add(failedLocation); + + if (isReadRequest) { + if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.incrementAndGet() > 5) { + partitionLevelFailureMetadataAsVal.unavailableForReadsSince.set(Instant.now()); + this.current = failedLocation; + isFailureThresholdBreached.set(true); + } + } else { + if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.incrementAndGet() > 5) { + partitionLevelFailureMetadataAsVal.unavailableForWritesSince.set(Instant.now()); + this.current = failedLocation; + isFailureThresholdBreached.set(true); + } } return partitionLevelFailureMetadataAsVal; }); + + return isFailureThresholdBreached.get(); } // bookmark success @@ -188,8 +197,8 @@ public void bookmarkSuccess(URI succeededLocation) { return new PartitionLevelFailureMetadata(); } - if (partitionLevelFailureMetadataAsVal.consecutiveFailureCount.get() > 1) { - partitionLevelFailureMetadataAsVal.consecutiveFailureCount.decrementAndGet(); + if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.get() > 1) { + partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.decrementAndGet(); } return partitionLevelFailureMetadataAsVal; @@ -197,36 +206,23 @@ public void bookmarkSuccess(URI succeededLocation) { } // method purpose - choose the next possible region for this partition - public boolean tryMoveNextLocation(Set locations, URI failedLocation) { - - if (partitionLevelFailureMetadata.get().consecutiveFailureCount.incrementAndGet() < 5) { - return false; - } - - if (failedLocation != this.current) { - // a different thread has moved it to the next location - return true; - } - - synchronized (failedRegionLock) { - - if (failedLocation != this.current) { - // a different thread has moved it to the next location - return true; + // 1. if current == failedLocation - try using a different location + // a) iterate through the list of read / write locations + // b) if location in iteration loop not part of failedLocations, then assign location to current + // 2. if current != failedLocation - a different thread has updated it + // 3. + public boolean tryMoveNextLocation(List locations, URI failedLocation, boolean isReadRequest) { + for (URI location : locations) { + if (failedLocation == this.current) { + continue; } - for (URI location : locations) { - - if (this.current == location) { - continue; - } - - if (this.failedLocations.contains(location)) { - continue; - } + // failedLocation != current + if (!this.failedLocations.contains(failedLocation)) { - this.failedLocations.add(failedLocation); + this.failedLocations.add(new FailedLocation(failedLocation, isReadRequest)); this.current = location; + return true; } } @@ -235,12 +231,24 @@ public boolean tryMoveNextLocation(Set locations, URI failedLocation) { } public boolean tryMarkLocationAsAvailable(URI previouslyFailedLocation) { - + return false; } } static class PartitionLevelFailureMetadata { - private final AtomicInteger consecutiveFailureCount = new AtomicInteger(); - private final AtomicReference unavailableSince = new AtomicReference<>(Instant.now()); + private final AtomicInteger consecutiveFailureCountForWrites = new AtomicInteger(); + private final AtomicInteger consecutiveFailureCountForReads = new AtomicInteger(); + private final AtomicReference unavailableForWritesSince = new AtomicReference<>(Instant.MIN); + private final AtomicReference unavailableForReadsSince = new AtomicReference<>(Instant.MIN); + } + + static class FailedLocation { + private final URI location; + private final boolean isRead; + + FailedLocation(URI location, boolean isRead) { + this.location = location; + this.isRead = isRead; + } } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java index 57ac7bfbbbdf..4a11d2919c19 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java @@ -15,15 +15,22 @@ public class RetryPolicy implements IRetryPolicyFactory { private final DiagnosticsClientContext diagnosticsClientContext; private final GlobalEndpointManager globalEndpointManager; + private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; private final boolean enableEndpointDiscovery; private final ThrottlingRetryOptions throttlingRetryOptions; private RxCollectionCache rxCollectionCache; - public RetryPolicy(DiagnosticsClientContext diagnosticsClientContext, GlobalEndpointManager globalEndpointManager, ConnectionPolicy connectionPolicy) { + public RetryPolicy( + DiagnosticsClientContext diagnosticsClientContext, + GlobalEndpointManager globalEndpointManager, + ConnectionPolicy connectionPolicy, + IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + this.diagnosticsClientContext = diagnosticsClientContext; this.enableEndpointDiscovery = connectionPolicy.isEndpointDiscoveryEnabled(); this.globalEndpointManager = globalEndpointManager; this.throttlingRetryOptions = connectionPolicy.getThrottlingRetryOptions(); + this.globalPartitionEndpointManager = globalPartitionEndpointManager; } @Override @@ -32,8 +39,13 @@ public DocumentClientRetryPolicy getRequestPolicy(DiagnosticsClientContext clien if (clientContextOverride != null) { effectiveClientContext = clientContextOverride; } - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(effectiveClientContext, - this.globalEndpointManager, this.enableEndpointDiscovery, this.throttlingRetryOptions, this.rxCollectionCache); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy( + effectiveClientContext, + this.globalEndpointManager, + this.enableEndpointDiscovery, + this.throttlingRetryOptions, + this.rxCollectionCache, + this.globalPartitionEndpointManager); return clientRetryPolicy; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 88f8d1ca5eef..7708b76069b5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -216,6 +216,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization */ private final QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; private final GlobalEndpointManager globalEndpointManager; + private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; private final RetryPolicy retryPolicy; private HttpClient reactorHttpClient; private Function httpClientInterceptor; @@ -508,7 +509,12 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.reactorHttpClient = httpClient(); this.globalEndpointManager = new GlobalEndpointManager(asDatabaseAccountManagerInternal(), this.connectionPolicy, /**/configs); - this.retryPolicy = new RetryPolicy(this, this.globalEndpointManager, this.connectionPolicy); + this.globalPartitionEndpointManager = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); + this.retryPolicy = new RetryPolicy( + this, + this.globalEndpointManager, + this.connectionPolicy, + this.globalPartitionEndpointManager); this.resetSessionTokenRetryPolicy = retryPolicy; CpuMemoryMonitor.register(this); this.queryPlanCache = new ConcurrentHashMap<>(); From ed15a832ca010c56664a72242031db8788d19a34 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 18 Mar 2024 09:04:14 -0400 Subject: [PATCH 006/140] Added skeletal flow for marking a partition as unavailable for read / write. --- .../implementation/ClientRetryPolicyTest.java | 41 +++++--- .../AddressResolverTest.java | 8 +- .../GlobalAddressResolverTest.java | 7 +- ...itionEndpointManagerForCircuitBreaker.java | 94 ++++++++++++++----- .../IGlobalPartitionEndpointManager.java | 1 + .../implementation/RxDocumentClientImpl.java | 3 +- .../directconnectivity/AddressResolver.java | 15 ++- .../GlobalAddressResolver.java | 13 ++- .../directconnectivity/IAddressResolver.java | 3 + .../directconnectivity/StoreClient.java | 8 +- .../implementation/routing/LocationCache.java | 12 --- 11 files changed, 143 insertions(+), 62 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java index 4a9cba157c58..5492c4a843be 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java @@ -44,9 +44,10 @@ public static Object[][] operationProvider() { public void networkFailureOnRead() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException"); CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -83,6 +84,8 @@ public void shouldRetryOnGatewayTimeout( boolean shouldCrossRegionRetry) throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(true)); ClientRetryPolicy clientRetryPolicy = @@ -91,7 +94,8 @@ public void shouldRetryOnGatewayTimeout( endpointManager, true, throttlingRetryOptions, - null); + null, + globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.REQUEST_TIMEOUT, exception); @@ -123,10 +127,12 @@ public void shouldRetryOnGatewayTimeout( public void tcpNetworkFailureOnRead() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; GoneException goneException = new GoneException(exception); @@ -169,9 +175,11 @@ public void tcpNetworkFailureOnRead() throws Exception { public void networkFailureOnWrite() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException");; CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -198,10 +206,12 @@ public void networkFailureOnWrite() throws Exception { public void tcpNetworkFailureOnWrite() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); //Non retribale exception for write Exception exception = new SocketException("Dummy SocketException");; @@ -232,7 +242,7 @@ public void tcpNetworkFailureOnWrite() throws Exception { cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, goneException); Mockito.doReturn(true).when(endpointManager).canUseMultipleWriteLocations(Mockito.any(RxDocumentServiceRequest.class)); - clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); clientRetryPolicy.onBeforeSendRequest(dsr); for (int i = 0; i < 10; i++) { @@ -264,9 +274,11 @@ public void tcpNetworkFailureOnWrite() throws Exception { public void networkFailureOnUpsert() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException"); CosmosException cosmosException = BridgeInternal.createCosmosException(null, HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, exception); @@ -295,10 +307,12 @@ public void networkFailureOnUpsert() throws Exception { public void tcpNetworkFailureOnUpsert() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); Exception exception = new SocketException("Dummy SocketException"); GoneException goneException = new GoneException(exception); @@ -329,9 +343,11 @@ public void tcpNetworkFailureOnUpsert() throws Exception { public void networkFailureOnDelete() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; CosmosException cosmosException = BridgeInternal.createCosmosException( @@ -361,10 +377,12 @@ public void networkFailureOnDelete() throws Exception { public void tcpNetworkFailureOnDelete() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); Mockito.doReturn(2).when(endpointManager).getPreferredLocationCount(); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, retryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; GoneException goneException = new GoneException(exception); @@ -395,9 +413,10 @@ public void tcpNetworkFailureOnDelete() throws Exception { public void onBeforeSendRequestNotInvoked() { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); - ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null); + ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); Exception exception = ReadTimeoutException.INSTANCE; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java index 6cd5b4d68224..bc40a4a1a1a3 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java @@ -9,8 +9,8 @@ import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; +import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.InvalidPartitionException; -import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import com.azure.cosmos.implementation.NotFoundException; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; @@ -66,15 +66,15 @@ public class AddressResolverTest { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache fabricAddressCache; - private GlobalEndpointManager globalEndpointManager; + private IGlobalPartitionEndpointManager globalPartitionEndpointManager; private int collectionCacheRefreshedCount; private Map routingMapRefreshCount; private Map addressesRefreshCount; @BeforeClass(groups = "unit") public void before_AddressResolverTest() throws Exception { - this.globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); - this.addressResolver = new AddressResolver(this.globalEndpointManager); + this.globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + this.addressResolver = new AddressResolver(this.globalPartitionEndpointManager); this.collectionCache = Mockito.mock(RxCollectionCache.class); this.collectionRoutingMapCache = Mockito.mock(ICollectionRoutingMapCache.class); this.fabricAddressCache = Mockito.mock(IAddressCache.class); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java index ca2f80545f7f..414e3efd2110 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java @@ -13,6 +13,7 @@ import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; +import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; @@ -51,6 +52,7 @@ public class GlobalAddressResolverTest { private HttpClient httpClient; private GlobalEndpointManager endpointManager; + private IGlobalPartitionEndpointManager globalPartitionEndpointManager; private IAuthorizationTokenProvider authorizationTokenProvider; private UserAgentContainer userAgentContainer; private RxCollectionCache collectionCache; @@ -111,7 +113,7 @@ public void resolveAsync() throws Exception { GlobalAddressResolver globalAddressResolver = new GlobalAddressResolver(mockDiagnosticsClientContext(), httpClient, endpointManager, Protocol.HTTPS, authorizationTokenProvider, collectionCache, routingMapProvider, userAgentContainer, - serviceConfigReader, connectionPolicy, null); + serviceConfigReader, connectionPolicy, null, globalPartitionEndpointManager); RxDocumentServiceRequest request; request = RxDocumentServiceRequest.createFromName(mockDiagnosticsClientContext(), OperationType.Read, @@ -146,7 +148,8 @@ public void submitOpenConnectionTasksAndInitCaches() { userAgentContainer, serviceConfigReader, connectionPolicy, - null); + null, + globalPartitionEndpointManager); GlobalAddressResolver.EndpointCache endpointCache = new GlobalAddressResolver.EndpointCache(); GatewayAddressCache gatewayAddressCache = Mockito.mock(GatewayAddressCache.class); endpointCache.addressCache = gatewayAddressCache; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 42217da2be33..a0e138d7253b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -69,8 +69,7 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); - isFailoverPossible.set(partitionKeyRangeFailoverInfoAsVal - .tryMoveNextLocation(applicableEndpoints, failedLocation, request.isReadOnlyRequest())); + isFailoverPossible.set(partitionKeyRangeFailoverInfoAsVal.tryMoveNextLocation(applicableEndpoints, failedLocation, request.isReadOnlyRequest())); } return partitionKeyRangeFailoverInfoAsVal; @@ -88,12 +87,7 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re } @Override - public boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request) { - return false; - } - - @Override - public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request) { + public boolean tryBookmarkPartitionKeyRangeSuccess(RxDocumentServiceRequest request) { if (request == null) { throw new IllegalArgumentException("request cannot be null!"); @@ -114,26 +108,72 @@ public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest req return false; } + URI succeededLocation = request.requestContext.locationEndpointToRoute; + if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { + this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { - // is it possible for this instance to go stale? - PartitionLevelFailoverInfo partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + if (partitionKeyRangeFailoverInfoAsVal == null) { + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfo(); + } - // it could be possible that this currentLocationSnapshot is stale since the ConcurrentHashMap.get - // thread won over ConcurrentHashMap.compute (can mark a location as failed), in that case, the request - // could hit possible unavailability issues again - URI currentLocationSnapshot = partitionLevelFailoverInfo.current; + partitionKeyRangeFailoverInfoAsVal.bookmarkSuccess(succeededLocation, request.isReadOnlyRequest()); + return partitionKeyRangeFailoverInfoAsVal; + }); + } - if (logger.isDebugEnabled()) { - logger.debug("Moving request to location : {}", currentLocationSnapshot.getPath()); - } + return false; + } - request.requestContext.routeToLocation(currentLocationSnapshot); + @Override + public boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request) { + return false; + } - return true; - } + @Override + public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request) { - return false; + return true; + +// if (request == null) { +// throw new IllegalArgumentException("request cannot be null!"); +// } +// +// if (request.requestContext == null) { +// +// if (logger.isDebugEnabled()) { +// logger.warn("requestContext is null!"); +// } +// +// return false; +// } +// +// PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; +// +// if (partitionKeyRange == null) { +// return false; +// } +// +// if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { +// +// // is it possible for this instance to go stale? +// PartitionLevelFailoverInfo partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); +// +// // it could be possible that this currentLocationSnapshot is stale since the ConcurrentHashMap.get +// // thread won over ConcurrentHashMap.compute (can mark a location as failed), in that case, the request +// // could hit possible unavailability issues again +// URI currentLocationSnapshot = partitionLevelFailoverInfo.current; +// +// if (logger.isDebugEnabled()) { +// logger.debug("Moving request to location : {}", currentLocationSnapshot.getPath()); +// } +// +// request.requestContext.routeToLocation(currentLocationSnapshot); +// +// return true; +// } +// +// return false; } // what is the point of an inner class? @@ -190,15 +230,21 @@ public boolean isFailureThresholdBreachedForLocation(URI failedLocation, boolean } // bookmark success - public void bookmarkSuccess(URI succeededLocation) { + public void bookmarkSuccess(URI succeededLocation, boolean isReadRequest) { this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, partitionLevelFailureMetadataAsVal) -> { if (partitionLevelFailureMetadataAsVal == null) { return new PartitionLevelFailureMetadata(); } - if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.get() > 1) { - partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.decrementAndGet(); + if (isReadRequest) { + if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.get() > 1) { + partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.decrementAndGet(); + } + } else { + if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.get() > 1) { + partitionLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.decrementAndGet(); + } } return partitionLevelFailureMetadataAsVal; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java index f8b51462bec3..4621077747aa 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java @@ -5,6 +5,7 @@ public interface IGlobalPartitionEndpointManager { boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request); + boolean tryBookmarkPartitionKeyRangeSuccess(RxDocumentServiceRequest request); boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request); boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 7708b76069b5..a892e4be3120 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -681,7 +681,8 @@ private void initializeDirectConnectivity() { // this.gatewayConfigurationReader, null, this.connectionPolicy, - this.apiType); + this.apiType, + this.globalPartitionEndpointManager); this.storeClientFactory = new StoreClientFactory( this.addressResolver, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index f04315511df1..fa6e67056ed8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -9,9 +9,9 @@ import com.azure.cosmos.implementation.BadRequestException; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.Exceptions; -import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; +import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.InternalServerErrorException; import com.azure.cosmos.implementation.InvalidPartitionException; import com.azure.cosmos.implementation.NotFoundException; @@ -55,10 +55,10 @@ public class AddressResolver implements IAddressResolver { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache addressCache; - private GlobalEndpointManager globalEndpointManager; + private IGlobalPartitionEndpointManager globalPartitionEndpointManager; - public AddressResolver(GlobalEndpointManager globalEndpointManager) { - this.globalEndpointManager = globalEndpointManager; + public AddressResolver(IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + this.globalPartitionEndpointManager = globalPartitionEndpointManager; } public void initializeCaches( @@ -87,7 +87,7 @@ public Mono resolveAsync( request.requestContext.resolvedPartitionKeyRange = result.TargetPartitionKeyRange; // TODO: use GlobalPartitionEndpointManager to add a partition-level request override - if (!this.globalEndpointManager.tryAddPartitionLevelOverride(request)) { + if (!this.globalPartitionEndpointManager.tryAddPartitionKeyRangeLevelOverride(request)) { return this.resolveAsync(request, forceRefreshPartitionAddresses); } @@ -105,6 +105,11 @@ public void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proact throw new NotImplementedException("setOpenConnectionsProcessor is not supported on AddressResolver"); } + @Override + public IGlobalPartitionEndpointManager getGlobalPartitionEndpointManager() { + return this.globalPartitionEndpointManager; + } + private static boolean isSameCollection(PartitionKeyRange initiallyResolved, PartitionKeyRange newlyResolved) { if (initiallyResolved == null) { throw new IllegalArgumentException("parent"); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index 4ec19035891a..32eb60f9b121 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -12,6 +12,7 @@ import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; +import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.RxDocumentServiceRequest; @@ -47,6 +48,7 @@ public class GlobalAddressResolver implements IAddressResolver { private final static int MaxBackupReadRegions = 3; private final DiagnosticsClientContext diagnosticsClientContext; private final GlobalEndpointManager endpointManager; + private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; private final Protocol protocol; private final IAuthorizationTokenProvider tokenProvider; private final UserAgentContainer userAgentContainer; @@ -73,7 +75,8 @@ public GlobalAddressResolver( UserAgentContainer userAgentContainer, GatewayServiceConfigurationReader serviceConfigReader, ConnectionPolicy connectionPolicy, - ApiType apiType) { + ApiType apiType, + IGlobalPartitionEndpointManager globalPartitionEndpointManager) { this.diagnosticsClientContext = diagnosticsClientContext; this.httpClient = httpClient; this.endpointManager = endpointManager; @@ -90,6 +93,7 @@ public GlobalAddressResolver( this.maxEndpoints = maxBackupReadEndpoints + 2; // for write and alternate write getEndpoint (during failover) this.addressCacheByEndpoint = new ConcurrentHashMap<>(); this.apiType = apiType; + this.globalPartitionEndpointManager = globalPartitionEndpointManager; for (URI endpoint : endpointManager.getWriteEndpoints()) { this.getOrAddEndpoint(endpoint); @@ -245,6 +249,11 @@ public void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proact } } + @Override + public IGlobalPartitionEndpointManager getGlobalPartitionEndpointManager() { + return this.globalPartitionEndpointManager; + } + @Override public Mono resolveAsync(RxDocumentServiceRequest request, boolean forceRefresh) { IAddressResolver resolver = this.getAddressResolver(request); @@ -290,7 +299,7 @@ private EndpointCache getOrAddEndpoint(URI endpoint) { this.connectionPolicy, this.proactiveOpenConnectionsProcessor, this.gatewayServerErrorInjector); - AddressResolver addressResolver = new AddressResolver(this.endpointManager); + AddressResolver addressResolver = new AddressResolver(this.globalPartitionEndpointManager); addressResolver.initializeCaches(this.collectionCache, this.routingMapProvider, gatewayAddressCache); EndpointCache cache = new EndpointCache(); cache.addressCache = gatewayAddressCache; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java index 965c38517199..e36a299a1053 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java @@ -4,6 +4,7 @@ package com.azure.cosmos.implementation.directconnectivity; import com.azure.cosmos.CosmosContainerProactiveInitConfig; +import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.directconnectivity.rntbd.ProactiveOpenConnectionsProcessor; @@ -32,4 +33,6 @@ Mono resolveAsync( * @param proactiveOpenConnectionsProcessor the {@link ProactiveOpenConnectionsProcessor}. */ void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proactiveOpenConnectionsProcessor); + + IGlobalPartitionEndpointManager getGlobalPartitionEndpointManager(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index 179acf0e6189..3272d08a5fed 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -14,6 +14,7 @@ import com.azure.cosmos.implementation.Exceptions; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; +import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.IRetryPolicy; import com.azure.cosmos.implementation.ISessionToken; import com.azure.cosmos.implementation.InternalServerErrorException; @@ -51,7 +52,7 @@ public class StoreClient implements IStoreClient { private final DiagnosticsClientContext diagnosticsClientContext; private final Logger logger = LoggerFactory.getLogger(StoreClient.class); private final GatewayServiceConfigurationReader serviceConfigurationReader; - + private final IAddressResolver addressResolver; private final SessionContainer sessionContainer; private final ReplicatedResourceClient replicatedResourceClient; private final TransportClient transportClient; @@ -82,6 +83,7 @@ public StoreClient( sessionRetryOptions); addressResolver.setOpenConnectionsProcessor(this.transportClient.getProactiveOpenConnectionsProcessor()); + this.addressResolver = addressResolver; } public void enableThroughputControl(ThroughputControlStore throughputControlStore) { @@ -189,6 +191,10 @@ private RxDocumentServiceResponse completeResponse( RxDocumentServiceResponse rxDocumentServiceResponse = new RxDocumentServiceResponse(this.diagnosticsClientContext, storeResponse); rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); + + IGlobalPartitionEndpointManager globalPartitionEndpointManager = addressResolver.getGlobalPartitionEndpointManager(); + globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); + return rxDocumentServiceResponse; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index f6e226498f7e..a7bbbfc70855 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -132,14 +132,6 @@ public List getAvailableWriteEndpoints() { return this.locationInfo.availableWriteEndpointByLocation.values().stream().collect(Collectors.toList()); } - public List getAvailableWriteEndpointsForPartitionKeyRange(RxDocumentServiceRequest request) { - return new ArrayList<>(); - } - - public List getAvailableReadEndpointsForPartitionKeyRange(RxDocumentServiceRequest request) { - return new ArrayList<>(); - } - /** * Marks the current location unavailable for read */ @@ -154,10 +146,6 @@ public void markEndpointUnavailableForWrite(URI endpoint) { this.markEndpointUnavailable(endpoint, OperationType.Write); } - public void markEndpointUnavailableForWriteAtPartitionKeyRangeScope(RxDocumentServiceRequest request) {} - - public void markEndpointUnavailableForReadAtPartitionKeyRangeScope(RxDocumentServiceRequest request) {} - /** * Invoked when {@link DatabaseAccount} is read * @param databaseAccount READ DatabaseAccount From e4b0cd8a0796ffec03fa4136e1f32aa447af75bc Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 19 Mar 2024 15:54:17 -0400 Subject: [PATCH 007/140] Adding skeletal methods to GlobalPartitionEndpointManagerForCircuitBreaker. --- .../RxGatewayStoreModelTest.java | 19 +- ...itionEndpointManagerForCircuitBreaker.java | 269 ++++++++++++------ .../IGlobalPartitionEndpointManager.java | 3 +- .../implementation/RxDocumentClientImpl.java | 9 +- .../implementation/RxGatewayStoreModel.java | 19 +- .../directconnectivity/AddressResolver.java | 5 +- .../directconnectivity/StoreClient.java | 2 +- 7 files changed, 223 insertions(+), 103 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index 4b99500c971a..89fccfa94374 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -79,6 +79,8 @@ public void readTimeout() throws Exception { QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; UserAgentContainer userAgentContainer = new UserAgentContainer(); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); HttpClient httpClient = Mockito.mock(HttpClient.class); @@ -95,7 +97,8 @@ public void readTimeout() throws Exception { userAgentContainer, globalEndpointManager, httpClient, - null); + null, + globalPartitionEndpointManager); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName(clientContext, @@ -121,6 +124,7 @@ public void serviceUnavailable() throws Exception { QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; UserAgentContainer userAgentContainer = new UserAgentContainer(); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); HttpClient httpClient = Mockito.mock(HttpClient.class); @@ -137,7 +141,8 @@ public void serviceUnavailable() throws Exception { userAgentContainer, globalEndpointManager, httpClient, - null); + null, + globalPartitionEndpointManager); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName(clientContext, @@ -173,6 +178,8 @@ public void applySessionToken( Mockito.doReturn(sdkGlobalSessionToken).when(sessionContainer).resolveGlobalSessionToken(any()); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); @@ -192,7 +199,8 @@ public void applySessionToken( new UserAgentContainer(), globalEndpointManager, httpClient, - apiType); + apiType, + globalPartitionEndpointManager); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); httpClient = ReflectionUtils.getHttpClient(storeModel); @@ -242,6 +250,8 @@ public void validateApiType() throws Exception { Mockito.doReturn(sdkGlobalSessionToken).when(sessionContainer).resolveGlobalSessionToken(any()); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); + IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); @@ -257,7 +267,8 @@ public void validateApiType() throws Exception { new UserAgentContainer(), globalEndpointManager, httpClient, - apiType); + apiType, + globalPartitionEndpointManager); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName( clientContext, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index a0e138d7253b..91ac201e0ba2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -6,12 +6,13 @@ import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import reactor.core.publisher.Mono; import java.net.URI; +import java.time.Duration; import java.time.Instant; -import java.util.HashSet; import java.util.List; -import java.util.Set; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -69,7 +70,10 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); - isFailoverPossible.set(partitionKeyRangeFailoverInfoAsVal.tryMoveNextLocation(applicableEndpoints, failedLocation, request.isReadOnlyRequest())); + isFailoverPossible.set(partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange( + this, + applicableEndpoints, + request.isReadOnlyRequest())); } return partitionKeyRangeFailoverInfoAsVal; @@ -77,11 +81,13 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re // set to true if and only if failure threshold exceeded for the region // and if failover is possible - // a failover is only possible when there are available regions left to failover to + // a failover is only possible when there are available regions left to fail over to if (isFailoverPossible.get()) { + this.updateStaleLocationInfo(request); return true; } + // no regions to fail over to this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); return false; } @@ -126,54 +132,91 @@ public boolean tryBookmarkPartitionKeyRangeSuccess(RxDocumentServiceRequest requ } @Override - public boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request) { - return false; - } + public boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest request) { - @Override - public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request) { + if (request == null) { + throw new IllegalArgumentException("request cannot be null!"); + } + + if (request.requestContext == null) { + + if (logger.isDebugEnabled()) { + logger.warn("requestContext is null!"); + } + + return false; + } + + PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + + if (partitionKeyRange == null) { + throw new IllegalStateException("requestContext.resolvedPartitionKeyRange cannot be null!"); + } + + URI locationWithUndeterminedAvailability = request.requestContext.locationEndpointToRoute; + + if (locationWithUndeterminedAvailability == null) { + throw new IllegalStateException("requestContext.locationEndpointToRoute cannot be null!"); + } + + if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { + + // is it possible for this instance to go stale? + PartitionLevelFailoverInfo partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + + if (partitionLevelFailoverInfo.partitionLevelFailureMetadata.containsKey(locationWithUndeterminedAvailability)) { + LocationLevelFailureMetadata locationLevelFailureMetadata + = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(locationWithUndeterminedAvailability); + + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.FreshUnavailable) { + return false; + } + } + + // there is no locationLevelFailureMetadata for locationWithUndeterminedAvailability + // [or] locationWithUndeterminedAvailability is still available / is stale unavailable + return true; + } + + // there is no partitionLevelFailoverInfo for partitionKeyRange return true; + } + + public void updateStaleLocationInfo(RxDocumentServiceRequest request) { + Mono.delay(Duration.ofSeconds(60)) + .flatMap(ignore -> { + Map partitionKeyRangeToFailoverInfo + = this.partitionKeyRangeToFailoverInfo; -// if (request == null) { -// throw new IllegalArgumentException("request cannot be null!"); -// } -// -// if (request.requestContext == null) { -// -// if (logger.isDebugEnabled()) { -// logger.warn("requestContext is null!"); -// } -// -// return false; -// } -// -// PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; -// -// if (partitionKeyRange == null) { -// return false; -// } -// -// if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { -// -// // is it possible for this instance to go stale? -// PartitionLevelFailoverInfo partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); -// -// // it could be possible that this currentLocationSnapshot is stale since the ConcurrentHashMap.get -// // thread won over ConcurrentHashMap.compute (can mark a location as failed), in that case, the request -// // could hit possible unavailability issues again -// URI currentLocationSnapshot = partitionLevelFailoverInfo.current; -// -// if (logger.isDebugEnabled()) { -// logger.debug("Moving request to location : {}", currentLocationSnapshot.getPath()); -// } -// -// request.requestContext.routeToLocation(currentLocationSnapshot); -// -// return true; -// } -// -// return false; + if (request.requestContext == null) { + return Mono.empty(); + } + + PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + + if (partitionKeyRange == null) { + return Mono.empty(); + } + + URI unavailableLocation = request.requestContext.locationEndpointToRoute; + + if (unavailableLocation == null) { + return Mono.empty(); + } + + if (partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { + PartitionLevelFailoverInfo partitionLevelFailoverInfo + = partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + LocationLevelFailureMetadata locationLevelFailureMetadata + = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(unavailableLocation); + + locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.StaleUnavailable); + } + + return Mono.empty(); + }).subscribeOn(CosmosSchedulers.COSMOS_PARALLEL) + .subscribe(); } // what is the point of an inner class? @@ -184,10 +227,8 @@ public boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest req // 4. failure type static class PartitionLevelFailoverInfo { - private final ConcurrentHashMap partitionLevelFailureMetadata; - private final Set failedLocations = ConcurrentHashMap.newKeySet(); + private final ConcurrentHashMap partitionLevelFailureMetadata; // points to the current location a request will be routed to - private URI current; PartitionLevelFailoverInfo() { this.partitionLevelFailureMetadata = new ConcurrentHashMap<>(); @@ -201,29 +242,29 @@ public boolean isFailureThresholdBreachedForLocation(URI failedLocation, boolean AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - this.partitionLevelFailureMetadata.compute(failedLocation, (locationAsKey, partitionLevelFailureMetadataAsVal) -> { + this.partitionLevelFailureMetadata.compute(failedLocation, (locationAsKey, locationLevelFailureMetadataAsVal) -> { - if (partitionLevelFailureMetadataAsVal == null) { - partitionLevelFailureMetadataAsVal = new PartitionLevelFailureMetadata(); + if (locationLevelFailureMetadataAsVal == null) { + locationLevelFailureMetadataAsVal = new LocationLevelFailureMetadata(); } // todo : make threshold for marking a location as failed more comprehensive if (isReadRequest) { - if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.incrementAndGet() > 5) { - partitionLevelFailureMetadataAsVal.unavailableForReadsSince.set(Instant.now()); - this.current = failedLocation; + if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForReads.incrementAndGet() > 5) { + locationLevelFailureMetadataAsVal.unavailableForReadsSince.set(Instant.now()); + locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.FreshUnavailable); isFailureThresholdBreached.set(true); } } else { - if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.incrementAndGet() > 5) { - partitionLevelFailureMetadataAsVal.unavailableForWritesSince.set(Instant.now()); - this.current = failedLocation; + if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.incrementAndGet() > 5) { + locationLevelFailureMetadataAsVal.unavailableForWritesSince.set(Instant.now()); + locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.FreshUnavailable); isFailureThresholdBreached.set(true); } } - return partitionLevelFailureMetadataAsVal; + return locationLevelFailureMetadataAsVal; }); return isFailureThresholdBreached.get(); @@ -231,70 +272,120 @@ public boolean isFailureThresholdBreachedForLocation(URI failedLocation, boolean // bookmark success public void bookmarkSuccess(URI succeededLocation, boolean isReadRequest) { - this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, partitionLevelFailureMetadataAsVal) -> { + this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, locationLevelFailureMetadataAsVal) -> { - if (partitionLevelFailureMetadataAsVal == null) { - return new PartitionLevelFailureMetadata(); + if (locationLevelFailureMetadataAsVal == null) { + return new LocationLevelFailureMetadata(); } if (isReadRequest) { - if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.get() > 1) { - partitionLevelFailureMetadataAsVal.consecutiveFailureCountForReads.decrementAndGet(); + if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForReads.get() > 1) { + switch (locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.get()) { + case StaleUnavailable: + locationLevelFailureMetadataAsVal = new LocationLevelFailureMetadata(); + case Available: + locationLevelFailureMetadataAsVal.consecutiveFailureCountForReads.decrementAndGet(); + } } } else { - if (partitionLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.get() > 1) { - partitionLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.decrementAndGet(); - } + if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.get() > 1) { + switch (locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.get()) { + case StaleUnavailable: + locationLevelFailureMetadataAsVal = new LocationLevelFailureMetadata(); + case Available: + locationLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.decrementAndGet(); + } } } - return partitionLevelFailureMetadataAsVal; + return locationLevelFailureMetadataAsVal; }); } // method purpose - choose the next possible region for this partition // 1. if current == failedLocation - try using a different location // a) iterate through the list of read / write locations - // b) if location in iteration loop not part of failedLocations, then assign location to current + // b) if location in iteration loop not part of locationUnavailabilityInfos, then assign location to current // 2. if current != failedLocation - a different thread has updated it // 3. - public boolean tryMoveNextLocation(List locations, URI failedLocation, boolean isReadRequest) { - for (URI location : locations) { - if (failedLocation == this.current) { - continue; + public boolean areLocationsAvailableForPartitionKeyRange( + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker, + List availableLocationsAtAccountLevel, + boolean isReadRequest) { + + for (URI availableLocation : availableLocationsAtAccountLevel) { + if (!this.partitionLevelFailureMetadata.containsKey(availableLocation)) { + return true; + } else { + LocationLevelFailureMetadata locationLevelFailureMetadata = this.partitionLevelFailureMetadata.get(availableLocation); + + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.Available) { + return true; + } } + } - // failedLocation != current - if (!this.failedLocations.contains(failedLocation)) { + Instant mostStaleUnavailableTimeAcrossRegions = Instant.MAX; + LocationLevelFailureMetadata locationLevelFailureMetadataForMostStaleLocation = null; - this.failedLocations.add(new FailedLocation(failedLocation, isReadRequest)); - this.current = location; + // find region with most 'stale' unavailability + for (Map.Entry uriToLocationLevelFailureMetadata : this.partitionLevelFailureMetadata.entrySet()) { + URI unavailableLocation = uriToLocationLevelFailureMetadata.getKey(); + LocationLevelFailureMetadata locationLevelFailureMetadata = uriToLocationLevelFailureMetadata.getValue(); + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.Available) { return true; } + + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.StaleUnavailable) { + return true; + } + + if (isReadRequest) { + + Instant unavailableSince = locationLevelFailureMetadata.unavailableForReadsSince.get(); + + if (mostStaleUnavailableTimeAcrossRegions.isAfter(unavailableSince)) { + mostStaleUnavailableTimeAcrossRegions = unavailableSince; + locationLevelFailureMetadataForMostStaleLocation = locationLevelFailureMetadata; + } + } else { + + Instant unavailableSince = locationLevelFailureMetadata.unavailableForWritesSince.get(); + + if (mostStaleUnavailableTimeAcrossRegions.isAfter(locationLevelFailureMetadata.unavailableForWritesSince.get())) { + mostStaleUnavailableTimeAcrossRegions = unavailableSince; + locationLevelFailureMetadataForMostStaleLocation = locationLevelFailureMetadata; + } + } } - return false; - } + if (locationLevelFailureMetadataForMostStaleLocation != null) { + locationLevelFailureMetadataForMostStaleLocation.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.StaleUnavailable); + return true; + } - public boolean tryMarkLocationAsAvailable(URI previouslyFailedLocation) { return false; } } - static class PartitionLevelFailureMetadata { + private static class LocationLevelFailureMetadata { private final AtomicInteger consecutiveFailureCountForWrites = new AtomicInteger(); private final AtomicInteger consecutiveFailureCountForReads = new AtomicInteger(); private final AtomicReference unavailableForWritesSince = new AtomicReference<>(Instant.MIN); private final AtomicReference unavailableForReadsSince = new AtomicReference<>(Instant.MIN); + private final AtomicReference partitionKeyRangeUnavailabilityStatus = new AtomicReference<>(PartitionKeyRangeUnavailabilityStatus.Available); + } - static class FailedLocation { - private final URI location; - private final boolean isRead; + enum PartitionKeyRangeUnavailabilityStatus { + Available(100), + FreshUnavailable(200), + StaleUnavailable(300); + + private int priority; - FailedLocation(URI location, boolean isRead) { - this.location = location; - this.isRead = isRead; + PartitionKeyRangeUnavailabilityStatus(int priority) { + this.priority = priority; } } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java index 4621077747aa..f64d17217bee 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java @@ -6,6 +6,5 @@ public interface IGlobalPartitionEndpointManager { boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request); boolean tryBookmarkPartitionKeyRangeSuccess(RxDocumentServiceRequest request); - boolean tryMarkPartitionKeyRangeAsAvailable(RxDocumentServiceRequest request); - boolean tryAddPartitionKeyRangeLevelOverride(RxDocumentServiceRequest request); + boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index a892e4be3120..4748b14643c4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -596,7 +596,8 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.userAgentContainer, this.globalEndpointManager, this.reactorHttpClient, - this.apiType); + this.apiType, + this.globalPartitionEndpointManager); this.globalEndpointManager.init(); this.initializeGatewayConfigurationReader(); @@ -725,7 +726,8 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - ApiType apiType) { + ApiType apiType, + IGlobalPartitionEndpointManager globalPartitionEndpointManager) { return new RxGatewayStoreModel( this, sessionContainer, @@ -734,7 +736,8 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, userAgentContainer, globalEndpointManager, httpClient, - apiType); + apiType, + globalPartitionEndpointManager); } private HttpClient httpClient() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index e17726deaf32..c796cd3c8db7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -61,6 +61,7 @@ public class RxGatewayStoreModel implements RxStoreModel { private final HttpClient httpClient; private final QueryCompatibilityMode queryCompatibilityMode; private final GlobalEndpointManager globalEndpointManager; + private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; private ConsistencyLevel defaultConsistencyLevel; private ISessionContainer sessionContainer; private ThroughputControlStore throughputControlStore; @@ -78,7 +79,8 @@ public RxGatewayStoreModel( UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - ApiType apiType) { + ApiType apiType, + IGlobalPartitionEndpointManager globalPartitionEndpointManager) { this.clientContext = clientContext; this.defaultHeaders = new HashMap<>(); this.defaultHeaders.put(HttpConstants.HttpHeaders.CACHE_CONTROL, @@ -110,6 +112,7 @@ public RxGatewayStoreModel( this.httpClient = httpClient; this.sessionContainer = sessionContainer; + this.globalPartitionEndpointManager = globalPartitionEndpointManager; } public RxGatewayStoreModel(RxGatewayStoreModel inner) { @@ -121,6 +124,7 @@ public RxGatewayStoreModel(RxGatewayStoreModel inner) { this.httpClient = inner.httpClient; this.sessionContainer = inner.sessionContainer; + this.globalPartitionEndpointManager = inner.globalPartitionEndpointManager; } void setGatewayServiceConfigurationReader(GatewayServiceConfigurationReader gatewayServiceConfigurationReader) { @@ -539,7 +543,12 @@ private Mono invokeAsyncInternal(RxDocumentServiceReq } private Mono invokeAsync(RxDocumentServiceRequest request) { - Callable> funcDelegate = () -> invokeAsyncInternal(request).single(); + + if (this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { + return Mono.error(new ServiceUnavailableException("PkRange is unavailable at region", null, request.requestContext.locationEndpointToRoute, HttpConstants.SubStatusCodes.UNKNOWN)); + } + + Callable> funcDelegate = () -> invokeAsyncInternal(request).single().doOnSuccess(ignore -> this.globalPartitionEndpointManager.tryBookmarkPartitionKeyRangeSuccess(request)); MetadataRequestRetryPolicy metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(this.globalEndpointManager); metadataRequestRetryPolicy.onBeforeSendRequest(request); @@ -724,6 +733,9 @@ private Mono applySessionToken(RxDocumentServiceRequest request) { PartitionKeyRange range = collectionRoutingMapValueHolder.v.getRangeByPartitionKeyRangeId(partitionKeyRangeId); request.requestContext.resolvedPartitionKeyRange = range; + + this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request); + if (request.requestContext.resolvedPartitionKeyRange == null) { SessionTokenHelper.setPartitionLocalSessionToken(request, partitionKeyRangeId, sessionContainer); @@ -738,6 +750,9 @@ private Mono applySessionToken(RxDocumentServiceRequest request) { PartitionKeyRange range = collectionRoutingMapValueHolder.v.getRangeByEffectivePartitionKey(effectivePartitionKeyString); request.requestContext.resolvedPartitionKeyRange = range; + + this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request); + SessionTokenHelper.setPartitionLocalSessionToken(request, sessionContainer); } else { //Apply the ambient session. diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index fa6e67056ed8..c73b4b5549d2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -22,6 +22,7 @@ import com.azure.cosmos.implementation.ResourceId; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.ServiceUnavailableException; import com.azure.cosmos.implementation.Strings; import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.NotImplementedException; @@ -87,8 +88,8 @@ public Mono resolveAsync( request.requestContext.resolvedPartitionKeyRange = result.TargetPartitionKeyRange; // TODO: use GlobalPartitionEndpointManager to add a partition-level request override - if (!this.globalPartitionEndpointManager.tryAddPartitionKeyRangeLevelOverride(request)) { - return this.resolveAsync(request, forceRefreshPartitionAddresses); + if (this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { + return Mono.error(new ServiceUnavailableException("PkRange is unavailable at region", null, request.requestContext.locationEndpointToRoute, HttpConstants.SubStatusCodes.UNKNOWN)); } return Mono.just(result.Addresses); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index 3272d08a5fed..71955ccdf673 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -193,7 +193,7 @@ private RxDocumentServiceResponse completeResponse( rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); IGlobalPartitionEndpointManager globalPartitionEndpointManager = addressResolver.getGlobalPartitionEndpointManager(); - globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(request); + globalPartitionEndpointManager.tryBookmarkPartitionKeyRangeSuccess(request); return rxDocumentServiceResponse; } From 1ae138237e3a62ea50f46206dcf08570563a2f42 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 19 Mar 2024 20:24:24 -0400 Subject: [PATCH 008/140] Adding skeletal methods to GlobalPartitionEndpointManagerForCircuitBreaker. --- .../implementation/ClientRetryPolicy.java | 2 +- ...itionEndpointManagerForCircuitBreaker.java | 29 +++++++++---------- .../IGlobalPartitionEndpointManager.java | 4 +-- .../implementation/RxGatewayStoreModel.java | 8 +---- .../directconnectivity/StoreClient.java | 2 +- 5 files changed, 18 insertions(+), 27 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index c1baea4de3a3..8261c16dd100 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -322,7 +322,7 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( // if partition-level circuit breaker is enabled if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - this.globalPartitionEndpointManager.tryMarkPartitionKeyRangeAsUnavailable(this.request); + this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(this.request); } // The request has failed with 503, SDK need to decide whether it is safe to retry for write operations diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 91ac201e0ba2..282e17d86ed1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -31,7 +31,7 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo } @Override - public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request) { + public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request) { if (request == null) { throw new IllegalArgumentException("request cannot be null!"); @@ -71,7 +71,6 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); isFailoverPossible.set(partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange( - this, applicableEndpoints, request.isReadOnlyRequest())); } @@ -93,7 +92,7 @@ public boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest re } @Override - public boolean tryBookmarkPartitionKeyRangeSuccess(RxDocumentServiceRequest request) { + public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request) { if (request == null) { throw new IllegalArgumentException("request cannot be null!"); @@ -169,7 +168,7 @@ public boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest re LocationLevelFailureMetadata locationLevelFailureMetadata = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(locationWithUndeterminedAvailability); - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.FreshUnavailable) { + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable) { return false; } } @@ -211,7 +210,7 @@ public void updateStaleLocationInfo(RxDocumentServiceRequest request) { LocationLevelFailureMetadata locationLevelFailureMetadata = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(unavailableLocation); - locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.StaleUnavailable); + locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); } return Mono.empty(); @@ -253,13 +252,13 @@ public boolean isFailureThresholdBreachedForLocation(URI failedLocation, boolean if (isReadRequest) { if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForReads.incrementAndGet() > 5) { locationLevelFailureMetadataAsVal.unavailableForReadsSince.set(Instant.now()); - locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.FreshUnavailable); + locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); isFailureThresholdBreached.set(true); } } else { if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.incrementAndGet() > 5) { locationLevelFailureMetadataAsVal.unavailableForWritesSince.set(Instant.now()); - locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.FreshUnavailable); + locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); isFailureThresholdBreached.set(true); } } @@ -308,7 +307,6 @@ public void bookmarkSuccess(URI succeededLocation, boolean isReadRequest) { // 2. if current != failedLocation - a different thread has updated it // 3. public boolean areLocationsAvailableForPartitionKeyRange( - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker, List availableLocationsAtAccountLevel, boolean isReadRequest) { @@ -318,7 +316,7 @@ public boolean areLocationsAvailableForPartitionKeyRange( } else { LocationLevelFailureMetadata locationLevelFailureMetadata = this.partitionLevelFailureMetadata.get(availableLocation); - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.Available) { + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.Available) { return true; } } @@ -329,14 +327,13 @@ public boolean areLocationsAvailableForPartitionKeyRange( // find region with most 'stale' unavailability for (Map.Entry uriToLocationLevelFailureMetadata : this.partitionLevelFailureMetadata.entrySet()) { - URI unavailableLocation = uriToLocationLevelFailureMetadata.getKey(); LocationLevelFailureMetadata locationLevelFailureMetadata = uriToLocationLevelFailureMetadata.getValue(); - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.Available) { + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.Available) { return true; } - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionKeyRangeUnavailabilityStatus.StaleUnavailable) { + if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable) { return true; } @@ -360,7 +357,7 @@ public boolean areLocationsAvailableForPartitionKeyRange( } if (locationLevelFailureMetadataForMostStaleLocation != null) { - locationLevelFailureMetadataForMostStaleLocation.partitionKeyRangeUnavailabilityStatus.set(PartitionKeyRangeUnavailabilityStatus.StaleUnavailable); + locationLevelFailureMetadataForMostStaleLocation.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); return true; } @@ -373,18 +370,18 @@ private static class LocationLevelFailureMetadata { private final AtomicInteger consecutiveFailureCountForReads = new AtomicInteger(); private final AtomicReference unavailableForWritesSince = new AtomicReference<>(Instant.MIN); private final AtomicReference unavailableForReadsSince = new AtomicReference<>(Instant.MIN); - private final AtomicReference partitionKeyRangeUnavailabilityStatus = new AtomicReference<>(PartitionKeyRangeUnavailabilityStatus.Available); + private final AtomicReference partitionKeyRangeUnavailabilityStatus = new AtomicReference<>(PartitionScopedRegionUnavailabilityStatus.Available); } - enum PartitionKeyRangeUnavailabilityStatus { + enum PartitionScopedRegionUnavailabilityStatus { Available(100), FreshUnavailable(200), StaleUnavailable(300); private int priority; - PartitionKeyRangeUnavailabilityStatus(int priority) { + PartitionScopedRegionUnavailabilityStatus(int priority) { this.priority = priority; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java index f64d17217bee..f80c6e3db967 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java @@ -4,7 +4,7 @@ package com.azure.cosmos.implementation; public interface IGlobalPartitionEndpointManager { - boolean tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest request); - boolean tryBookmarkPartitionKeyRangeSuccess(RxDocumentServiceRequest request); + boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request); + boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request); boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index c796cd3c8db7..6699b3275dd6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -548,7 +548,7 @@ private Mono invokeAsync(RxDocumentServiceRequest req return Mono.error(new ServiceUnavailableException("PkRange is unavailable at region", null, request.requestContext.locationEndpointToRoute, HttpConstants.SubStatusCodes.UNKNOWN)); } - Callable> funcDelegate = () -> invokeAsyncInternal(request).single().doOnSuccess(ignore -> this.globalPartitionEndpointManager.tryBookmarkPartitionKeyRangeSuccess(request)); + Callable> funcDelegate = () -> invokeAsyncInternal(request).single().doOnSuccess(ignore -> this.globalPartitionEndpointManager.tryBookmarkRegionSuccessForPartitionKeyRange(request)); MetadataRequestRetryPolicy metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(this.globalEndpointManager); metadataRequestRetryPolicy.onBeforeSendRequest(request); @@ -733,9 +733,6 @@ private Mono applySessionToken(RxDocumentServiceRequest request) { PartitionKeyRange range = collectionRoutingMapValueHolder.v.getRangeByPartitionKeyRangeId(partitionKeyRangeId); request.requestContext.resolvedPartitionKeyRange = range; - - this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request); - if (request.requestContext.resolvedPartitionKeyRange == null) { SessionTokenHelper.setPartitionLocalSessionToken(request, partitionKeyRangeId, sessionContainer); @@ -750,9 +747,6 @@ private Mono applySessionToken(RxDocumentServiceRequest request) { PartitionKeyRange range = collectionRoutingMapValueHolder.v.getRangeByEffectivePartitionKey(effectivePartitionKeyString); request.requestContext.resolvedPartitionKeyRange = range; - - this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request); - SessionTokenHelper.setPartitionLocalSessionToken(request, sessionContainer); } else { //Apply the ambient session. diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index 71955ccdf673..7b3ab26723d7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -193,7 +193,7 @@ private RxDocumentServiceResponse completeResponse( rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); IGlobalPartitionEndpointManager globalPartitionEndpointManager = addressResolver.getGlobalPartitionEndpointManager(); - globalPartitionEndpointManager.tryBookmarkPartitionKeyRangeSuccess(request); + globalPartitionEndpointManager.tryBookmarkRegionSuccessForPartitionKeyRange(request); return rxDocumentServiceResponse; } From 7a9f32be64dcfc579f5c39fe947648e469e225ad Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 21 Mar 2024 17:10:42 -0400 Subject: [PATCH 009/140] Updated CHANGELOG. --- .../implementation/RxDocumentClientImpl.java | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 4748b14643c4..e1062b03ee26 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2130,8 +2130,9 @@ private Mono> createDocumentCore( } DocumentClientRetryPolicy finalRetryPolicyInstance = requestRetryPolicy; + AtomicReference documentServiceRequestReference = new AtomicReference<>(); - return getPointOperationResponseMonoWithE2ETimeout( + return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs(() -> @@ -2141,10 +2142,11 @@ private Mono> createDocumentCore( nonNullRequestOptions, disableAutomaticIdGeneration, finalRetryPolicyInstance, - scopedDiagnosticsFactory), + scopedDiagnosticsFactory, + documentServiceRequestReference), requestRetryPolicy), scopedDiagnosticsFactory - ); + ), documentServiceRequestReference); } private Mono> createDocumentInternal( @@ -2153,7 +2155,8 @@ private Mono> createDocumentInternal( RequestOptions options, boolean disableAutomaticIdGeneration, DocumentClientRetryPolicy requestRetryPolicy, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference documentServiceRequestReference) { try { logger.debug("Creating a Document. collectionLink: [{}]", collectionLink); @@ -2161,7 +2164,10 @@ private Mono> createDocumentInternal( options, disableAutomaticIdGeneration, OperationType.Create, clientContextOverride); return requestObs - .flatMap(request -> create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options))) + .flatMap(request -> { + documentServiceRequestReference.set(request); + return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); + }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); } catch (Exception e) { @@ -2199,6 +2205,14 @@ private static Mono getPointOperationResponseMonoWithE2ETimeout( return rxDocumentServiceResponseMono; } + private Mono handleRegionFeedbackForPointOperation(Mono response, AtomicReference requestReference) { + return response.doOnError(throwable -> { + if (throwable instanceof OperationCancelledException) { + this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + } + }); + } + private static Throwable getCancellationExceptionForPointOperations( ScopedDiagnosticsFactory scopedDiagnosticsFactory, Throwable throwable, From 610af3464dc7664a2ff4d2868abe93f373d49967 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 22 Mar 2024 09:33:25 -0400 Subject: [PATCH 010/140] Added class to error track on a per-region basis. --- ...gionLevelCircuitBreakerRequestContext.java | 57 +++++++++++++++++++ .../RxDocumentServiceRequest.java | 3 + 2 files changed, 60 insertions(+) create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RegionLevelCircuitBreakerRequestContext.java diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RegionLevelCircuitBreakerRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RegionLevelCircuitBreakerRequestContext.java new file mode 100644 index 000000000000..5aa737a87f15 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RegionLevelCircuitBreakerRequestContext.java @@ -0,0 +1,57 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import java.net.URI; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class RegionLevelCircuitBreakerRequestContext { + + private final Map failuresPerRegion; + private final boolean isRegionLevelCircuitBreakerEnabled; + + public RegionLevelCircuitBreakerRequestContext(boolean isRegionLevelCircuitBreakerEnabled) { + this.failuresPerRegion = new ConcurrentHashMap<>(); + this.isRegionLevelCircuitBreakerEnabled = isRegionLevelCircuitBreakerEnabled; + } + + public boolean tryRecordRegionScopedFailure(URI locationEndpointToRoute, int statusCode, int subStatusCode) { + if (isRegionScopedFailure(statusCode, subStatusCode)) { + // add to map + return true; + } + return false; + } + + private static boolean isRegionScopedFailure(int statusCode, int subStatusCode) { + + if (statusCode == HttpConstants.StatusCodes.GONE + && subStatusCode == HttpConstants.SubStatusCodes.COMPLETING_PARTITION_MIGRATION) { + return true; + } + + if (statusCode == HttpConstants.StatusCodes.GONE + && subStatusCode == HttpConstants.SubStatusCodes.COMPLETING_PARTITION_MIGRATION_EXCEEDED_RETRY_LIMIT) { + return true; + } + + if (statusCode == HttpConstants.StatusCodes.GONE + && subStatusCode == HttpConstants.SubStatusCodes.SERVER_GENERATED_410) { + return true; + } + + if (statusCode == HttpConstants.StatusCodes.SERVICE_UNAVAILABLE + && subStatusCode == HttpConstants.SubStatusCodes.SERVER_GENERATED_503) { + return true; + } + + if (statusCode == HttpConstants.StatusCodes.SERVICE_UNAVAILABLE + && subStatusCode == HttpConstants.SubStatusCodes.SERVER_GENERATED_503) { + return true; + } + + return false; + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java index df0bf4b865a0..2bc3f917fa74 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java @@ -57,6 +57,7 @@ public class RxDocumentServiceRequest implements Cloneable { public DocumentServiceRequestContext requestContext; public FaultInjectionRequestContext faultInjectionRequestContext; + public RegionLevelCircuitBreakerRequestContext regionLevelCircuitBreakerRequestContext; // has the non serialized value of the partition-key private PartitionKeyInternal partitionKeyInternal; @@ -174,6 +175,7 @@ private RxDocumentServiceRequest(DiagnosticsClientContext clientContext, this.authorizationTokenType = authorizationTokenType; this.requestContext = new DocumentServiceRequestContext(); this.faultInjectionRequestContext = new FaultInjectionRequestContext(); + this.regionLevelCircuitBreakerRequestContext = new RegionLevelCircuitBreakerRequestContext(false); if (StringUtils.isNotEmpty(this.headers.get(WFConstants.BackendHeaders.PARTITION_KEY_RANGE_ID))) this.partitionKeyRangeIdentity = PartitionKeyRangeIdentity.fromHeader(this.headers.get(WFConstants.BackendHeaders.PARTITION_KEY_RANGE_ID)); } @@ -194,6 +196,7 @@ private RxDocumentServiceRequest(DiagnosticsClientContext clientContext, this.clientContext = clientContext; this.requestContext = new DocumentServiceRequestContext(); this.faultInjectionRequestContext = new FaultInjectionRequestContext(); + this.regionLevelCircuitBreakerRequestContext = new RegionLevelCircuitBreakerRequestContext(false); this.operationType = operationType; this.resourceType = resourceType; this.requestContext.sessionToken = null; From af6b6b3b413a285d69a821b1e5fe5b73aacb88e1 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 22 Mar 2024 19:58:09 -0400 Subject: [PATCH 011/140] Refactor region health transitions. --- ...ProactiveOpenConnectionsProcessorTest.java | 2 + .../azure/cosmos/implementation/ErrorKey.java | 11 + ...itionEndpointManagerForCircuitBreaker.java | 288 +++++++++++------- ...ionLevelCircuitBreakerRequestContext.java} | 35 ++- .../RxDocumentServiceRequest.java | 6 +- .../GoneAndRetryWithRetryPolicy.java | 22 +- 6 files changed, 249 insertions(+), 115 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java rename sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/{RegionLevelCircuitBreakerRequestContext.java => LocationLevelCircuitBreakerRequestContext.java} (58%) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java index e24f5968e9d6..ba74763bb3bc 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java @@ -163,6 +163,8 @@ public void recordNewAddressesAfterSplitTest() { try { + Thread.sleep(10_000); + int totalRequests = 200; List preferredRegions = this.writeRegionMap.keySet().stream().collect(Collectors.toList()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java new file mode 100644 index 000000000000..3bdf9245a2c7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java @@ -0,0 +1,11 @@ +package com.azure.cosmos.implementation; + +public class ErrorKey { + private final int statusCode; + private final int subStatusCode; + + public ErrorKey(int statusCode, int subStatusCode) { + this.statusCode = statusCode; + this.subStatusCode = subStatusCode; + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 282e17d86ed1..83066a2332a4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -62,7 +62,7 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfo(); } - isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(failedLocation, request.isReadOnlyRequest())); + isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(request)); if (isFailureThresholdBreached.get()) { @@ -70,9 +70,8 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); - isFailoverPossible.set(partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange( - applicableEndpoints, - request.isReadOnlyRequest())); + isFailoverPossible.set( + partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints)); } return partitionKeyRangeFailoverInfoAsVal; @@ -122,7 +121,7 @@ public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceReq partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfo(); } - partitionKeyRangeFailoverInfoAsVal.bookmarkSuccess(succeededLocation, request.isReadOnlyRequest()); + partitionKeyRangeFailoverInfoAsVal.bookmarkSuccess(succeededLocation); return partitionKeyRangeFailoverInfoAsVal; }); } @@ -165,10 +164,10 @@ public boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest re if (partitionLevelFailoverInfo.partitionLevelFailureMetadata.containsKey(locationWithUndeterminedAvailability)) { - LocationLevelFailureMetadata locationLevelFailureMetadata + LocationLevelMetrics locationLevelMetrics = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(locationWithUndeterminedAvailability); - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable) { + if (locationLevelMetrics.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable) { return false; } } @@ -207,10 +206,10 @@ public void updateStaleLocationInfo(RxDocumentServiceRequest request) { if (partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { PartitionLevelFailoverInfo partitionLevelFailoverInfo = partitionKeyRangeToFailoverInfo.get(partitionKeyRange); - LocationLevelFailureMetadata locationLevelFailureMetadata + LocationLevelMetrics locationLevelMetrics = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(unavailableLocation); - locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + locationLevelMetrics.partitionScopedRegionUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); } return Mono.empty(); @@ -218,160 +217,239 @@ public void updateStaleLocationInfo(RxDocumentServiceRequest request) { .subscribe(); } - // what is the point of an inner class? - // at a high-level, the below class needs: - // 1. consecutive failure count tracker - // 2. unavailable since - // 3. regions unavailable in - // 4. failure type static class PartitionLevelFailoverInfo { - private final ConcurrentHashMap partitionLevelFailureMetadata; - // points to the current location a request will be routed to + private final ConcurrentHashMap partitionLevelFailureMetadata; PartitionLevelFailoverInfo() { this.partitionLevelFailureMetadata = new ConcurrentHashMap<>(); } - // bookmark failure - // method purpose: - // 1. increment consecutive failure count - // 2. if failure count crosses threshold for - public boolean isFailureThresholdBreachedForLocation(URI failedLocation, boolean isReadRequest) { + public boolean isFailureThresholdBreachedForLocation(RxDocumentServiceRequest request) { AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - this.partitionLevelFailureMetadata.compute(failedLocation, (locationAsKey, locationLevelFailureMetadataAsVal) -> { + if (request.locationLevelCircuitBreakerRequestContext == null) { + return false; + } - if (locationLevelFailureMetadataAsVal == null) { - locationLevelFailureMetadataAsVal = new LocationLevelFailureMetadata(); - } + if (!request.locationLevelCircuitBreakerRequestContext.getFailuresForAllLocations().isEmpty()) { - // todo : make threshold for marking a location as failed more comprehensive + ConcurrentHashMap> failuresForAllLocations + = request.locationLevelCircuitBreakerRequestContext.getFailuresForAllLocations(); - if (isReadRequest) { - if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForReads.incrementAndGet() > 5) { - locationLevelFailureMetadataAsVal.unavailableForReadsSince.set(Instant.now()); - locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); - isFailureThresholdBreached.set(true); - } - } else { - if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.incrementAndGet() > 5) { - locationLevelFailureMetadataAsVal.unavailableForWritesSince.set(Instant.now()); - locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); - isFailureThresholdBreached.set(true); - } - } + for (Map.Entry> failuresPerLocation : failuresForAllLocations.entrySet()) { - return locationLevelFailureMetadataAsVal; - }); + URI location = failuresPerLocation.getKey(); + ConcurrentHashMap errorCounts = failuresPerLocation.getValue(); - return isFailureThresholdBreached.get(); - } + this.partitionLevelFailureMetadata.compute(location, (locationAsKey, locationLevelMetricsAsVal) -> { + + if (locationLevelMetricsAsVal == null) { + locationLevelMetricsAsVal = new LocationLevelMetrics(); + } - // bookmark success - public void bookmarkSuccess(URI succeededLocation, boolean isReadRequest) { - this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, locationLevelFailureMetadataAsVal) -> { + for (Map.Entry countForError : errorCounts.entrySet()) { + locationLevelMetricsAsVal.handleFailure(countForError.getValue()); + } - if (locationLevelFailureMetadataAsVal == null) { - return new LocationLevelFailureMetadata(); + isFailureThresholdBreached.set(locationLevelMetricsAsVal.isFailureThresholdBreached()); + return locationLevelMetricsAsVal; + }); } + } - if (isReadRequest) { - if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForReads.get() > 1) { - switch (locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.get()) { - case StaleUnavailable: - locationLevelFailureMetadataAsVal = new LocationLevelFailureMetadata(); - case Available: - locationLevelFailureMetadataAsVal.consecutiveFailureCountForReads.decrementAndGet(); - } - } - } else { - if (locationLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.get() > 1) { - switch (locationLevelFailureMetadataAsVal.partitionKeyRangeUnavailabilityStatus.get()) { - case StaleUnavailable: - locationLevelFailureMetadataAsVal = new LocationLevelFailureMetadata(); - case Available: - locationLevelFailureMetadataAsVal.consecutiveFailureCountForWrites.decrementAndGet(); - } } + return isFailureThresholdBreached.get(); + } + + public void bookmarkSuccess(URI succeededLocation) { + this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, locationLevelMetricsAsVal) -> { + + if (locationLevelMetricsAsVal == null) { + return new LocationLevelMetrics(); } - return locationLevelFailureMetadataAsVal; + locationLevelMetricsAsVal.handleSuccess(); + return locationLevelMetricsAsVal; }); } - // method purpose - choose the next possible region for this partition - // 1. if current == failedLocation - try using a different location - // a) iterate through the list of read / write locations - // b) if location in iteration loop not part of locationUnavailabilityInfos, then assign location to current - // 2. if current != failedLocation - a different thread has updated it - // 3. - public boolean areLocationsAvailableForPartitionKeyRange( - List availableLocationsAtAccountLevel, - boolean isReadRequest) { + public boolean areLocationsAvailableForPartitionKeyRange(List availableLocationsAtAccountLevel) { for (URI availableLocation : availableLocationsAtAccountLevel) { if (!this.partitionLevelFailureMetadata.containsKey(availableLocation)) { return true; } else { - LocationLevelFailureMetadata locationLevelFailureMetadata = this.partitionLevelFailureMetadata.get(availableLocation); + LocationLevelMetrics locationLevelMetrics = this.partitionLevelFailureMetadata.get(availableLocation); - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.Available) { + if (locationLevelMetrics.isRegionAvailableToProcessRequests()) { return true; } } } Instant mostStaleUnavailableTimeAcrossRegions = Instant.MAX; - LocationLevelFailureMetadata locationLevelFailureMetadataForMostStaleLocation = null; + LocationLevelMetrics locationLevelFailureMetadataForMostStaleLocation = null; // find region with most 'stale' unavailability - for (Map.Entry uriToLocationLevelFailureMetadata : this.partitionLevelFailureMetadata.entrySet()) { - LocationLevelFailureMetadata locationLevelFailureMetadata = uriToLocationLevelFailureMetadata.getValue(); + for (Map.Entry uriToLocationLevelFailureMetadata : this.partitionLevelFailureMetadata.entrySet()) { + LocationLevelMetrics locationLevelMetrics = uriToLocationLevelFailureMetadata.getValue(); - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.Available) { + if (locationLevelMetrics.isRegionAvailableToProcessRequests()) { return true; } - if (locationLevelFailureMetadata.partitionKeyRangeUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable) { - return true; + Instant unavailableSinceSnapshot = locationLevelMetrics.unavailableSince.get(); + + if (mostStaleUnavailableTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { + mostStaleUnavailableTimeAcrossRegions = unavailableSinceSnapshot; + locationLevelFailureMetadataForMostStaleLocation = locationLevelMetrics; } + } + + if (locationLevelFailureMetadataForMostStaleLocation != null) { + locationLevelFailureMetadataForMostStaleLocation.handleSuccess(); + return true; + } + + return false; + } + } - if (isReadRequest) { + private static class LocationLevelMetrics { + private final AtomicInteger failureCount = new AtomicInteger(0); + private final AtomicInteger successCount = new AtomicInteger(0); + private final AtomicReference unavailableSince = new AtomicReference<>(Instant.MAX); + private final AtomicReference partitionScopedRegionUnavailabilityStatus = new AtomicReference<>(PartitionScopedRegionUnavailabilityStatus.Available); + private final AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - Instant unavailableSince = locationLevelFailureMetadata.unavailableForReadsSince.get(); + public void handleSuccess() { - if (mostStaleUnavailableTimeAcrossRegions.isAfter(unavailableSince)) { - mostStaleUnavailableTimeAcrossRegions = unavailableSince; - locationLevelFailureMetadataForMostStaleLocation = locationLevelFailureMetadata; + PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); + + double allowedFailureRatio = getAllowedFailureRatioByStatus(currentStatusSnapshot); + + switch (currentStatusSnapshot) { + case Available: + if (failureCount.get() > 0) { + failureCount.decrementAndGet(); } - } else { + break; + case StaleUnavailable: + if (successCount.get() < 10) { + successCount.incrementAndGet(); + } else { + if ((double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); + } + } + break; + case FreshUnavailable: + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + break; + default: + throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); + } + } + + public void handleFailure(int errorCount) { - Instant unavailableSince = locationLevelFailureMetadata.unavailableForWritesSince.get(); + PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); - if (mostStaleUnavailableTimeAcrossRegions.isAfter(locationLevelFailureMetadata.unavailableForWritesSince.get())) { - mostStaleUnavailableTimeAcrossRegions = unavailableSince; - locationLevelFailureMetadataForMostStaleLocation = locationLevelFailureMetadata; + int allowedFailureCount = getAllowedFailureCountByStatus(currentStatusSnapshot); + + switch (currentStatusSnapshot) { + case Available: + if (failureCount.get() < allowedFailureCount) { + failureCount.addAndGet(errorCount); + } else { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); + } + case StaleUnavailable: + if (failureCount.get() < allowedFailureCount) { + failureCount.addAndGet(errorCount); + } else { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); } + default: + throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); + } + } + + public void setHealthStatus(PartitionScopedRegionUnavailabilityStatus status) { + this.partitionScopedRegionUnavailabilityStatus.updateAndGet(previousStatus -> { + + PartitionScopedRegionUnavailabilityStatus newStatus; + + switch (status) { + case Available: + if (previousStatus == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable) { + this.failureCount.set(0); + this.successCount.set(0); + this.unavailableSince.set(Instant.MAX); + this.isFailureThresholdBreached.set(false); + } + newStatus = status; + break; + case FreshUnavailable: + if (previousStatus == PartitionScopedRegionUnavailabilityStatus.Available) { + this.failureCount.set(0); + this.successCount.set(0); + this.unavailableSince.set(Instant.now()); + this.isFailureThresholdBreached.set(true); + } + newStatus = status; + break; + case StaleUnavailable: + this.failureCount.set(0); + this.successCount.set(0); + this.unavailableSince.set(Instant.MAX); + this.isFailureThresholdBreached.set(false); + newStatus = status; + break; + default: + throw new IllegalStateException("Unsupported health status: " + status); } + + return newStatus; + }); + } + + private static double getAllowedFailureRatioByStatus(PartitionScopedRegionUnavailabilityStatus status) { + switch (status) { + case Available: + return 0.3d; + case StaleUnavailable: + return 0.1d; + default: + throw new IllegalStateException("Unsupported health status: " + status); } + } - if (locationLevelFailureMetadataForMostStaleLocation != null) { - locationLevelFailureMetadataForMostStaleLocation.partitionKeyRangeUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - return true; + private static int getAllowedFailureCountByStatus(PartitionScopedRegionUnavailabilityStatus status) { + switch (status) { + case Available: + return 10; + case StaleUnavailable: + return 5; + default: + throw new IllegalStateException("Unsupported health status: " + status); } + } - return false; + public boolean isFailureThresholdBreached() { + return this.isFailureThresholdBreached.get(); } - } - private static class LocationLevelFailureMetadata { - private final AtomicInteger consecutiveFailureCountForWrites = new AtomicInteger(); - private final AtomicInteger consecutiveFailureCountForReads = new AtomicInteger(); - private final AtomicReference unavailableForWritesSince = new AtomicReference<>(Instant.MIN); - private final AtomicReference unavailableForReadsSince = new AtomicReference<>(Instant.MIN); - private final AtomicReference partitionKeyRangeUnavailabilityStatus = new AtomicReference<>(PartitionScopedRegionUnavailabilityStatus.Available); + public boolean isRegionAvailableToProcessRequests() { + return this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.Available || + this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable; + } + + public boolean isRegionUnavailableToProcessRequest() { + return this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable; + } } enum PartitionScopedRegionUnavailabilityStatus { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RegionLevelCircuitBreakerRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/LocationLevelCircuitBreakerRequestContext.java similarity index 58% rename from sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RegionLevelCircuitBreakerRequestContext.java rename to sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/LocationLevelCircuitBreakerRequestContext.java index 5aa737a87f15..9eca8b2b9621 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RegionLevelCircuitBreakerRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/LocationLevelCircuitBreakerRequestContext.java @@ -4,22 +4,41 @@ package com.azure.cosmos.implementation; import java.net.URI; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -public class RegionLevelCircuitBreakerRequestContext { +public class LocationLevelCircuitBreakerRequestContext { - private final Map failuresPerRegion; + private final ConcurrentHashMap> failuresForAllLocations; private final boolean isRegionLevelCircuitBreakerEnabled; - public RegionLevelCircuitBreakerRequestContext(boolean isRegionLevelCircuitBreakerEnabled) { - this.failuresPerRegion = new ConcurrentHashMap<>(); + public LocationLevelCircuitBreakerRequestContext(boolean isRegionLevelCircuitBreakerEnabled) { + this.failuresForAllLocations = new ConcurrentHashMap<>(); this.isRegionLevelCircuitBreakerEnabled = isRegionLevelCircuitBreakerEnabled; } public boolean tryRecordRegionScopedFailure(URI locationEndpointToRoute, int statusCode, int subStatusCode) { if (isRegionScopedFailure(statusCode, subStatusCode)) { - // add to map + failuresForAllLocations.compute(locationEndpointToRoute, ((uri, errorKeyToCount) -> { + + if (errorKeyToCount == null) { + errorKeyToCount = new ConcurrentHashMap<>(); + errorKeyToCount.put(new ErrorKey(statusCode, subStatusCode), 1); + return errorKeyToCount; + } + + errorKeyToCount.compute(new ErrorKey(statusCode, subStatusCode), (errorKey, count) -> { + + if (count == null) { + count = 1; + return count; + } + + return count + 1; + }); + + return errorKeyToCount; + })); + return true; } return false; @@ -54,4 +73,8 @@ private static boolean isRegionScopedFailure(int statusCode, int subStatusCode) return false; } + + public ConcurrentHashMap> getFailuresForAllLocations() { + return this.failuresForAllLocations; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java index 2bc3f917fa74..69c2a3e52efc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java @@ -57,7 +57,7 @@ public class RxDocumentServiceRequest implements Cloneable { public DocumentServiceRequestContext requestContext; public FaultInjectionRequestContext faultInjectionRequestContext; - public RegionLevelCircuitBreakerRequestContext regionLevelCircuitBreakerRequestContext; + public LocationLevelCircuitBreakerRequestContext locationLevelCircuitBreakerRequestContext; // has the non serialized value of the partition-key private PartitionKeyInternal partitionKeyInternal; @@ -175,7 +175,7 @@ private RxDocumentServiceRequest(DiagnosticsClientContext clientContext, this.authorizationTokenType = authorizationTokenType; this.requestContext = new DocumentServiceRequestContext(); this.faultInjectionRequestContext = new FaultInjectionRequestContext(); - this.regionLevelCircuitBreakerRequestContext = new RegionLevelCircuitBreakerRequestContext(false); + this.locationLevelCircuitBreakerRequestContext = new LocationLevelCircuitBreakerRequestContext(false); if (StringUtils.isNotEmpty(this.headers.get(WFConstants.BackendHeaders.PARTITION_KEY_RANGE_ID))) this.partitionKeyRangeIdentity = PartitionKeyRangeIdentity.fromHeader(this.headers.get(WFConstants.BackendHeaders.PARTITION_KEY_RANGE_ID)); } @@ -196,7 +196,7 @@ private RxDocumentServiceRequest(DiagnosticsClientContext clientContext, this.clientContext = clientContext; this.requestContext = new DocumentServiceRequestContext(); this.faultInjectionRequestContext = new FaultInjectionRequestContext(); - this.regionLevelCircuitBreakerRequestContext = new RegionLevelCircuitBreakerRequestContext(false); + this.locationLevelCircuitBreakerRequestContext = new LocationLevelCircuitBreakerRequestContext(false); this.operationType = operationType; this.resourceType = resourceType; this.requestContext.sessionToken = null; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java index b6ba56956913..dac552283e99 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java @@ -17,6 +17,7 @@ import com.azure.cosmos.implementation.RetryWithException; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.ShouldRetryResult; +import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -26,7 +27,6 @@ import java.time.Instant; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicInteger; -import java.util.function.IntBinaryOperator; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -193,6 +193,9 @@ public Mono shouldRetry(Exception exception) { Duration backoffTime = Duration.ofSeconds(0); Duration timeout; boolean forceRefreshAddressCache; + + bookmarkException(this.request, exception); + if (isNonRetryableException(exception)) { logger.debug("Operation will NOT be retried. Current attempt {}, Exception: ", this.attemptCount, exception); @@ -310,6 +313,23 @@ private Pair, Boolean> handleInvalidPartitionException(I return Pair.of(null, false); } + + private static boolean bookmarkException(RxDocumentServiceRequest request, Exception exception) { + + if (exception instanceof CosmosException) { + CosmosException cosmosException = Utils.as(exception, CosmosException.class); + + if (request.requestContext == null) { + return false; + } + + if (request.locationLevelCircuitBreakerRequestContext != null) { + return request.locationLevelCircuitBreakerRequestContext.tryRecordRegionScopedFailure(request.requestContext.locationEndpointToRoute, cosmosException.getStatusCode(), cosmosException.getSubStatusCode()); + } + } + + return false; + } } class RetryWithRetryPolicy implements IRetryPolicy { From 207c268b677e2d455989a23e54a1133b381c01e3 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 24 Mar 2024 19:29:41 -0400 Subject: [PATCH 012/140] Added class to error track on a per-region basis. --- ...itionEndpointManagerForCircuitBreaker.java | 77 +++++++++---------- 1 file changed, 37 insertions(+), 40 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 83066a2332a4..9d9d3ad7b29c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -6,6 +6,7 @@ import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import java.net.URI; @@ -25,11 +26,16 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker implements IGlobalP private final GlobalEndpointManager globalEndpointManager; private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; + public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; } + public void init() { + + } + @Override public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request) { @@ -81,7 +87,6 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR // and if failover is possible // a failover is only possible when there are available regions left to fail over to if (isFailoverPossible.get()) { - this.updateStaleLocationInfo(request); return true; } @@ -181,40 +186,22 @@ public boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest re return true; } - public void updateStaleLocationInfo(RxDocumentServiceRequest request) { - Mono.delay(Duration.ofSeconds(60)) - .flatMap(ignore -> { - Map partitionKeyRangeToFailoverInfo - = this.partitionKeyRangeToFailoverInfo; + private Flux updateStaleLocationInfo() { + return Mono.just(1).repeat().delayElements(Duration.ofSeconds(60)).flatMap(ignore -> { - if (request.requestContext == null) { - return Mono.empty(); - } + for (Map.Entry pkRangeToFailoverInfo : this.partitionKeyRangeToFailoverInfo.entrySet()) { - PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + PartitionLevelFailoverInfo partitionLevelFailoverInfo = pkRangeToFailoverInfo.getValue(); - if (partitionKeyRange == null) { - return Mono.empty(); - } - - URI unavailableLocation = request.requestContext.locationEndpointToRoute; + for (Map.Entry locationToLocationLevelMetrics : partitionLevelFailoverInfo.partitionLevelFailureMetadata.entrySet()) { - if (unavailableLocation == null) { - return Mono.empty(); - } - - if (partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { - PartitionLevelFailoverInfo partitionLevelFailoverInfo - = partitionKeyRangeToFailoverInfo.get(partitionKeyRange); - LocationLevelMetrics locationLevelMetrics - = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(unavailableLocation); - - locationLevelMetrics.partitionScopedRegionUnavailabilityStatus.set(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + LocationLevelMetrics locationLevelMetrics = locationToLocationLevelMetrics.getValue(); + locationLevelMetrics.handleSuccess(false); } + } - return Mono.empty(); - }).subscribeOn(CosmosSchedulers.COSMOS_PARALLEL) - .subscribe(); + return Mono.empty(); + }); } static class PartitionLevelFailoverInfo { @@ -269,7 +256,7 @@ public void bookmarkSuccess(URI succeededLocation) { return new LocationLevelMetrics(); } - locationLevelMetricsAsVal.handleSuccess(); + locationLevelMetricsAsVal.handleSuccess(false); return locationLevelMetricsAsVal; }); } @@ -308,7 +295,7 @@ public boolean areLocationsAvailableForPartitionKeyRange(List availableLoca } if (locationLevelFailureMetadataForMostStaleLocation != null) { - locationLevelFailureMetadataForMostStaleLocation.handleSuccess(); + locationLevelFailureMetadataForMostStaleLocation.handleSuccess(true); return true; } @@ -323,7 +310,7 @@ private static class LocationLevelMetrics { private final AtomicReference partitionScopedRegionUnavailabilityStatus = new AtomicReference<>(PartitionScopedRegionUnavailabilityStatus.Available); private final AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - public void handleSuccess() { + public void handleSuccess(boolean forceStateChange) { PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); @@ -331,21 +318,31 @@ public void handleSuccess() { switch (currentStatusSnapshot) { case Available: - if (failureCount.get() > 0) { - failureCount.decrementAndGet(); + if (!forceStateChange) { + if (failureCount.get() > 0) { + failureCount.decrementAndGet(); + } } break; case StaleUnavailable: - if (successCount.get() < 10) { - successCount.incrementAndGet(); - } else { - if ((double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); + if (!forceStateChange) { + if (successCount.get() < 10) { + successCount.incrementAndGet(); + } else { + if ((double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); + } } } break; case FreshUnavailable: - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + if (!forceStateChange) { + if (Duration.between(this.unavailableSince.get(), Instant.now()).compareTo(Duration.ofSeconds(120)) == 1) { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + } + } else { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + } break; default: throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); From 9e56e8ca1aa334099bae7ed46df428c8c7a294ff Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 26 Mar 2024 10:41:20 -0400 Subject: [PATCH 013/140] Adapt point operations to bookmark failures. --- .../azure/cosmos/implementation/RxDocumentClientImpl.java | 7 +++++++ .../directconnectivity/rntbd/RntbdRequestRecord.java | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index e1062b03ee26..28a1d907ee8b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2206,9 +2206,16 @@ private static Mono getPointOperationResponseMonoWithE2ETimeout( } private Mono handleRegionFeedbackForPointOperation(Mono response, AtomicReference requestReference) { + + if (requestReference.get()) { + + } + return response.doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + } else if (throwable instanceof ServiceUnavailableException) { + this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java index 46d80e90be81..f22974442c5d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.RequestTimeline; import com.azure.cosmos.implementation.RequestTimeoutException; +import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.SerializerProvider; @@ -264,6 +265,9 @@ public long getRequestId() { public boolean expire() { final CosmosException error; + + RxDocumentServiceRequest serviceRequest = this.args().serviceRequest(); + if ((this.args.serviceRequest().isReadOnly() || !this.hasSendingRequestStarted()) || this.args.serviceRequest().getNonIdempotentWriteRetriesEnabled()){ // Convert from requestTimeoutException to GoneException for the following two scenarios so they can be safely retried: From ddb19f7c3adeb264145f0f4b60c358751426c000 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 26 Mar 2024 12:54:37 -0400 Subject: [PATCH 014/140] Wiring region feedback handling for point operations. --- .../implementation/RxDocumentClientImpl.java | 267 +++++++++++------- 1 file changed, 158 insertions(+), 109 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 28a1d907ee8b..74a6b71016c3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2101,13 +2101,14 @@ public Mono> createDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Create, - (opt, e2ecfg, clientCtxOverride) -> createDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> createDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, - clientCtxOverride), + clientCtxOverride, + isRequestHedged), options, options != null && options.getNonIdempotentWriteRetriesEnabled() ); @@ -2119,7 +2120,8 @@ private Mono> createDocumentCore( RequestOptions options, boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged) { ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy requestRetryPolicy = @@ -2130,7 +2132,7 @@ private Mono> createDocumentCore( } DocumentClientRetryPolicy finalRetryPolicyInstance = requestRetryPolicy; - AtomicReference documentServiceRequestReference = new AtomicReference<>(); + AtomicReference requestReference = new AtomicReference<>(); return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, @@ -2143,10 +2145,10 @@ private Mono> createDocumentCore( disableAutomaticIdGeneration, finalRetryPolicyInstance, scopedDiagnosticsFactory, - documentServiceRequestReference), + requestReference), requestRetryPolicy), scopedDiagnosticsFactory - ), documentServiceRequestReference); + ), requestReference, isRequestHedged); } private Mono> createDocumentInternal( @@ -2205,11 +2207,10 @@ private static Mono getPointOperationResponseMonoWithE2ETimeout( return rxDocumentServiceResponseMono; } - private Mono handleRegionFeedbackForPointOperation(Mono response, AtomicReference requestReference) { - - if (requestReference.get()) { - - } + private Mono handleRegionFeedbackForPointOperation( + Mono response, + AtomicReference requestReference, + boolean isRequestHedged) { return response.doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { @@ -2275,8 +2276,8 @@ public Mono> upsertDocument(String collectionLink, Ob return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Upsert, - (opt, e2ecfg, clientCtxOverride) -> upsertDocumentCore( - collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride), + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> upsertDocumentCore( + collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, isRequestHedged), options, options != null && options.getNonIdempotentWriteRetriesEnabled() ); @@ -2288,7 +2289,8 @@ private Mono> upsertDocumentCore( RequestOptions options, boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2298,21 +2300,22 @@ private Mono> upsertDocumentCore( } DocumentClientRetryPolicy finalRetryPolicyInstance = requestRetryPolicy; + AtomicReference requestReference = new AtomicReference<>(); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> upsertDocumentInternal( - collectionLink, - document, - nonNullRequestOptions, - disableAutomaticIdGeneration, - finalRetryPolicyInstance, - scopedDiagnosticsFactory), - finalRetryPolicyInstance), - scopedDiagnosticsFactory - ); + return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> upsertDocumentInternal( + collectionLink, + document, + nonNullRequestOptions, + disableAutomaticIdGeneration, + finalRetryPolicyInstance, + scopedDiagnosticsFactory, + requestReference), + finalRetryPolicyInstance), + scopedDiagnosticsFactory), requestReference, isRequestHedged); } private Mono> upsertDocumentInternal( @@ -2321,7 +2324,8 @@ private Mono> upsertDocumentInternal( RequestOptions options, boolean disableAutomaticIdGeneration, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference) { try { logger.debug("Upserting a Document. collectionLink: [{}]", collectionLink); @@ -2337,7 +2341,10 @@ private Mono> upsertDocumentInternal( clientContextOverride); return reqObs - .flatMap(request -> upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options))) + .flatMap(request -> { + requestReference.set(request); + return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); + }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); } catch (Exception e) { @@ -2353,12 +2360,13 @@ public Mono> replaceDocument(String documentLink, Obj return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> replaceDocumentCore( documentLink, document, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + isRequestHedged), options, options != null && options.getNonIdempotentWriteRetriesEnabled() ); @@ -2369,7 +2377,8 @@ private Mono> replaceDocumentCore( Object document, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2381,21 +2390,22 @@ private Mono> replaceDocumentCore( collectionCache, requestRetryPolicy, collectionLink, nonNullRequestOptions); } DocumentClientRetryPolicy finalRequestRetryPolicy = requestRetryPolicy; + AtomicReference requestReference = new AtomicReference<>(); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> replaceDocumentInternal( - documentLink, - document, - nonNullRequestOptions, - finalRequestRetryPolicy, - endToEndPolicyConfig, - scopedDiagnosticsFactory), - requestRetryPolicy), - scopedDiagnosticsFactory - ); + return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> replaceDocumentInternal( + documentLink, + document, + nonNullRequestOptions, + finalRequestRetryPolicy, + endToEndPolicyConfig, + scopedDiagnosticsFactory, + requestReference), + requestRetryPolicy), + scopedDiagnosticsFactory), requestReference, isRequestHedged); } private Mono> replaceDocumentInternal( @@ -2404,7 +2414,8 @@ private Mono> replaceDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2422,7 +2433,8 @@ private Mono> replaceDocumentInternal( typedDocument, options, retryPolicyInstance, - clientContextOverride); + clientContextOverride, + requestReference); } catch (Exception e) { logger.debug("Failure in replacing a document due to [{}]", e.getMessage()); @@ -2435,11 +2447,12 @@ public Mono> replaceDocument(Document document, Reque return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> replaceDocumentCore( document, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + isRequestHedged), options, options != null && options.getNonIdempotentWriteRetriesEnabled() ); @@ -2449,7 +2462,8 @@ private Mono> replaceDocumentCore( Document document, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged) { DocumentClientRetryPolicy requestRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(clientContextOverride); @@ -2459,14 +2473,17 @@ private Mono> replaceDocumentCore( collectionCache, requestRetryPolicy, collectionLink, options); } DocumentClientRetryPolicy finalRequestRetryPolicy = requestRetryPolicy; - return ObservableHelper.inlineIfPossibleAsObs( + AtomicReference requestReference = new AtomicReference<>(); + + return handleRegionFeedbackForPointOperation(ObservableHelper.inlineIfPossibleAsObs( () -> replaceDocumentInternal( document, options, finalRequestRetryPolicy, endToEndPolicyConfig, - clientContextOverride), - requestRetryPolicy); + clientContextOverride, + requestReference), + requestRetryPolicy), requestReference, isRequestHedged); } private Mono> replaceDocumentInternal( @@ -2474,7 +2491,8 @@ private Mono> replaceDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference) { try { if (document == null) { @@ -2486,7 +2504,8 @@ private Mono> replaceDocumentInternal( document, options, retryPolicyInstance, - clientContextOverride); + clientContextOverride, + requestReference); } catch (Exception e) { logger.debug("Failure in replacing a database due to [{}]", e.getMessage()); @@ -2499,7 +2518,8 @@ private Mono> replaceDocumentInternal( Document document, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference) { if (document == null) { throw new IllegalArgumentException("document"); @@ -2598,12 +2618,13 @@ public Mono> patchDocument(String documentLink, return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, - (opt, e2ecfg, clientCtxOverride) -> patchDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> patchDocumentCore( documentLink, cosmosPatchOperations, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + isRequestHedged), options, options != null && options.getNonIdempotentWriteRetriesEnabled() ); @@ -2614,25 +2635,29 @@ private Mono> patchDocumentCore( CosmosPatchOperations cosmosPatchOperations, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy documentClientRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(scopedDiagnosticsFactory); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> patchDocumentInternal( - documentLink, - cosmosPatchOperations, - nonNullRequestOptions, - documentClientRetryPolicy, - scopedDiagnosticsFactory), - documentClientRetryPolicy), - scopedDiagnosticsFactory - ); + AtomicReference requestReference = new AtomicReference<>(); + + return handleRegionFeedbackForPointOperation( + getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> patchDocumentInternal( + documentLink, + cosmosPatchOperations, + nonNullRequestOptions, + documentClientRetryPolicy, + scopedDiagnosticsFactory, + requestReference), + documentClientRetryPolicy), + scopedDiagnosticsFactory), requestReference, isRequestHedged); } private Mono> patchDocumentInternal( @@ -2640,7 +2665,8 @@ private Mono> patchDocumentInternal( CosmosPatchOperations cosmosPatchOperations, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference) { checkArgument(StringUtils.isNotEmpty(documentLink), "expected non empty documentLink"); checkNotNull(cosmosPatchOperations, "expected non null cosmosPatchOperations"); @@ -2703,7 +2729,10 @@ private Mono> patchDocumentInternal( collectionObs); return requestObs - .flatMap(req -> patch(request, retryPolicyInstance)) + .flatMap(req -> { + requestReference.set(req); + return patch(request, retryPolicyInstance); + }) .map(resp -> toResourceResponse(resp, Document.class)); } @@ -2712,12 +2741,13 @@ public Mono> deleteDocument(String documentLink, Requ return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> deleteDocumentCore( documentLink, null, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + isRequestHedged), options, options != null && options.getNonIdempotentWriteRetriesEnabled() ); @@ -2728,12 +2758,13 @@ public Mono> deleteDocument(String documentLink, Inte return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> deleteDocumentCore( documentLink, internalObjectNode, opt, e2ecfg, - clientCtxOverride), + clientCtxOverride, + isRequestHedged), options, options != null && options.getNonIdempotentWriteRetriesEnabled() ); @@ -2744,26 +2775,29 @@ private Mono> deleteDocumentCore( InternalObjectNode internalObjectNode, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy requestRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(scopedDiagnosticsFactory); - return getPointOperationResponseMonoWithE2ETimeout( - nonNullRequestOptions, - endToEndPolicyConfig, - ObservableHelper.inlineIfPossibleAsObs( - () -> deleteDocumentInternal( - documentLink, - internalObjectNode, - nonNullRequestOptions, - requestRetryPolicy, - scopedDiagnosticsFactory), - requestRetryPolicy), - scopedDiagnosticsFactory - ); + AtomicReference requestReference = new AtomicReference<>(); + + return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + nonNullRequestOptions, + endToEndPolicyConfig, + ObservableHelper.inlineIfPossibleAsObs( + () -> deleteDocumentInternal( + documentLink, + internalObjectNode, + nonNullRequestOptions, + requestRetryPolicy, + scopedDiagnosticsFactory, + requestReference), + requestRetryPolicy), + scopedDiagnosticsFactory), requestReference, isRequestHedged); } private Mono> deleteDocumentInternal( @@ -2771,7 +2805,8 @@ private Mono> deleteDocumentInternal( InternalObjectNode internalObjectNode, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2808,7 +2843,10 @@ private Mono> deleteDocumentInternal( return requestObs - .flatMap(req -> this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options))) + .flatMap(req -> { + requestReference.set(req); + return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); + }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); } catch (Exception e) { @@ -2867,7 +2905,7 @@ private Mono> readDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Read, - (opt, e2ecfg, clientCtxOverride) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride), + (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, isRequestHedged), options, false, innerDiagnosticsFactory @@ -2878,13 +2916,16 @@ private Mono> readDocumentCore( String documentLink, RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy retryPolicyInstance = this.resetSessionTokenRetryPolicy.getRequestPolicy(scopedDiagnosticsFactory); - return getPointOperationResponseMonoWithE2ETimeout( + AtomicReference requestReference = new AtomicReference<>(); + + return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs( @@ -2892,17 +2933,19 @@ private Mono> readDocumentCore( documentLink, nonNullRequestOptions, retryPolicyInstance, - scopedDiagnosticsFactory), + scopedDiagnosticsFactory, + requestReference), retryPolicyInstance), scopedDiagnosticsFactory - ); + ), requestReference, isRequestHedged); } private Mono> readDocumentInternal( String documentLink, RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + AtomicReference requestReference) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2928,9 +2971,11 @@ private Mono> readDocumentInternal( Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs); - return requestObs.flatMap(req -> - this.read(request, retryPolicyInstance) - .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class))); + return requestObs.flatMap(req -> { + requestReference.set(req); + return this.read(request, retryPolicyInstance) + .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); + }); } catch (Exception e) { logger.debug("Failure in reading a document due to [{}]", e.getMessage()); @@ -5408,7 +5453,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory); + return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false); } ThresholdBasedAvailabilityStrategy availabilityStrategy = @@ -5428,7 +5473,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient Mono initialMonoAcrossAllRegions = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, false) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isCosmosException, @@ -5456,7 +5501,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat // operator below will complete the composite Mono for both successful values // and non-transient errors Mono regionalCrossRegionRetryMono = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, true) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isNonTransientCosmosException, @@ -5868,7 +5913,11 @@ private Mono executeFeedOperationWithAvailabilityStrategy( @FunctionalInterface private interface DocumentPointOperation { - Mono> apply(RequestOptions requestOptions, CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, DiagnosticsClientContext clientContextOverride); + Mono> apply( + RequestOptions requestOptions, + CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, + DiagnosticsClientContext clientContextOverride, + boolean isRequestHedged); } private static class NonTransientPointOperationResult { From 1845447dd626fb525636a8b167715d4238b705f2 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 28 Mar 2024 16:05:53 -0400 Subject: [PATCH 015/140] Fixing compilation errors. --- .../cosmos/RetryContextOnDiagnosticTest.java | 7 +- .../implementation/ConsistencyTests1.java | 4 +- .../implementation/ConsistencyTestsBase.java | 48 +-- .../RequestHeadersSpyWireTest.java | 8 +- .../RxDocumentClientUnderTest.java | 54 +-- .../cosmos/implementation/SessionTest.java | 18 +- .../SpyClientUnderTestFactory.java | 6 +- .../cosmos/implementation/TestSuiteBase.java | 21 +- .../DCDocumentCrudTest.java | 2 +- .../com/azure/cosmos/rx/ChangeFeedTest.java | 4 +- .../azure/cosmos/rx/ResourceTokenTest.java | 10 +- .../azure/cosmos/CosmosAsyncContainer.java | 12 +- .../implementation/AsyncDocumentClient.java | 12 +- .../DocumentServiceRequestContext.java | 9 + .../implementation/RxDocumentClientImpl.java | 309 +++++++++--------- 15 files changed, 269 insertions(+), 255 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java index 29734e89d372..3e2b3074f178 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java @@ -25,6 +25,7 @@ import com.azure.cosmos.implementation.ShouldRetryResult; import com.azure.cosmos.implementation.StoreResponseBuilder; import com.azure.cosmos.implementation.TestConfigurations; +import com.azure.cosmos.implementation.TestUtils; import com.azure.cosmos.implementation.directconnectivity.AddressSelector; import com.azure.cosmos.implementation.directconnectivity.ConsistencyReader; import com.azure.cosmos.implementation.directconnectivity.ConsistencyWriter; @@ -230,7 +231,7 @@ public void retryContextMockTestOnCRUDOperation() throws NoSuchFieldException, I Mockito.when(retryPolicy.getRetryContext()).thenReturn(retryContext); Mockito.when(retryContext.getRetryCount()).thenReturn(1); Mockito.when(mockRetryFactory.getRequestPolicy(ArgumentMatchers.any(DiagnosticsClientContext.class))).thenReturn(retryPolicy); - responseFlux = rxDocumentClient.readDocument(itemSelfLink, requestOptions); + responseFlux = rxDocumentClient.readDocument(itemSelfLink, requestOptions, cosmosAsyncContainer.getLinkWithoutTrailingSlash()); validateServiceResponseSuccess(responseFlux); Mockito.verify(retryContext, Mockito.times(1)).getRetryCount(); @@ -240,7 +241,7 @@ public void retryContextMockTestOnCRUDOperation() throws NoSuchFieldException, I Mockito.when(retryPolicy.getRetryContext()).thenReturn(retryContext); Mockito.when(retryContext.getRetryCount()).thenReturn(1); Mockito.when(mockRetryFactory.getRequestPolicy(ArgumentMatchers.any(DiagnosticsClientContext.class))).thenReturn(retryPolicy); - responseFlux = rxDocumentClient.deleteDocument(itemSelfLink, requestOptions); + responseFlux = rxDocumentClient.deleteDocument(itemSelfLink, requestOptions, cosmosAsyncContainer.getLinkWithoutTrailingSlash()); validateServiceResponseSuccess(responseFlux); Mockito.verify(retryContext, Mockito.times(1)).getRetryCount(); @@ -250,7 +251,7 @@ public void retryContextMockTestOnCRUDOperation() throws NoSuchFieldException, I Mockito.when(retryPolicy.getRetryContext()).thenReturn(retryContext); Mockito.when(retryContext.getRetryCount()).thenReturn(1); Mockito.when(mockRetryFactory.getRequestPolicy(ArgumentMatchers.any(DiagnosticsClientContext.class))).thenReturn(retryPolicy); - responseFlux = rxDocumentClient.replaceDocument(itemSelfLink, new Document(), requestOptions); + responseFlux = rxDocumentClient.replaceDocument(itemSelfLink, new Document(), requestOptions, cosmosAsyncContainer.getLinkWithoutTrailingSlash()); validateServiceResponseSuccess(responseFlux); Mockito.verify(retryContext, Mockito.times(1)).getRetryCount(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java index 22cba6842472..2189f85d9c9a 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java @@ -348,11 +348,11 @@ private void validateSubstatusCodeOnNotFoundExceptionInSessionReadAsync(boolean documentDefinition.setId("1"); Document document = client.createDocument(collection.getSelfLink(), documentDefinition, requestOptions, false).block().getResource(); - Mono> deleteObservable = client.deleteDocument(document.getSelfLink(), requestOptions); + Mono> deleteObservable = client.deleteDocument(document.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .nullResource().build(); validateSuccess(deleteObservable, validator); - Mono> readObservable = client.readDocument(document.getSelfLink(), requestOptions); + Mono> readObservable = client.readDocument(document.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); FailureValidator notFoundValidator = new FailureValidator.Builder().resourceNotFound().unknownSubStatusCode().build(); validateFailure(readObservable, notFoundValidator); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java index cea5534b7bd1..661e90f98a91 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java @@ -77,13 +77,13 @@ void validateConsistentLSN() { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(documentDefinition, "mypk"))); Document document = createDocument(this.writeClient, createdDatabase.getId(), createdCollection.getId(), documentDefinition); - ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options).block(); + ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); assertThat(response.getStatusCode()).isEqualTo(204); long quorumAckedLSN = Long.parseLong(response.getResponseHeaders().get(WFConstants.BackendHeaders.QUORUM_ACKED_LSN)); assertThat(quorumAckedLSN > 0).isTrue(); FailureValidator validator = new FailureValidator.Builder().statusCode(404).lsnGreaterThan(quorumAckedLSN).build(); - Mono> readObservable = this.readClient.readDocument(document.getSelfLink(), options); + Mono> readObservable = this.readClient.readDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); validateFailure(readObservable, validator); } @@ -92,14 +92,14 @@ void validateConsistentLSNAndQuorumAckedLSN() { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(documentDefinition, "mypk"))); Document document = createDocument(this.writeClient, createdDatabase.getId(), createdCollection.getId(), documentDefinition); - ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options).block(); + ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); assertThat(response.getStatusCode()).isEqualTo(204); long quorumAckedLSN = Long.parseLong(response.getResponseHeaders().get(WFConstants.BackendHeaders.QUORUM_ACKED_LSN)); assertThat(quorumAckedLSN > 0).isTrue(); FailureValidator validator = new FailureValidator.Builder().statusCode(404).lsnGreaterThanEqualsTo(quorumAckedLSN).exceptionQuorumAckedLSNInNotNull().build(); - Mono> readObservable = this.readClient.deleteDocument(document.getSelfLink(), options); + Mono> readObservable = this.readClient.deleteDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); validateFailure(readObservable, validator); } @@ -146,10 +146,10 @@ void validateStrongConsistencyOnAsyncReplication(boolean useGateway) throws Inte Document documentDefinition = getDocumentDefinition(); Document document = createDocument(this.writeClient, createdDatabase.getId(), createdCollection.getId(), documentDefinition); - validateStrongConsistency(document); + validateStrongConsistency(document, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); } - void validateStrongConsistency(Document documentToWorkWith) throws InterruptedException { + void validateStrongConsistency(Document documentToWorkWith, String collectionLink) throws InterruptedException { int numberOfTestIteration = 5; Document writeDocument = documentToWorkWith; while (numberOfTestIteration-- > 0) { @@ -157,10 +157,10 @@ void validateStrongConsistency(Document documentToWorkWith) throws InterruptedEx Thread.sleep(1000);//Timestamp is in granularity of seconds. RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(documentToWorkWith, "mypk"))); - Document updatedDocument = this.writeClient.replaceDocument(writeDocument, options).block().getResource(); + Document updatedDocument = this.writeClient.replaceDocument(writeDocument, options, collectionLink).block().getResource(); assertThat(updatedDocument.getTimestamp().isAfter(sourceTimestamp)).isTrue(); - Document readDocument = this.readClient.readDocument(documentToWorkWith.getSelfLink(), options).block().getResource(); + Document readDocument = this.readClient.readDocument(documentToWorkWith.getSelfLink(), options, collectionLink).block().getResource(); assertThat(updatedDocument.getTimestamp().equals(readDocument.getTimestamp())); } } @@ -284,7 +284,7 @@ boolean validateConsistentPrefix(Resource resourceToWorkWith) throws Interrupted } else if (resourceToWorkWith instanceof Document) { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(resourceToWorkWith, "mypk"))); - readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), options) + readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())) .block() .getResource(); } @@ -320,7 +320,7 @@ boolean validateReadSession(Resource resourceToWorkWith) throws InterruptedExcep RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(resourceToWorkWith, "mypk"))); if (resourceToWorkWith instanceof Document) { - readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions).block().getResource(); + readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block().getResource(); } assertThat(readResource.getTimestamp().compareTo(lastReadDateTime) >= 0).isTrue(); lastReadDateTime = readResource.getTimestamp(); @@ -353,7 +353,7 @@ boolean validateWriteSession(Resource resourceToWorkWith) throws InterruptedExce requestOptions.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(resourceToWorkWith, "mypk"))); if (resourceToWorkWith instanceof Document) { readResource = - this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions) + this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())) .block() .getResource(); } @@ -428,8 +428,8 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway) throws Document documentCreated = client2.createDocument(collection.getSelfLink(), documentDefinition, null, true).block().getResource(); RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(documentCreated, "mypk"))); - client2.readDocument(BridgeInternal.getAltLink(documentCreated), requestOptions).block(); - client2.readDocument(documentCreated.getSelfLink(), requestOptions).block(); + client2.readDocument(BridgeInternal.getAltLink(documentCreated), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); + client2.readDocument(documentCreated.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); } { @@ -464,7 +464,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway) throws ResourceResponseValidator successValidator = new ResourceResponseValidator.Builder() .withId(createdDocument.getId()) .build(); - Mono> readObservable = client1.readDocument(createdDocument.getSelfLink(), requestOptions); + Mono> readObservable = client1.readDocument(createdDocument.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); validateSuccess(readObservable, successValidator); { String token1 = ((SessionContainer) client1.getSession()).getSessionToken(BridgeInternal.getAltLink(collectionSameName)); @@ -480,7 +480,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway) throws RequestOptions requestOptions1 = new RequestOptions(); requestOptions1.setSessionToken(higherLsnToken); requestOptions1.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(createdDocument, "mypk"))); - readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions1); + readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions1, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); FailureValidator failureValidator = new FailureValidator.Builder().subStatusCode(1002).build(); validateFailure(readObservable, failureValidator); } @@ -494,7 +494,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway) throws } { // second read should succeed! - readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions); + readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); validateSuccess(readObservable, successValidator); } // verify deleting indeed delete the collection session token @@ -506,7 +506,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway) throws successValidator = new ResourceResponseValidator.Builder() .withId(documentTest.getId()) .build(); - readObservable = client1.readDocument(documentTest.getSelfLink(), options); + readObservable = client1.readDocument(documentTest.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); validateSuccess(readObservable, successValidator); client1.deleteCollection(collectionSameName.getSelfLink(), null).block(); @@ -615,7 +615,7 @@ void validateSessionTokenWithDocumentNotFoundException(boolean useGateway) throw RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(documentResponse.getResource(), "mypk"))); // try to read a non existent document in the same partition that we previously wrote to - Mono> readObservable = validationClient.readDocument(BridgeInternal.getAltLink(documentResponse.getResource()) + "dummy", requestOptions); + Mono> readObservable = validationClient.readDocument(BridgeInternal.getAltLink(documentResponse.getResource()) + "dummy", requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); validateFailure(readObservable, failureValidator); assertThat(isSessionEqual(((SessionContainer) validationClient.getSession()), (SessionContainer) writeClient.getSession())).isTrue(); } finally { @@ -655,7 +655,7 @@ void validateSessionTokenWithExpectedException(boolean useGateway) throws Except requestOptions.setSessionToken(higherLsnToken); // try to read a non existent document in the same partition that we previously wrote to Mono> readObservable = writeClient.readDocument(BridgeInternal.getAltLink(documentResponse.getResource()), - requestOptions); + requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); validateFailure(readObservable, failureValidator); } finally { @@ -763,22 +763,22 @@ void validateSessionTokenMultiPartitionCollection(boolean useGateway) throws Exc RequestOptions option = new RequestOptions(); option.setSessionToken(sessionToken); option.setPartitionKey(new PartitionKey(2)); - writeClient.readDocument(childResource2.getResource().getSelfLink(), option).block(); + writeClient.readDocument(childResource2.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); option = new RequestOptions(); option.setSessionToken(StringUtils.EMPTY); option.setPartitionKey(new PartitionKey(1)); - writeClient.readDocument(childResource1.getResource().getSelfLink(), option).block(); + writeClient.readDocument(childResource1.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); option = new RequestOptions(); option.setSessionToken(sessionToken); option.setPartitionKey(new PartitionKey(1)); - Mono> readObservable = writeClient.readDocument(childResource1.getResource().getSelfLink(), option); + Mono> readObservable = writeClient.readDocument(childResource1.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); FailureValidator failureValidator = new FailureValidator.Builder().statusCode(HttpConstants.StatusCodes.NOTFOUND).subStatusCode(HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE).build(); validateFailure(readObservable, failureValidator); - readObservable = writeClient.readDocument(childResource2.getResource().getSelfLink(), option); + readObservable = writeClient.readDocument(childResource2.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); failureValidator = new FailureValidator.Builder().statusCode(HttpConstants.StatusCodes.NOTFOUND).subStatusCode(HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE).build(); validateFailure(readObservable, failureValidator); @@ -816,7 +816,7 @@ void validateSessionTokenFromCollectionReplaceIsServerToken(boolean useGateway) Document doc = client1.createDocument(createdCollection.getSelfLink(), getDocumentDefinition(), null, true).block().getResource(); RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(ModelBridgeInternal.getObjectFromJsonSerializable(doc, "mypk"))); - Document doc1 = client1.readDocument(BridgeInternal.getAltLink(doc), requestOptions).block().getResource(); + Document doc1 = client1.readDocument(BridgeInternal.getAltLink(doc), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block().getResource(); String token1 = ((SessionContainer) client1.getSession()).getSessionToken(createdCollection.getSelfLink()); client2 = (RxDocumentClientImpl) new AsyncDocumentClient.Builder() diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java index 9286deaf7b30..fe074fb75ac7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java @@ -203,7 +203,7 @@ public void readItemWithMaxIntegratedCacheStaleness(CosmosItemRequestOptions cos RequestOptions requestOptions = ModelBridgeInternal.toRequestOptions(cosmosItemRequestOptions); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - client.readDocument(documentLink, requestOptions).block(); + client.readDocument(documentLink, requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); List requests = client.getCapturedRequests(); for (HttpRequest httpRequest : requests) { @@ -225,7 +225,7 @@ public void readItemWithMaxIntegratedCacheStalenessInNanoseconds() { RequestOptions requestOptions = ModelBridgeInternal.toRequestOptions(cosmosItemRequestOptions); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions).block()) + assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions, getDocumentCollectionLink()).block()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("MaxIntegratedCacheStaleness granularity is milliseconds"); } @@ -243,7 +243,7 @@ public void readItemWithMaxIntegratedCacheStalenessInNegative() { RequestOptions requestOptions = ModelBridgeInternal.toRequestOptions(cosmosItemRequestOptions); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions).block()) + assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions, getDocumentCollectionLink()).block()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("MaxIntegratedCacheStaleness duration cannot be negative"); } @@ -263,7 +263,7 @@ public void readItemWithCacheBypass(boolean cacheBypass) { RequestOptions requestOptions = ModelBridgeInternal.toRequestOptions(cosmosItemRequestOptions); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - ResourceResponse response = client.readDocument(documentLink, requestOptions).block(); + ResourceResponse response = client.readDocument(documentLink, requestOptions, getDocumentCollectionLink()).block(); if (cacheBypass) { String responseHeader = response.getResponseHeaders().get("x-ms-cosmos-cache-bypass"); assertThat(responseHeader).isNotNull(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java index a57d71754e9f..e5ce5caee383 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java @@ -62,31 +62,31 @@ public RxDocumentClientUnderTest(URI serviceEndpoint, init(null, null); } - RxGatewayStoreModel createRxGatewayProxy( - ISessionContainer sessionContainer, - ConsistencyLevel consistencyLevel, - QueryCompatibilityMode queryCompatibilityMode, - UserAgentContainer userAgentContainer, - GlobalEndpointManager globalEndpointManager, - HttpClient rxOrigClient, - ApiType apiType) { - - origHttpClient = rxOrigClient; - spyHttpClient = Mockito.spy(rxOrigClient); - - doAnswer((Answer>) invocationOnMock -> { - HttpRequest httpRequest = invocationOnMock.getArgument(0, HttpRequest.class); - Duration responseTimeout = invocationOnMock.getArgument(1, Duration.class); - httpRequests.add(httpRequest); - return origHttpClient.send(httpRequest, responseTimeout); - }).when(spyHttpClient).send(Mockito.any(HttpRequest.class), Mockito.any(Duration.class)); - - return super.createRxGatewayProxy(sessionContainer, - consistencyLevel, - queryCompatibilityMode, - userAgentContainer, - globalEndpointManager, - spyHttpClient, - apiType); - } +// RxGatewayStoreModel createRxGatewayProxy( +// ISessionContainer sessionContainer, +// ConsistencyLevel consistencyLevel, +// QueryCompatibilityMode queryCompatibilityMode, +// UserAgentContainer userAgentContainer, +// GlobalEndpointManager globalEndpointManager, +// HttpClient rxOrigClient, +// ApiType apiType) { +// +// origHttpClient = rxOrigClient; +// spyHttpClient = Mockito.spy(rxOrigClient); +// +// doAnswer((Answer>) invocationOnMock -> { +// HttpRequest httpRequest = invocationOnMock.getArgument(0, HttpRequest.class); +// Duration responseTimeout = invocationOnMock.getArgument(1, Duration.class); +// httpRequests.add(httpRequest); +// return origHttpClient.send(httpRequest, responseTimeout); +// }).when(spyHttpClient).send(Mockito.any(HttpRequest.class), Mockito.any(Duration.class)); +// +// return super.createRxGatewayProxy(sessionContainer, +// consistencyLevel, +// queryCompatibilityMode, +// userAgentContainer, +// globalEndpointManager, +// spyHttpClient, +// apiType); +// } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java index 8663514dde43..cd73d0e129ba 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java @@ -129,12 +129,12 @@ public void sessionConsistency_ReadYourWrites(boolean isNameBased) { spyClient.clearCapturedRequests(); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options, getCollectionLink(true)).block(); assertThat(getSessionTokensInRequests()).hasSize(1); assertThat(getSessionTokensInRequests().get(0)).isNotEmpty(); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options, getCollectionLink(true)).block(); // same session token expected - because we collect // distinct session tokens only one of them should be kept @@ -171,14 +171,14 @@ public void partitionedSessionToken(boolean isNameBased) throws NoSuchMethodExce spyClient.clearCapturedRequests(); // Session token set for default session consistency - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); assertThat(getSessionTokensInRequests().get(0)).isNotEmpty(); assertThat(getSessionTokensInRequests().get(0)).doesNotContain(","); // making sure we have only one scope session token // Session token set for request session consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.SESSION); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); assertThat(getSessionTokensInRequests()).hasSize(1); assertThat(getSessionTokensInRequests().get(0)).isNotEmpty(); assertThat(getSessionTokensInRequests().get(0)).doesNotContain(","); // making sure we have only one scope session token @@ -337,13 +337,13 @@ public void sessionTokenNotRequired(boolean isNameBased) { // No session token set for EVENTUAL consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.EVENTUAL); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); assertThat(getSessionTokensInRequests()).hasSize(0); // No session token set for CONSISTENT_PREFIX consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.CONSISTENT_PREFIX); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); assertThat(getSessionTokensInRequests()).hasSize(0); if (globalEndpointManager.getLatestDatabaseAccount().getConsistencyPolicy().getDefaultConsistencyLevel().equals(ConsistencyLevel.STRONG) || @@ -351,7 +351,7 @@ public void sessionTokenNotRequired(boolean isNameBased) { // No session token set for BOUNDED_STALENESS consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.BOUNDED_STALENESS); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); assertThat(getSessionTokensInRequests()).hasSize(0); } @@ -359,7 +359,7 @@ public void sessionTokenNotRequired(boolean isNameBased) { // No session token set for STRONG consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.STRONG); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); assertThat(getSessionTokensInRequests()).hasSize(0); } } @@ -394,7 +394,7 @@ public void sessionTokenInDocumentRead(boolean isNameBased) throws UnsupportedEn .getResource(); final String documentLink = getDocumentLink(document, isNameBased); - spyClient.readDocument(documentLink, options).block() + spyClient.readDocument(documentLink, options, getCollectionLink(true)).block() .getResource(); List documentReadHttpRequests = spyClient.getCapturedRequests().stream() diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index cad0e7e7b177..dcb7e03cf866 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -120,7 +120,8 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient rxClient, - ApiType apiType) { + ApiType apiType, + IGlobalPartitionEndpointManager globalPartitionEndpointManager) { this.origRxGatewayStoreModel = super.createRxGatewayProxy( sessionContainer, consistencyLevel, @@ -128,7 +129,8 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, userAgentContainer, globalEndpointManager, rxClient, - apiType); + apiType, + globalPartitionEndpointManager); this.requests = Collections.synchronizedList(new ArrayList<>()); this.spyRxGatewayStoreModel = Mockito.spy(this.origRxGatewayStoreModel); this.initRequestCapture(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java index 43876eda4d79..0e48ca8c47e0 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java @@ -216,7 +216,7 @@ protected static void truncateCollection(DocumentCollection collection) { requestOptions.setPartitionKey(new PartitionKey(propertyValue)); } - return houseKeepingClient.deleteDocument(doc.getSelfLink(), requestOptions); + return houseKeepingClient.deleteDocument(doc.getSelfLink(), requestOptions, collection.getSelfLink()); }).then().block(); logger.info("Truncating DocumentCollection {} triggers ...", collection.getId()); @@ -578,27 +578,14 @@ public static void deleteDocumentIfExists(AsyncDocumentClient client, String dat Document.class) .single().block().getResults(); if (!res.isEmpty()) { - deleteDocument(client, TestUtils.getDocumentNameLink(databaseId, collectionId, docId), pk); + deleteDocument(client, TestUtils.getDocumentNameLink(databaseId, collectionId, docId), pk, TestUtils.getCollectionNameLink(databaseId, collectionId)); } } - public static void safeDeleteDocument(AsyncDocumentClient client, String documentLink, RequestOptions options) { - if (client != null && documentLink != null) { - try { - client.deleteDocument(documentLink, options).block(); - } catch (Exception e) { - CosmosException dce = Utils.as(e, CosmosException.class); - if (dce == null || dce.getStatusCode() != 404) { - throw e; - } - } - } - } - - public static void deleteDocument(AsyncDocumentClient client, String documentLink, PartitionKey pk) { + public static void deleteDocument(AsyncDocumentClient client, String documentLink, PartitionKey pk, String collectionLink) { RequestOptions options = new RequestOptions(); options.setPartitionKey(pk); - client.deleteDocument(documentLink, options).block(); + client.deleteDocument(documentLink, options, collectionLink).block(); } public static void deleteUserIfExists(AsyncDocumentClient client, String databaseId, String userId) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java index aab22fd0c56b..97526ebbe620 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java @@ -170,7 +170,7 @@ public void read() throws Exception { .withId(docDefinition.getId()) .build(); - validateSuccess(client.readDocument(docLink, options), validator, TIMEOUT); + validateSuccess(client.readDocument(docLink, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())), validator, TIMEOUT); validateNoDocumentOperationThroughGateway(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java index 83c1f975c5ea..8079dd431c59 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java @@ -220,7 +220,7 @@ private void changeFeed_withUpdatesAndDelete(boolean enableFullFidelityChangeFee .getContinuationToken(); Document docToBeDeleted = partitionKeyToDocuments.get(partitionKey).stream().findFirst().get(); - deleteDocument(client, docToBeDeleted.getSelfLink(), new PartitionKey(partitionKey)); + deleteDocument(client, docToBeDeleted.getSelfLink(), new PartitionKey(partitionKey), TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); CosmosChangeFeedRequestOptions changeFeedOptionForContinuationAfterDeletes = CosmosChangeFeedRequestOptions @@ -454,7 +454,7 @@ public Document updateDocument(AsyncDocumentClient client, Document originalDocu BridgeInternal.setProperty(originalDocument, "prop", uuid); return client - .replaceDocument(originalDocument.getSelfLink(), originalDocument, null) + .replaceDocument(originalDocument.getSelfLink(), originalDocument, null, getCollectionLink()) .block() .getResource(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java index 4c4c8768ccd1..6c68e43b642e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java @@ -311,7 +311,7 @@ public void readDocumentFromPermissionFeed(String documentUrl, Permission permis options.setPartitionKey(PartitionKey.NONE); } Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options); + .readDocument(documentUrl, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(documentId).build(); validateSuccess(readObservable, validator); @@ -342,7 +342,7 @@ public void readDocumentFromResouceToken(String resourceToken) throws Exception RequestOptions options = new RequestOptions(); options.setPartitionKey(PartitionKey.NONE); Mono> readObservable = asyncClientResourceToken - .readDocument(createdDocument.getSelfLink(), options); + .readDocument(createdDocument.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(createdDocument.getId()).build(); validateSuccess(readObservable, validator); @@ -378,7 +378,7 @@ public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(Strin RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(partitionKey)); Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options); + .readDocument(documentUrl, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(documentId).build(); validateSuccess(readObservable, validator); @@ -414,7 +414,7 @@ public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound( RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(partitionKey)); Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options); + .readDocument(documentUrl, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); FailureValidator validator = new FailureValidator.Builder().resourceNotFound().build(); validateFailure(readObservable, validator); } finally { @@ -446,7 +446,7 @@ public void readDocumentFromCollPermissionWithDiffPartitionKey_WithException() t RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(PARTITION_KEY_VALUE_2)); Mono> readObservable = asyncClientResourceToken - .readDocument(createdDocumentWithPartitionKey.getSelfLink(), options); + .readDocument(createdDocumentWithPartitionKey.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); FailureValidator validator = new FailureValidator.Builder().resourceTokenNotFound().build(); validateFailure(readObservable, validator); } finally { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java index 19956728701c..cba206803c26 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java @@ -384,7 +384,7 @@ private Mono> replaceItemWithTrackingId(Class itemT Mono> readMono = this.getDatabase().getDocClientWrapper() - .readDocument(getItemLink(itemId), requestOptions) + .readDocument(getItemLink(itemId), requestOptions, this.getLinkWithoutTrailingSlash()) .map(response -> { mergeDiagnostics(response, cosmosException); return ModelBridgeInternal @@ -466,7 +466,7 @@ private Mono> createItemWithTrackingId( .toPartitionKey(partitionKeyInternal); readRequestOptions.setPartitionKey(partitionKey); - return clientWrapper.readDocument(getItemLink(itemId), readRequestOptions) + return clientWrapper.readDocument(getItemLink(itemId), readRequestOptions, this.getLinkWithoutTrailingSlash()) .map(response -> { mergeDiagnostics(response, cosmosException); return ModelBridgeInternal @@ -2075,7 +2075,7 @@ private Mono> deleteItemInternalCore( Context context) { Mono> responseMono = this.getDatabase() .getDocClientWrapper() - .deleteDocument(getItemLink(itemId), internalObjectNode, requestOptions) + .deleteDocument(getItemLink(itemId), internalObjectNode, requestOptions, this.getLinkWithoutTrailingSlash()) .map(response -> ModelBridgeInternal.createCosmosAsyncItemResponseWithObjectType(response)) .single(); CosmosAsyncClient client = database.getClient(); @@ -2132,7 +2132,7 @@ private Mono> replaceItemInternalCore( return this.getDatabase() .getDocClientWrapper() - .replaceDocument(getItemLink(itemId), doc, requestOptions) + .replaceDocument(getItemLink(itemId), doc, requestOptions, getLinkWithoutTrailingSlash()) .map(response -> ModelBridgeInternal.createCosmosAsyncItemResponse(response, itemType, getItemDeserializer())) .single(); } @@ -2225,7 +2225,7 @@ private Mono> patchItemInternal( Mono> responseMono = this.getDatabase() .getDocClientWrapper() - .patchDocument(getItemLink(itemId), cosmosPatchOperations, requestOptions) + .patchDocument(getItemLink(itemId), cosmosPatchOperations, requestOptions, this.getLinkWithoutTrailingSlash()) .map(response -> ModelBridgeInternal.createCosmosAsyncItemResponse(response, itemType, getItemDeserializer())); CosmosAsyncClient client = database @@ -2290,7 +2290,7 @@ private Mono> readItemInternal( RequestOptions requestOptions, Class itemType, Context context) { Mono> responseMono = this.getDatabase().getDocClientWrapper() - .readDocument(getItemLink(itemId), requestOptions) + .readDocument(getItemLink(itemId), requestOptions, this.getLinkWithoutTrailingSlash()) .map(response -> ModelBridgeInternal.createCosmosAsyncItemResponse(response, itemType, getItemDeserializer())) .single(); CosmosAsyncClient client = database diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index a9a197055f57..d597310b8352 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -593,7 +593,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response with the replaced document or an error. */ - Mono> replaceDocument(String documentLink, Object document, RequestOptions options); + Mono> replaceDocument(String documentLink, Object document, RequestOptions options, String collectionLink); /** * Apply patch on an item. @@ -608,7 +608,7 @@ Mono> upsertDocument(String collectionLink, Object do * * @return a {@link Mono} containing the single resource response with the patched document or an error. */ - Mono> patchDocument(String documentLink, CosmosPatchOperations cosmosPatchOperations, RequestOptions options); + Mono> patchDocument(String documentLink, CosmosPatchOperations cosmosPatchOperations, RequestOptions options, String collectionLink); /** * Replaces a document with the passed in document. @@ -621,7 +621,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response with the replaced document or an error. */ - Mono> replaceDocument(Document document, RequestOptions options); + Mono> replaceDocument(Document document, RequestOptions options, String collectionLink); /** * Deletes a document @@ -634,7 +634,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response for the deleted document or an error. */ - Mono> deleteDocument(String documentLink, RequestOptions options); + Mono> deleteDocument(String documentLink, RequestOptions options, String collectionLink); /** * Deletes a document @@ -647,7 +647,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response for the deleted document or an error. */ - Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options); + Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options, String collectionLink); Mono> deleteAllDocumentsByPartitionKey(String collectionLink, PartitionKey partitionKey, RequestOptions options); /** @@ -661,7 +661,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response with the read document or an error. */ - Mono> readDocument(String documentLink, RequestOptions options); + Mono> readDocument(String documentLink, RequestOptions options, String collectionLink); /** * Reads all documents in a document collection. diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index 82b5152fa6e6..a75133795e68 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -49,6 +49,7 @@ public class DocumentServiceRequestContext implements Cloneable { private CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig; private AtomicBoolean isRequestCancelledOnTimeout = null; private volatile List excludeRegions; + private volatile boolean isRequestHedged = false; // For cancelled rntbd requests, track the response as OperationCancelledException which later will be used to populate the cosmosDiagnostics public final Map rntbdCancelledRequestMap = new ConcurrentHashMap<>(); @@ -160,5 +161,13 @@ public List getExcludeRegions() { public void setExcludeRegions(List excludeRegions) { this.excludeRegions = excludeRegions; } + + public void setIsRequestHedged(boolean isRequestHedged) { + this.isRequestHedged = isRequestHedged; + } + + public boolean isRequestHedged() { + return this.isRequestHedged; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 74a6b71016c3..76c312e7cf6a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2110,8 +2110,8 @@ public Mono> createDocument( clientCtxOverride, isRequestHedged), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() - ); + options != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink); } private Mono> createDocumentCore( @@ -2279,8 +2279,8 @@ public Mono> upsertDocument(String collectionLink, Ob (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> upsertDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, isRequestHedged), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() - ); + options != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink); } private Mono> upsertDocumentCore( @@ -2355,7 +2355,7 @@ private Mono> upsertDocumentInternal( @Override public Mono> replaceDocument(String documentLink, Object document, - RequestOptions options) { + RequestOptions options, String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, @@ -2368,8 +2368,8 @@ public Mono> replaceDocument(String documentLink, Obj clientCtxOverride, isRequestHedged), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() - ); + options != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink); } private Mono> replaceDocumentCore( @@ -2443,7 +2443,7 @@ private Mono> replaceDocumentInternal( } @Override - public Mono> replaceDocument(Document document, RequestOptions options) { + public Mono> replaceDocument(Document document, RequestOptions options, String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, @@ -2454,8 +2454,8 @@ public Mono> replaceDocument(Document document, Reque clientCtxOverride, isRequestHedged), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() - ); + options != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink); } private Mono> replaceDocumentCore( @@ -2614,7 +2614,8 @@ private CosmosEndToEndOperationLatencyPolicyConfig getEffectiveEndToEndOperation @Override public Mono> patchDocument(String documentLink, CosmosPatchOperations cosmosPatchOperations, - RequestOptions options) { + RequestOptions options, + String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, @@ -2626,8 +2627,8 @@ public Mono> patchDocument(String documentLink, clientCtxOverride, isRequestHedged), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() - ); + options != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink); } private Mono> patchDocumentCore( @@ -2737,7 +2738,7 @@ private Mono> patchDocumentInternal( } @Override - public Mono> deleteDocument(String documentLink, RequestOptions options) { + public Mono> deleteDocument(String documentLink, RequestOptions options, String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, @@ -2749,12 +2750,12 @@ public Mono> deleteDocument(String documentLink, Requ clientCtxOverride, isRequestHedged), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() - ); + options != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink); } @Override - public Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options) { + public Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options, String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, @@ -2766,8 +2767,8 @@ public Mono> deleteDocument(String documentLink, Inte clientCtxOverride, isRequestHedged), options, - options != null && options.getNonIdempotentWriteRetriesEnabled() - ); + options != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink); } private Mono> deleteDocumentCore( @@ -2893,14 +2894,15 @@ private Mono> deleteAllDocumentsByPartitionKeyInterna } @Override - public Mono> readDocument(String documentLink, RequestOptions options) { - return readDocument(documentLink, options, this); + public Mono> readDocument(String documentLink, RequestOptions options, String collectionLink) { + return readDocument(documentLink, options, this, collectionLink); } private Mono> readDocument( String documentLink, RequestOptions options, - DiagnosticsClientContext innerDiagnosticsFactory) { + DiagnosticsClientContext innerDiagnosticsFactory, + String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, @@ -2908,8 +2910,8 @@ private Mono> readDocument( (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, isRequestHedged), options, false, - innerDiagnosticsFactory - ); + innerDiagnosticsFactory, + collectionLink); } private Mono> readDocumentCore( @@ -3083,7 +3085,8 @@ public Mono> readMany( partitionRangeItemKeyMap, resourceLink, state.getQueryOptions(), - klass); + klass, + collectionLink); // create the executable query Flux> queries = queryForReadMany( @@ -3366,7 +3369,8 @@ private Flux> pointReadsForReadMany( List> singleItemPartitionRequestMap, String resourceLink, CosmosQueryRequestOptions queryRequestOptions, - Class klass) { + Class klass, + String collectionLink) { // if there is any factory method being passed in, use the factory method to deserializ the object // else fallback to use the original way @@ -3382,7 +3386,7 @@ private Flux> pointReadsForReadMany( .getCosmosQueryRequestOptionsAccessor() .toRequestOptions(queryRequestOptions); requestOptions.setPartitionKey(firstIdentity.getPartitionKey()); - return this.readDocument((resourceLink + firstIdentity.getId()), requestOptions, diagnosticsFactory) + return this.readDocument((resourceLink + firstIdentity.getId()), requestOptions, diagnosticsFactory, collectionLink) .flatMap(resourceResponse -> Mono.just( new ImmutablePair, CosmosException>(resourceResponse, null) )) @@ -5410,7 +5414,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat OperationType operationType, DocumentPointOperation callback, RequestOptions initialRequestOptions, - boolean idempotentWriteRetriesEnabled) { + boolean idempotentWriteRetriesEnabled, + String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( resourceType, @@ -5418,8 +5423,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat callback, initialRequestOptions, idempotentWriteRetriesEnabled, - this - ); + this, + collectionLink); } private Mono> wrapPointOperationWithAvailabilityStrategy( @@ -5428,7 +5433,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat DocumentPointOperation callback, RequestOptions initialRequestOptions, boolean idempotentWriteRetriesEnabled, - DiagnosticsClientContext innerDiagnosticsFactory) { + DiagnosticsClientContext innerDiagnosticsFactory, + String collectionLink) { checkNotNull(resourceType, "Argument 'resourceType' must not be null."); checkNotNull(operationType, "Argument 'operationType' must not be null."); @@ -5462,129 +5468,136 @@ private Mono> wrapPointOperationWithAvailabilityStrat final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); - orderedApplicableRegionsForSpeculation - .forEach(region -> { - RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); - - if (monoList.isEmpty()) { - // no special error handling for transient errors to suppress them here - // because any cross-regional retries are expected to be processed - // by the ClientRetryPolicy for the initial request - so, any outcome of the - // initial Mono should be treated as non-transient error - even when - // the error would otherwise be treated as transient - Mono initialMonoAcrossAllRegions = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, false) - .map(NonTransientPointOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isCosmosException, - t -> Mono.just( - new NonTransientPointOperationResult( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); - - if (logger.isDebugEnabled()) { - monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( - "STARTING to process {} operation in region '{}'", - operationType, - region))); - } else { - monoList.add(initialMonoAcrossAllRegions); - } - } else { - clonedOptions.setExcludeRegions( - getEffectiveExcludedRegionsForHedging( - nonNullRequestOptions.getExcludeRegions(), - orderedApplicableRegionsForSpeculation, - region) - ); - - // Non-Transient errors are mapped to a value - this ensures the firstWithValue - // operator below will complete the composite Mono for both successful values - // and non-transient errors - Mono regionalCrossRegionRetryMono = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, true) - .map(NonTransientPointOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isNonTransientCosmosException, - t -> Mono.just( - new NonTransientPointOperationResult( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); - - Duration delayForCrossRegionalRetry = (availabilityStrategy) - .getThreshold() - .plus((availabilityStrategy) - .getThresholdStep() - .multipliedBy(monoList.size() - 1)); - - if (logger.isDebugEnabled()) { - monoList.add( - regionalCrossRegionRetryMono - .doOnSubscribe(c -> logger.debug("STARTING to process {} operation in region '{}'", operationType, region)) - .delaySubscription(delayForCrossRegionalRetry)); - } else { - monoList.add( - regionalCrossRegionRetryMono - .delaySubscription(delayForCrossRegionalRetry)); - } - } - }); - - // NOTE - merging diagnosticsFactory cannot only happen in - // doFinally operator because the doFinally operator is a side effect method - - // meaning it executes concurrently with firing the onComplete/onError signal - // doFinally is also triggered by cancellation - // So, to make sure merging the Context happens synchronously in line we - // have to ensure merging is happening on error/completion - // and also in doOnCancel. - return Mono - .firstWithValue(monoList) - .flatMap(nonTransientResult -> { - diagnosticsFactory.merge(nonNullRequestOptions); - if (nonTransientResult.isError()) { - return Mono.error(nonTransientResult.exception); - } - - return Mono.just(nonTransientResult.response); - }) - .onErrorMap(throwable -> { - Throwable exception = Exceptions.unwrap(throwable); + return this.collectionCache.resolveByNameAsync(null, collectionLink, null) + .flatMap(collection -> { + PartitionKeyDefinition partitionKeyDefinition = collection.getPartitionKey(); + PartitionKey partitionKey = nonNullRequestOptions.getPartitionKey(); + + orderedApplicableRegionsForSpeculation + .forEach(region -> { + RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); + + if (monoList.isEmpty()) { + // no special error handling for transient errors to suppress them here + // because any cross-regional retries are expected to be processed + // by the ClientRetryPolicy for the initial request - so, any outcome of the + // initial Mono should be treated as non-transient error - even when + // the error would otherwise be treated as transient + Mono initialMonoAcrossAllRegions = + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, false) + .map(NonTransientPointOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isCosmosException, + t -> Mono.just( + new NonTransientPointOperationResult( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + + if (logger.isDebugEnabled()) { + monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( + "STARTING to process {} operation in region '{}'", + operationType, + region))); + } else { + monoList.add(initialMonoAcrossAllRegions); + } + } else { + clonedOptions.setExcludeRegions( + getEffectiveExcludedRegionsForHedging( + nonNullRequestOptions.getExcludeRegions(), + orderedApplicableRegionsForSpeculation, + region) + ); + + // Non-Transient errors are mapped to a value - this ensures the firstWithValue + // operator below will complete the composite Mono for both successful values + // and non-transient errors + Mono regionalCrossRegionRetryMono = + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, true) + .map(NonTransientPointOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isNonTransientCosmosException, + t -> Mono.just( + new NonTransientPointOperationResult( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + + Duration delayForCrossRegionalRetry = (availabilityStrategy) + .getThreshold() + .plus((availabilityStrategy) + .getThresholdStep() + .multipliedBy(monoList.size() - 1)); + + if (logger.isDebugEnabled()) { + monoList.add( + regionalCrossRegionRetryMono + .doOnSubscribe(c -> logger.debug("STARTING to process {} operation in region '{}'", operationType, region)) + .delaySubscription(delayForCrossRegionalRetry)); + } else { + monoList.add( + regionalCrossRegionRetryMono + .delaySubscription(delayForCrossRegionalRetry)); + } + } + }); - if (exception instanceof NoSuchElementException) { + // NOTE - merging diagnosticsFactory cannot only happen in + // doFinally operator because the doFinally operator is a side effect method - + // meaning it executes concurrently with firing the onComplete/onError signal + // doFinally is also triggered by cancellation + // So, to make sure merging the Context happens synchronously in line we + // have to ensure merging is happening on error/completion + // and also in doOnCancel. + return Mono + .firstWithValue(monoList) + .flatMap(nonTransientResult -> { + diagnosticsFactory.merge(nonNullRequestOptions); + if (nonTransientResult.isError()) { + return Mono.error(nonTransientResult.exception); + } - List innerThrowables = Exceptions - .unwrapMultiple(exception.getCause()); + return Mono.just(nonTransientResult.response); + }) + .onErrorMap(throwable -> { + Throwable exception = Exceptions.unwrap(throwable); + + if (exception instanceof NoSuchElementException) { + + List innerThrowables = Exceptions + .unwrapMultiple(exception.getCause()); + + int index = 0; + for (Throwable innerThrowable : innerThrowables) { + Throwable innerException = Exceptions.unwrap(innerThrowable); + + // collect latest CosmosException instance bubbling up for a region + if (innerException instanceof CosmosException) { + CosmosException cosmosException = Utils.as(innerException, CosmosException.class); + diagnosticsFactory.merge(nonNullRequestOptions); + return cosmosException; + } else if (innerException instanceof NoSuchElementException) { + logger.trace( + "Operation in {} completed with empty result because it was cancelled.", + orderedApplicableRegionsForSpeculation.get(index)); + } else if (logger.isWarnEnabled()) { + String message = "Unexpected Non-CosmosException when processing operation in '" + + orderedApplicableRegionsForSpeculation.get(index) + + "'."; + logger.warn( + message, + innerException + ); + } - int index = 0; - for (Throwable innerThrowable : innerThrowables) { - Throwable innerException = Exceptions.unwrap(innerThrowable); + index++; + } + } - // collect latest CosmosException instance bubbling up for a region - if (innerException instanceof CosmosException) { - CosmosException cosmosException = Utils.as(innerException, CosmosException.class); diagnosticsFactory.merge(nonNullRequestOptions); - return cosmosException; - } else if (innerException instanceof NoSuchElementException) { - logger.trace( - "Operation in {} completed with empty result because it was cancelled.", - orderedApplicableRegionsForSpeculation.get(index)); - } else if (logger.isWarnEnabled()) { - String message = "Unexpected Non-CosmosException when processing operation in '" - + orderedApplicableRegionsForSpeculation.get(index) - + "'."; - logger.warn( - message, - innerException - ); - } - index++; - } - } - - diagnosticsFactory.merge(nonNullRequestOptions); + return exception; + }) + .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); - return exception; - }) - .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); + }); } private static boolean isCosmosException(Throwable t) { @@ -5827,6 +5840,8 @@ private Mono executeFeedOperationWithAvailabilityStrategy( region) ); + clonedRequest.requestContext.setIsRequestHedged(true); + // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors From d24e0a4cd92a05038313ab564aca7077eb50a8aa Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 29 Mar 2024 12:04:17 -0400 Subject: [PATCH 016/140] Added partitionKeyRange detection for point operations in document client layer. --- .../java/com/azure/cosmos/CosmosItemTest.java | 2 +- ...itionEndpointManagerForCircuitBreaker.java | 4 ++++ .../implementation/RxDocumentClientImpl.java | 22 +++++++++++++++---- .../implementation/RxGatewayStoreModel.java | 2 +- .../directconnectivity/AddressResolver.java | 2 +- 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java index 097b50ba77f4..8d38d52d7f38 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java @@ -162,7 +162,7 @@ public void readItemWithVeryLargePartitionKey() throws Exception { validateItemResponse(docDefinition, readResponse); } - @Test(groups = { "fast" }, timeOut = TIMEOUT) + @Test(groups = { "fast" }/*, timeOut = TIMEOUT*/) public void readItem() throws Exception { InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString()); CosmosItemResponse itemResponse = container.createItem(properties); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 9d9d3ad7b29c..6e6e3fd6a94e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -137,6 +137,10 @@ public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceReq @Override public boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest request) { + if (request.isMetadataRequest()) { + return true; + } + if (request == null) { throw new IllegalArgumentException("request cannot be null!"); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 76c312e7cf6a..622ea4b05b99 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5463,16 +5463,29 @@ private Mono> wrapPointOperationWithAvailabilityStrat } ThresholdBasedAvailabilityStrategy availabilityStrategy = - (ThresholdBasedAvailabilityStrategy)endToEndPolicyConfig.getAvailabilityStrategy(); + (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); List> monoList = new ArrayList<>(); final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); return this.collectionCache.resolveByNameAsync(null, collectionLink, null) - .flatMap(collection -> { + .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + PartitionKeyDefinition partitionKeyDefinition = collection.getPartitionKey(); PartitionKey partitionKey = nonNullRequestOptions.getPartitionKey(); + // todo: validate if the below is possible + if (collectionRoutingMapValueHolder.v == null) { + // throw new BulkExecutorUtil.CollectionRoutingMapNotFoundException(); + return Mono.error(new IllegalStateException("")); + } + + PartitionKeyRange partitionKeyRange = collectionRoutingMapValueHolder.v.getRangeByEffectivePartitionKey( + PartitionKeyInternalHelper.getEffectivePartitionKeyString( + ModelBridgeInternal.getPartitionKeyInternal(partitionKey), + partitionKeyDefinition)); + orderedApplicableRegionsForSpeculation .forEach(region -> { RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); @@ -5596,10 +5609,11 @@ private Mono> wrapPointOperationWithAvailabilityStrat return exception; }) .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); - - }); + })); } + + private static boolean isCosmosException(Throwable t) { final Throwable unwrappedException = Exceptions.unwrap(t); return unwrappedException instanceof CosmosException; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 6699b3275dd6..0a290f6b506e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -544,7 +544,7 @@ private Mono invokeAsyncInternal(RxDocumentServiceReq private Mono invokeAsync(RxDocumentServiceRequest request) { - if (this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { + if (!this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { return Mono.error(new ServiceUnavailableException("PkRange is unavailable at region", null, request.requestContext.locationEndpointToRoute, HttpConstants.SubStatusCodes.UNKNOWN)); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index c73b4b5549d2..6dc58e0dc249 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -88,7 +88,7 @@ public Mono resolveAsync( request.requestContext.resolvedPartitionKeyRange = result.TargetPartitionKeyRange; // TODO: use GlobalPartitionEndpointManager to add a partition-level request override - if (this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { + if (!this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { return Mono.error(new ServiceUnavailableException("PkRange is unavailable at region", null, request.requestContext.locationEndpointToRoute, HttpConstants.SubStatusCodes.UNKNOWN)); } From d07e81074eb100410aa3143f8d2c2a3e41e41194 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 23 Apr 2024 19:18:33 -0400 Subject: [PATCH 017/140] Updated CHANGELOG.md. --- .../cosmos/implementation/RxDocumentClientImpl.java | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 622ea4b05b99..f32c9a2441c5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2213,10 +2213,13 @@ private Mono handleRegionFeedbackForPointOperation( boolean isRequestHedged) { return response.doOnError(throwable -> { - if (throwable instanceof OperationCancelledException) { - this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); - } else if (throwable instanceof ServiceUnavailableException) { - this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + + if (!isRequestHedged) { + if (throwable instanceof OperationCancelledException) { + this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + } else if (throwable instanceof ServiceUnavailableException) { + this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + } } }); } From 5c0468e910f7b7d221aa4f51ec12cc1a6d8e95c6 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 24 Apr 2024 14:00:22 -0400 Subject: [PATCH 018/140] Fixing partition state transition logic. --- .../PartitionLevelCircuitBreakerTests.java | 167 ++++++++++++++++++ .../azure/cosmos/implementation/Configs.java | 6 +- .../implementation/CosmosSchedulers.java | 5 + ...itionEndpointManagerForCircuitBreaker.java | 44 +++-- .../implementation/RxDocumentClientImpl.java | 3 + 5 files changed, 208 insertions(+), 17 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java new file mode 100644 index 000000000000..e52b6c22ac69 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -0,0 +1,167 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + + +import com.azure.cosmos.ConnectionMode; +import com.azure.cosmos.CosmosAsyncClient; +import com.azure.cosmos.CosmosAsyncContainer; +import com.azure.cosmos.CosmosAsyncDatabase; +import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.TestObject; +import com.azure.cosmos.faultinjection.FaultInjectionTestBase; +import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; +import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.models.ThroughputProperties; +import com.azure.cosmos.test.faultinjection.CosmosFaultInjectionHelper; +import com.azure.cosmos.test.faultinjection.FaultInjectionCondition; +import com.azure.cosmos.test.faultinjection.FaultInjectionConditionBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionConnectionType; +import com.azure.cosmos.test.faultinjection.FaultInjectionEndpointBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionOperationType; +import com.azure.cosmos.test.faultinjection.FaultInjectionResultBuilders; +import com.azure.cosmos.test.faultinjection.FaultInjectionRule; +import com.azure.cosmos.test.faultinjection.FaultInjectionRuleBuilder; +import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorResult; +import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType; +import org.testng.SkipException; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; + +import static org.testng.Assert.fail; + + +public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { + + private List writeRegions; + + @Factory(dataProvider = "clientBuildersWithDirectTcpSession") + public PartitionLevelCircuitBreakerTests(CosmosClientBuilder cosmosClientBuilder) { + super(cosmosClientBuilder); + } + + @BeforeClass(groups = {"multi-master"}) + public void beforeClass() { + try (CosmosAsyncClient testClient = getClientBuilder().buildAsyncClient()) { + RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(testClient); + GlobalEndpointManager globalEndpointManager = documentClient.getGlobalEndpointManager(); + + DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); + this.writeRegions = new ArrayList<>(this.getRegionMap(databaseAccount, true).keySet()); + } finally { + logger.debug("beforeClass executed..."); + } + } + + @Test(groups = {"multi-master"}) + public void readHits503InPrimaryRegion() { + + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("readHits503InPrimaryRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + TestObject testObject = TestObject.create(); + + String itemIdMappingToUnhealthyPartition = testObject.getId(); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.READ_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(13) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) + .block(); + + container.createItem(testObject).block(); + + for (int i = 1; i <= 15; i++) { + CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + } + + CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); + logger.info("Sleep for 60 seconds"); + + Thread.sleep(60_000); + + for (int i = 1; i <= 30; i++) { + response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + } + + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Read operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + + private Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { + Iterator locationIterator = + writeOnly ? databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); + Map regionMap = new ConcurrentHashMap<>(); + + while (locationIterator.hasNext()) { + DatabaseAccountLocation accountLocation = locationIterator.next(); + regionMap.put(accountLocation.getName(), accountLocation.getEndpoint()); + } + + return regionMap; + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index f7d8b3c9c359..e3b9725c649a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -170,6 +170,8 @@ public class Configs { public static final int MIN_MAX_RETRIES_IN_LOCAL_REGION_WHEN_REMOTE_REGION_PREFERRED = 1; public static final String TCP_CONNECTION_ACQUISITION_TIMEOUT_IN_MS = "COSMOS.TCP_CONNECTION_ACQUISITION_TIMEOUT_IN_MS"; + + private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"; private static final boolean DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED = false; public Configs() { @@ -497,6 +499,8 @@ public static Duration getTcpConnectionAcquisitionTimeout(int defaultValueInMs) } public static boolean isPartitionLevelCircuitBreakerEnabled() { - return DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED; + return getJVMConfigAsBoolean( + PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED, + DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java index b5fc8875aee2..e889c41408ed 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosSchedulers.java @@ -69,4 +69,9 @@ public class CosmosSchedulers { TTL_FOR_SCHEDULER_WORKER_IN_SECONDS, true ); + + public final static Scheduler PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE = Schedulers.newSingle( + "partition-availability-staleness-check", + true + ); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 6e6e3fd6a94e..2d2473f62f79 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -26,14 +26,13 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker implements IGlobalP private final GlobalEndpointManager globalEndpointManager; private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; - public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; } public void init() { - + this.updateStaleLocationInfo().subscribeOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE).subscribe(); } @Override @@ -191,21 +190,27 @@ public boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest re } private Flux updateStaleLocationInfo() { - return Mono.just(1).repeat().delayElements(Duration.ofSeconds(60)).flatMap(ignore -> { + return Mono.just(1) + .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) + .repeat() + .delayElements(Duration.ofSeconds(60)) + .flatMap(ignore -> { + + logger.info("Background updateStaleLocationInfo kicking in..."); - for (Map.Entry pkRangeToFailoverInfo : this.partitionKeyRangeToFailoverInfo.entrySet()) { + for (Map.Entry pkRangeToFailoverInfo : this.partitionKeyRangeToFailoverInfo.entrySet()) { - PartitionLevelFailoverInfo partitionLevelFailoverInfo = pkRangeToFailoverInfo.getValue(); + PartitionLevelFailoverInfo partitionLevelFailoverInfo = pkRangeToFailoverInfo.getValue(); - for (Map.Entry locationToLocationLevelMetrics : partitionLevelFailoverInfo.partitionLevelFailureMetadata.entrySet()) { + for (Map.Entry locationToLocationLevelMetrics : partitionLevelFailoverInfo.partitionLevelFailureMetadata.entrySet()) { - LocationLevelMetrics locationLevelMetrics = locationToLocationLevelMetrics.getValue(); - locationLevelMetrics.handleSuccess(false); + LocationLevelMetrics locationLevelMetrics = locationToLocationLevelMetrics.getValue(); + locationLevelMetrics.handleSuccess(false); + } } - } - return Mono.empty(); - }); + return Mono.empty(); + }); } static class PartitionLevelFailoverInfo { @@ -256,11 +261,10 @@ public boolean isFailureThresholdBreachedForLocation(RxDocumentServiceRequest re public void bookmarkSuccess(URI succeededLocation) { this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, locationLevelMetricsAsVal) -> { - if (locationLevelMetricsAsVal == null) { - return new LocationLevelMetrics(); + if (locationLevelMetricsAsVal != null) { + locationLevelMetricsAsVal.handleSuccess(false);; } - locationLevelMetricsAsVal.handleSuccess(false); return locationLevelMetricsAsVal; }); } @@ -334,18 +338,22 @@ public void handleSuccess(boolean forceStateChange) { successCount.incrementAndGet(); } else { if ((double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); + logger.info("Partition marked as Available"); } } } break; case FreshUnavailable: if (!forceStateChange) { - if (Duration.between(this.unavailableSince.get(), Instant.now()).compareTo(Duration.ofSeconds(120)) == 1) { + if (Duration.between(this.unavailableSince.get(), Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + logger.info("Partition marked as StaleUnavailable"); } } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); + logger.info("Partition marked as StaleUnavailable"); } break; default: @@ -365,13 +373,17 @@ public void handleFailure(int errorCount) { failureCount.addAndGet(errorCount); } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); + logger.info("Partition marked as FreshUnavailable from Available"); } + break; case StaleUnavailable: if (failureCount.get() < allowedFailureCount) { failureCount.addAndGet(errorCount); } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); + logger.info("Partition marked as FreshUnavailable from StaleUnavailable"); } + break; default: throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); } @@ -423,7 +435,7 @@ private static double getAllowedFailureRatioByStatus(PartitionScopedRegionUnavai case StaleUnavailable: return 0.1d; default: - throw new IllegalStateException("Unsupported health status: " + status); + return 0d; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index f32c9a2441c5..a5bc6801e8c6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -510,6 +510,9 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.globalEndpointManager = new GlobalEndpointManager(asDatabaseAccountManagerInternal(), this.connectionPolicy, /**/configs); this.globalPartitionEndpointManager = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); + + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).init(); + this.retryPolicy = new RetryPolicy( this, this.globalEndpointManager, From 647dcc641b365ee9144e6d364791d2029f6c010b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 26 Apr 2024 12:07:14 -0400 Subject: [PATCH 019/140] Modify logger level. --- .../PartitionLevelCircuitBreakerTests.java | 20 ++- .../DocumentServiceRequestContext.java | 10 ++ .../implementation/GlobalEndpointManager.java | 14 +- ...itionEndpointManagerForCircuitBreaker.java | 139 ++++++-------- .../IGlobalPartitionEndpointManager.java | 1 - .../cosmos/implementation/RequestOptions.java | 11 ++ .../implementation/RxDocumentClientImpl.java | 169 ++++++++++++++---- .../implementation/RxGatewayStoreModel.java | 4 - .../directconnectivity/AddressResolver.java | 6 - .../implementation/routing/LocationCache.java | 43 ++++- 10 files changed, 275 insertions(+), 142 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index e52b6c22ac69..c60fbec055b7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -13,6 +13,7 @@ import com.azure.cosmos.faultinjection.FaultInjectionTestBase; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; import com.azure.cosmos.models.FeedRange; import com.azure.cosmos.models.PartitionKey; @@ -41,6 +42,7 @@ import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; +import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.fail; @@ -121,11 +123,18 @@ public void readHits503InPrimaryRegion() { .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) .block(); - container.createItem(testObject).block(); + container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); for (int i = 1; i <= 15; i++) { CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); } CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); @@ -136,6 +145,13 @@ public void readHits503InPrimaryRegion() { for (int i = 1; i <= 30; i++) { response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); } @@ -152,7 +168,7 @@ public void readHits503InPrimaryRegion() { } } - private Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { + private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { Iterator locationIterator = writeOnly ? databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); Map regionMap = new ConcurrentHashMap<>(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index a75133795e68..ef3bd6fd8668 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -49,6 +49,7 @@ public class DocumentServiceRequestContext implements Cloneable { private CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig; private AtomicBoolean isRequestCancelledOnTimeout = null; private volatile List excludeRegions; + private volatile List unavailableRegionsForPartition; private volatile boolean isRequestHedged = false; // For cancelled rntbd requests, track the response as OperationCancelledException which later will be used to populate the cosmosDiagnostics @@ -135,6 +136,7 @@ public DocumentServiceRequestContext clone() { context.throughputControlCycleId = this.throughputControlCycleId; context.replicaAddressValidationEnabled = this.replicaAddressValidationEnabled; context.endToEndOperationLatencyPolicyConfig = this.endToEndOperationLatencyPolicyConfig; + context.unavailableRegionsForPartition = this.unavailableRegionsForPartition; return context; } @@ -169,5 +171,13 @@ public void setIsRequestHedged(boolean isRequestHedged) { public boolean isRequestHedged() { return this.isRequestHedged; } + + public List getUnavailableRegionsForPartition() { + return unavailableRegionsForPartition; + } + + public void setUnavailableRegionsForPartition(List unavailableRegionsForPartition) { + this.unavailableRegionsForPartition = unavailableRegionsForPartition; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index 22a5725f40b4..474445cc866c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -47,6 +47,8 @@ public class GlobalEndpointManager implements AutoCloseable { private volatile Throwable latestDatabaseRefreshError; + private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; + public void setLatestDatabaseRefreshError(Throwable latestDatabaseRefreshError) { this.latestDatabaseRefreshError = latestDatabaseRefreshError; } @@ -104,12 +106,12 @@ public UnmodifiableList getApplicableWriteEndpoints(RxDocumentServiceReques public UnmodifiableList getApplicableReadEndpoints(List excludedRegions) { // readonly - return this.locationCache.getApplicableReadEndpoints(excludedRegions); + return this.locationCache.getApplicableReadEndpoints(excludedRegions, new ArrayList<>()); } public UnmodifiableList getApplicableWriteEndpoints(List excludedRegions) { //readonly - return this.locationCache.getApplicableWriteEndpoints(excludedRegions); + return this.locationCache.getApplicableWriteEndpoints(excludedRegions, new ArrayList<>()); } public List getAvailableReadEndpoints() { @@ -336,4 +338,12 @@ public boolean isClosed() { public String getRegionName(URI locationEndpoint, OperationType operationType) { return this.locationCache.getRegionName(locationEndpoint, operationType); } + + public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { + return globalPartitionEndpointManagerForCircuitBreaker; + } + + public void setGlobalPartitionEndpointManagerForCircuitBreaker(GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { + this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 2d2473f62f79..db98349cc735 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -12,6 +12,7 @@ import java.net.URI; import java.time.Duration; import java.time.Instant; +import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -19,12 +20,14 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; + public class GlobalPartitionEndpointManagerForCircuitBreaker implements IGlobalPartitionEndpointManager { private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); private final GlobalEndpointManager globalEndpointManager; - private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; + private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); @@ -64,7 +67,7 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfo(); + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); } isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(request)); @@ -122,7 +125,7 @@ public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceReq this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelFailoverInfo(); + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); } partitionKeyRangeFailoverInfoAsVal.bookmarkSuccess(succeededLocation); @@ -133,60 +136,30 @@ public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceReq return false; } - @Override - public boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest request) { - - if (request.isMetadataRequest()) { - return true; - } - - if (request == null) { - throw new IllegalArgumentException("request cannot be null!"); - } - - if (request.requestContext == null) { - - if (logger.isDebugEnabled()) { - logger.warn("requestContext is null!"); - } - - return false; - } - - PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; - - if (partitionKeyRange == null) { - throw new IllegalStateException("requestContext.resolvedPartitionKeyRange cannot be null!"); - } + public List getUnavailableLocationsForPartition(PartitionKeyRange partitionKeyRange) { - URI locationWithUndeterminedAvailability = request.requestContext.locationEndpointToRoute; + checkNotNull(partitionKeyRange, "Supplied partitionKeyRange cannot be null!"); - if (locationWithUndeterminedAvailability == null) { - throw new IllegalStateException("requestContext.locationEndpointToRoute cannot be null!"); - } + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = + this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); - if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { + List unavailableLocations = new ArrayList<>(); - // is it possible for this instance to go stale? - PartitionLevelFailoverInfo partitionLevelFailoverInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { + Map locationEndpointToFailureMetricsForPartition = + partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToFailureMetricsForPartition; - if (partitionLevelFailoverInfo.partitionLevelFailureMetadata.containsKey(locationWithUndeterminedAvailability)) { + for (Map.Entry pair : locationEndpointToFailureMetricsForPartition.entrySet()) { + URI location = pair.getKey(); + FailureMetricsForPartition failureMetricsForPartition = pair.getValue(); - LocationLevelMetrics locationLevelMetrics - = partitionLevelFailoverInfo.partitionLevelFailureMetadata.get(locationWithUndeterminedAvailability); - - if (locationLevelMetrics.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable) { - return false; + if (failureMetricsForPartition.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable) { + unavailableLocations.add(location); } } - - // there is no locationLevelFailureMetadata for locationWithUndeterminedAvailability - // [or] locationWithUndeterminedAvailability is still available / is stale unavailable - return true; } - // there is no partitionLevelFailoverInfo for partitionKeyRange - return true; + return UnmodifiableList.unmodifiableList(unavailableLocations); } private Flux updateStaleLocationInfo() { @@ -198,14 +171,14 @@ private Flux updateStaleLocationInfo() { logger.info("Background updateStaleLocationInfo kicking in..."); - for (Map.Entry pkRangeToFailoverInfo : this.partitionKeyRangeToFailoverInfo.entrySet()) { + for (Map.Entry pkRangeToFailoverInfo : this.partitionKeyRangeToFailoverInfo.entrySet()) { - PartitionLevelFailoverInfo partitionLevelFailoverInfo = pkRangeToFailoverInfo.getValue(); + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = pkRangeToFailoverInfo.getValue(); - for (Map.Entry locationToLocationLevelMetrics : partitionLevelFailoverInfo.partitionLevelFailureMetadata.entrySet()) { + for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToFailureMetricsForPartition.entrySet()) { - LocationLevelMetrics locationLevelMetrics = locationToLocationLevelMetrics.getValue(); - locationLevelMetrics.handleSuccess(false); + FailureMetricsForPartition failureMetricsForPartition = locationToLocationLevelMetrics.getValue(); + failureMetricsForPartition.handleSuccess(false); } } @@ -213,12 +186,12 @@ private Flux updateStaleLocationInfo() { }); } - static class PartitionLevelFailoverInfo { + private static class PartitionLevelLocationUnavailabilityInfo { - private final ConcurrentHashMap partitionLevelFailureMetadata; + private final ConcurrentHashMap locationEndpointToFailureMetricsForPartition; - PartitionLevelFailoverInfo() { - this.partitionLevelFailureMetadata = new ConcurrentHashMap<>(); + PartitionLevelLocationUnavailabilityInfo() { + this.locationEndpointToFailureMetricsForPartition = new ConcurrentHashMap<>(); } public boolean isFailureThresholdBreachedForLocation(RxDocumentServiceRequest request) { @@ -239,18 +212,18 @@ public boolean isFailureThresholdBreachedForLocation(RxDocumentServiceRequest re URI location = failuresPerLocation.getKey(); ConcurrentHashMap errorCounts = failuresPerLocation.getValue(); - this.partitionLevelFailureMetadata.compute(location, (locationAsKey, locationLevelMetricsAsVal) -> { + this.locationEndpointToFailureMetricsForPartition.compute(location, (locationAsKey, failureMetricsForPartitionAsVal) -> { - if (locationLevelMetricsAsVal == null) { - locationLevelMetricsAsVal = new LocationLevelMetrics(); + if (failureMetricsForPartitionAsVal == null) { + failureMetricsForPartitionAsVal = new FailureMetricsForPartition(); } for (Map.Entry countForError : errorCounts.entrySet()) { - locationLevelMetricsAsVal.handleFailure(countForError.getValue()); + failureMetricsForPartitionAsVal.handleFailure(countForError.getValue()); } - isFailureThresholdBreached.set(locationLevelMetricsAsVal.isFailureThresholdBreached()); - return locationLevelMetricsAsVal; + isFailureThresholdBreached.set(failureMetricsForPartitionAsVal.isFailureThresholdBreached()); + return failureMetricsForPartitionAsVal; }); } } @@ -259,46 +232,46 @@ public boolean isFailureThresholdBreachedForLocation(RxDocumentServiceRequest re } public void bookmarkSuccess(URI succeededLocation) { - this.partitionLevelFailureMetadata.compute(succeededLocation, (locationAsKey, locationLevelMetricsAsVal) -> { + this.locationEndpointToFailureMetricsForPartition.compute(succeededLocation, (locationAsKey, failureMetricsForPartitionAsVal) -> { - if (locationLevelMetricsAsVal != null) { - locationLevelMetricsAsVal.handleSuccess(false);; + if (failureMetricsForPartitionAsVal != null) { + failureMetricsForPartitionAsVal.handleSuccess(false);; } - return locationLevelMetricsAsVal; + return failureMetricsForPartitionAsVal; }); } public boolean areLocationsAvailableForPartitionKeyRange(List availableLocationsAtAccountLevel) { for (URI availableLocation : availableLocationsAtAccountLevel) { - if (!this.partitionLevelFailureMetadata.containsKey(availableLocation)) { + if (!this.locationEndpointToFailureMetricsForPartition.containsKey(availableLocation)) { return true; } else { - LocationLevelMetrics locationLevelMetrics = this.partitionLevelFailureMetadata.get(availableLocation); + FailureMetricsForPartition failureMetricsForPartition = this.locationEndpointToFailureMetricsForPartition.get(availableLocation); - if (locationLevelMetrics.isRegionAvailableToProcessRequests()) { + if (failureMetricsForPartition.isRegionAvailableToProcessRequests()) { return true; } } } Instant mostStaleUnavailableTimeAcrossRegions = Instant.MAX; - LocationLevelMetrics locationLevelFailureMetadataForMostStaleLocation = null; + FailureMetricsForPartition locationLevelFailureMetadataForMostStaleLocation = null; // find region with most 'stale' unavailability - for (Map.Entry uriToLocationLevelFailureMetadata : this.partitionLevelFailureMetadata.entrySet()) { - LocationLevelMetrics locationLevelMetrics = uriToLocationLevelFailureMetadata.getValue(); + for (Map.Entry uriToLocationLevelFailureMetadata : this.locationEndpointToFailureMetricsForPartition.entrySet()) { + FailureMetricsForPartition failureMetricsForPartition = uriToLocationLevelFailureMetadata.getValue(); - if (locationLevelMetrics.isRegionAvailableToProcessRequests()) { + if (failureMetricsForPartition.isRegionAvailableToProcessRequests()) { return true; } - Instant unavailableSinceSnapshot = locationLevelMetrics.unavailableSince.get(); + Instant unavailableSinceSnapshot = failureMetricsForPartition.unavailableSince.get(); if (mostStaleUnavailableTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { mostStaleUnavailableTimeAcrossRegions = unavailableSinceSnapshot; - locationLevelFailureMetadataForMostStaleLocation = locationLevelMetrics; + locationLevelFailureMetadataForMostStaleLocation = failureMetricsForPartition; } } @@ -311,7 +284,7 @@ public boolean areLocationsAvailableForPartitionKeyRange(List availableLoca } } - private static class LocationLevelMetrics { + private static class FailureMetricsForPartition { private final AtomicInteger failureCount = new AtomicInteger(0); private final AtomicInteger successCount = new AtomicInteger(0); private final AtomicReference unavailableSince = new AtomicReference<>(Instant.MAX); @@ -334,14 +307,10 @@ public void handleSuccess(boolean forceStateChange) { break; case StaleUnavailable: if (!forceStateChange) { - if (successCount.get() < 10) { - successCount.incrementAndGet(); - } else { - if ((double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { - - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); - logger.info("Partition marked as Available"); - } + successCount.incrementAndGet(); + if (successCount.get() > 10 && (double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { + this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); + logger.info("Partition marked as Available"); } } break; @@ -465,7 +434,7 @@ public boolean isRegionUnavailableToProcessRequest() { } } - enum PartitionScopedRegionUnavailabilityStatus { + private enum PartitionScopedRegionUnavailabilityStatus { Available(100), FreshUnavailable(200), StaleUnavailable(300); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java index f80c6e3db967..8c1c327fb004 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java @@ -6,5 +6,4 @@ public interface IGlobalPartitionEndpointManager { boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request); boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request); - boolean isRegionAvailableForPartitionKeyRange(RxDocumentServiceRequest request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java index 38089dbe24a8..67c2bc3cf1de 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java @@ -57,6 +57,8 @@ public class RequestOptions { private final AtomicReference markE2ETimeoutInRequestContextCallbackHook; + private PartitionKeyRange resolvedPartitionKeyRange; + public RequestOptions() { this.markE2ETimeoutInRequestContextCallbackHook = new AtomicReference<>(null); } @@ -85,6 +87,7 @@ public RequestOptions(RequestOptions toBeCloned) { this.endToEndOperationLatencyConfig = toBeCloned.endToEndOperationLatencyConfig; this.diagnosticsCtxSupplier = toBeCloned.diagnosticsCtxSupplier; this.markE2ETimeoutInRequestContextCallbackHook = new AtomicReference<>(null); + this.resolvedPartitionKeyRange = toBeCloned.resolvedPartitionKeyRange; if (toBeCloned.customOptions != null) { this.customOptions = new HashMap<>(toBeCloned.customOptions); @@ -536,4 +539,12 @@ public void setExcludeRegions(List excludeRegions) { public AtomicReference getMarkE2ETimeoutInRequestContextCallbackHook() { return this.markE2ETimeoutInRequestContextCallbackHook; } + + public void setResolvedPartitionKeyRange(PartitionKeyRange resolvedPartitionKeyRange) { + this.resolvedPartitionKeyRange = resolvedPartitionKeyRange; + } + + public PartitionKeyRange getResolvedPartitionKeyRange() { + return resolvedPartitionKeyRange; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index a5bc6801e8c6..f00923c1f59a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2170,6 +2170,15 @@ private Mono> createDocumentInternal( return requestObs .flatMap(request -> { + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + List unavailableLocationsForPartition = + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + documentServiceRequestReference.set(request); return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); }) @@ -2349,6 +2358,15 @@ private Mono> upsertDocumentInternal( return reqObs .flatMap(request -> { requestReference.set(request); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + List unavailableLocationsForPartition = + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -2584,8 +2602,20 @@ private Mono> replaceDocumentInternal( addPartitionKeyInformation(request, content, document, options, collectionObs); return requestObs - .flatMap(req -> replace(request, retryPolicyInstance) - .map(resp -> toResourceResponse(resp, Document.class))); + .flatMap(req -> { + requestReference.set(req); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + List unavailableLocationsForPartition = + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + + return replace(request, retryPolicyInstance); + }) + .map(resp -> toResourceResponse(resp, Document.class)); } private CosmosEndToEndOperationLatencyPolicyConfig getEndToEndOperationLatencyPolicyConfig( @@ -2738,6 +2768,15 @@ private Mono> patchDocumentInternal( return requestObs .flatMap(req -> { requestReference.set(req); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + List unavailableLocationsForPartition = + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + return patch(request, retryPolicyInstance); }) .map(resp -> toResourceResponse(resp, Document.class)); @@ -2852,6 +2891,15 @@ private Mono> deleteDocumentInternal( return requestObs .flatMap(req -> { requestReference.set(req); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + List unavailableLocationsForPartition = + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -2971,6 +3019,24 @@ private Mono> readDocumentInternal( () -> request.requestContext.setIsRequestCancelledOnTimeout(new AtomicBoolean(true))); request.requestContext.setExcludeRegions(options.getExcludeRegions()); + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + List unavailableLocationsForPartition = + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); + + logger.info("Printing unavailable region for partition"); + + for (String unavailableRegionForPartition : unavailableRegionsForPartition) { + logger.info("Unavailable region : {}", unavailableRegionForPartition); + } + + if (!unavailableRegionsForPartition.isEmpty()) { + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + if (retryPolicyInstance != null) { retryPolicyInstance.onBeforeSendRequest(request); } @@ -2981,6 +3047,8 @@ private Mono> readDocumentInternal( return requestObs.flatMap(req -> { requestReference.set(req); + + return this.read(request, retryPolicyInstance) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); }); @@ -5442,41 +5510,55 @@ private Mono> wrapPointOperationWithAvailabilityStrat DiagnosticsClientContext innerDiagnosticsFactory, String collectionLink) { - checkNotNull(resourceType, "Argument 'resourceType' must not be null."); - checkNotNull(operationType, "Argument 'operationType' must not be null."); - checkNotNull(callback, "Argument 'callback' must not be null."); +// checkNotNull(resourceType, "Argument 'resourceType' must not be null."); +// checkNotNull(operationType, "Argument 'operationType' must not be null."); +// checkNotNull(callback, "Argument 'callback' must not be null."); +// +// final RequestOptions nonNullRequestOptions = +// initialRequestOptions != null ? initialRequestOptions : new RequestOptions(); +// +// checkArgument( +// resourceType == ResourceType.Document, +// "This method can only be used for document point operations."); +// +// CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = +// getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); +// +// List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( +// endToEndPolicyConfig, +// resourceType, +// operationType, +// idempotentWriteRetriesEnabled, +// nonNullRequestOptions); +// +// if (orderedApplicableRegionsForSpeculation.size() < 2) { +// // There is at most one applicable region - no hedging possible +// return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false); +// } +// +// ThresholdBasedAvailabilityStrategy availabilityStrategy = +// (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); +// List> monoList = new ArrayList<>(); +// +// final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); - final RequestOptions nonNullRequestOptions = - initialRequestOptions != null ? initialRequestOptions : new RequestOptions(); + return this.collectionCache.resolveByNameAsync(null, collectionLink, null) + .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { - checkArgument( - resourceType == ResourceType.Document, - "This method can only be used for document point operations."); + checkNotNull(resourceType, "Argument 'resourceType' must not be null."); + checkNotNull(operationType, "Argument 'operationType' must not be null."); + checkNotNull(callback, "Argument 'callback' must not be null."); - CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = - getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); + final RequestOptions nonNullRequestOptions = + initialRequestOptions != null ? initialRequestOptions : new RequestOptions(); - List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( - endToEndPolicyConfig, - resourceType, - operationType, - idempotentWriteRetriesEnabled, - nonNullRequestOptions); + checkArgument( + resourceType == ResourceType.Document, + "This method can only be used for document point operations."); - if (orderedApplicableRegionsForSpeculation.size() < 2) { - // There is at most one applicable region - no hedging possible - return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false); - } - - ThresholdBasedAvailabilityStrategy availabilityStrategy = - (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); - List> monoList = new ArrayList<>(); - - final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); - - return this.collectionCache.resolveByNameAsync(null, collectionLink, null) - .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) - .flatMap(collectionRoutingMapValueHolder -> { + CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = + getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); PartitionKeyDefinition partitionKeyDefinition = collection.getPartitionKey(); PartitionKey partitionKey = nonNullRequestOptions.getPartitionKey(); @@ -5492,6 +5574,26 @@ private Mono> wrapPointOperationWithAvailabilityStrat ModelBridgeInternal.getPartitionKeyInternal(partitionKey), partitionKeyDefinition)); + nonNullRequestOptions.setResolvedPartitionKeyRange(partitionKeyRange); + + List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( + endToEndPolicyConfig, + resourceType, + operationType, + idempotentWriteRetriesEnabled, + nonNullRequestOptions); + + if (orderedApplicableRegionsForSpeculation.size() < 2) { + // There is at most one applicable region - no hedging possible + return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false); + } + + ThresholdBasedAvailabilityStrategy availabilityStrategy = + (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); + List> monoList = new ArrayList<>(); + + final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); + orderedApplicableRegionsForSpeculation .forEach(region -> { RequestOptions clonedOptions = new RequestOptions(nonNullRequestOptions); @@ -5813,8 +5915,7 @@ private Mono executeFeedOperationWithAvailabilityStrategy( resourceType, operationType, false, - initialExcludedRegions - ); + initialExcludedRegions); if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 0a290f6b506e..7d9999adf593 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -544,10 +544,6 @@ private Mono invokeAsyncInternal(RxDocumentServiceReq private Mono invokeAsync(RxDocumentServiceRequest request) { - if (!this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { - return Mono.error(new ServiceUnavailableException("PkRange is unavailable at region", null, request.requestContext.locationEndpointToRoute, HttpConstants.SubStatusCodes.UNKNOWN)); - } - Callable> funcDelegate = () -> invokeAsyncInternal(request).single().doOnSuccess(ignore -> this.globalPartitionEndpointManager.tryBookmarkRegionSuccessForPartitionKeyRange(request)); MetadataRequestRetryPolicy metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(this.globalEndpointManager); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index 6dc58e0dc249..3f2ffc9109f4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -86,12 +86,6 @@ public Mono resolveAsync( } request.requestContext.resolvedPartitionKeyRange = result.TargetPartitionKeyRange; - - // TODO: use GlobalPartitionEndpointManager to add a partition-level request override - if (!this.globalPartitionEndpointManager.isRegionAvailableForPartitionKeyRange(request)) { - return Mono.error(new ServiceUnavailableException("PkRange is unavailable at region", null, request.requestContext.locationEndpointToRoute, HttpConstants.SubStatusCodes.UNKNOWN)); - } - return Mono.just(result.Addresses); }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index a7bbbfc70855..be717b0849d0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -9,6 +9,8 @@ import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.DatabaseAccount; import com.azure.cosmos.implementation.DatabaseAccountLocation; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.Strings; @@ -205,6 +207,8 @@ public URI resolveServiceEndpoint(RxDocumentServiceRequest request) { return this.defaultEndpoint; } } else { + + logger.info("In resolveServiceEndpoint"); UnmodifiableList endpoints = request.getOperationType().isWriteOperation()? this.getApplicableWriteEndpoints(request) : this.getApplicableReadEndpoints(request); return endpoints.get(locationIndex % endpoints.size()); @@ -212,10 +216,11 @@ public URI resolveServiceEndpoint(RxDocumentServiceRequest request) { } public UnmodifiableList getApplicableWriteEndpoints(RxDocumentServiceRequest request) { - return this.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); + logger.info("In getApplicableWriteEndpoints with RxDocumentServiceRequest request"); + return this.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions(), request.requestContext.getUnavailableRegionsForPartition()); } - public UnmodifiableList getApplicableWriteEndpoints(List excludedRegionsOnRequest) { + public UnmodifiableList getApplicableWriteEndpoints(List excludedRegionsOnRequest, List unavailableRegionsForPartition) { UnmodifiableList writeEndpoints = this.getWriteEndpoints(); Supplier excludedRegionsSupplier = this.connectionPolicy.getExcludedRegionsSupplier(); @@ -223,7 +228,7 @@ public UnmodifiableList getApplicableWriteEndpoints(List excludedRe List effectiveExcludedRegions = isExcludedRegionsSupplierConfigured(excludedRegionsSupplier) ? new ArrayList<>(excludedRegionsSupplier.get().getExcludedRegions()) : Collections.emptyList(); - if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions)) { + if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions) && (unavailableRegionsForPartition == null || unavailableRegionsForPartition.isEmpty())) { return writeEndpoints; } @@ -231,26 +236,37 @@ public UnmodifiableList getApplicableWriteEndpoints(List excludedRe effectiveExcludedRegions = excludedRegionsOnRequest; } + List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); + + logger.info("Printing unavailable location for partition"); + + for (String unavailableRegionForPartition : unavailableRegionsForPartition) { + logger.info("Unavailable region : {}", unavailableRegionForPartition); + } + + effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); + // filter regions based on the exclude region config return this.getApplicableEndpoints( writeEndpoints, this.locationInfo.regionNameByWriteEndpoint, this.defaultEndpoint, - effectiveExcludedRegions); + effectiveExcludedRegionsWithPartitionUnavailableRegions); } public UnmodifiableList getApplicableReadEndpoints(RxDocumentServiceRequest request) { - return this.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()); + logger.info("In getApplicableReadEndpoints with RxDocumentServiceRequest request"); + return this.getApplicableReadEndpoints(request.requestContext.getExcludeRegions(), request.requestContext.getUnavailableRegionsForPartition()); } - public UnmodifiableList getApplicableReadEndpoints(List excludedRegionsOnRequest) { + public UnmodifiableList getApplicableReadEndpoints(List excludedRegionsOnRequest, List unavailableRegionsForPartition) { UnmodifiableList readEndpoints = this.getReadEndpoints(); Supplier excludedRegionsSupplier = this.connectionPolicy.getExcludedRegionsSupplier(); List effectiveExcludedRegions = isExcludedRegionsSupplierConfigured(excludedRegionsSupplier) ? new ArrayList<>(excludedRegionsSupplier.get().getExcludedRegions()) : Collections.emptyList(); - if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions)) { + if (!isExcludeRegionsConfigured(excludedRegionsOnRequest, effectiveExcludedRegions) && (unavailableRegionsForPartition == null || unavailableRegionsForPartition.isEmpty())) { return readEndpoints; } @@ -258,12 +274,21 @@ public UnmodifiableList getApplicableReadEndpoints(List excludedReg effectiveExcludedRegions = excludedRegionsOnRequest; } + List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); + effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); + + logger.info("Printing unavailable region for partition"); + + for (String unavailableRegionForPartition : unavailableRegionsForPartition) { + logger.info("Unavailable region : {}", unavailableRegionForPartition); + } + // filter regions based on the exclude region config return this.getApplicableEndpoints( readEndpoints, this.locationInfo.regionNameByReadEndpoint, this.locationInfo.writeEndpoints.get(0), // match the fallback region used in getPreferredAvailableEndpoints - effectiveExcludedRegions); + effectiveExcludedRegionsWithPartitionUnavailableRegions); } private UnmodifiableList getApplicableEndpoints( @@ -617,6 +642,8 @@ private UnmodifiableList getPreferredAvailableEndpoints(UnmodifiableMap(endpoints); } + + private UnmodifiableMap getEndpointByLocation(Iterable locations, Utils.ValueHolder> orderedLocations, Utils.ValueHolder> regionMap) { From 79745759eb05ce3ee918224efe63e3b251c5eff1 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 29 Apr 2024 10:36:39 -0400 Subject: [PATCH 020/140] Adding a way to exclude partition-level unavailable regions. --- .../cosmos/implementation/RequestOptions.java | 14 +- .../implementation/RxDocumentClientImpl.java | 297 +++++++++--------- 2 files changed, 147 insertions(+), 164 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java index 67c2bc3cf1de..d44c7c9f89e5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java @@ -11,13 +11,13 @@ import com.azure.cosmos.models.DedicatedGatewayRequestOptions; import com.azure.cosmos.models.IndexingDirective; import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.ThroughputProperties; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.Callable; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Supplier; @@ -57,7 +57,7 @@ public class RequestOptions { private final AtomicReference markE2ETimeoutInRequestContextCallbackHook; - private PartitionKeyRange resolvedPartitionKeyRange; + private PartitionKeyDefinition partitionKeyDefinition; public RequestOptions() { this.markE2ETimeoutInRequestContextCallbackHook = new AtomicReference<>(null); @@ -87,7 +87,7 @@ public RequestOptions(RequestOptions toBeCloned) { this.endToEndOperationLatencyConfig = toBeCloned.endToEndOperationLatencyConfig; this.diagnosticsCtxSupplier = toBeCloned.diagnosticsCtxSupplier; this.markE2ETimeoutInRequestContextCallbackHook = new AtomicReference<>(null); - this.resolvedPartitionKeyRange = toBeCloned.resolvedPartitionKeyRange; + this.partitionKeyDefinition = toBeCloned.partitionKeyDefinition; if (toBeCloned.customOptions != null) { this.customOptions = new HashMap<>(toBeCloned.customOptions); @@ -540,11 +540,11 @@ public AtomicReference getMarkE2ETimeoutInRequestContextCallbackHook() return this.markE2ETimeoutInRequestContextCallbackHook; } - public void setResolvedPartitionKeyRange(PartitionKeyRange resolvedPartitionKeyRange) { - this.resolvedPartitionKeyRange = resolvedPartitionKeyRange; + public void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + this.partitionKeyDefinition = partitionKeyDefinition; } - public PartitionKeyRange getResolvedPartitionKeyRange() { - return resolvedPartitionKeyRange; + public PartitionKeyDefinition getPartitionKeyDefinition() { + return this.partitionKeyDefinition; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index f00923c1f59a..92eff425d155 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -216,7 +216,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization */ private final QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; private final GlobalEndpointManager globalEndpointManager; - private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; private final RetryPolicy retryPolicy; private HttpClient reactorHttpClient; private Function httpClientInterceptor; @@ -509,15 +509,15 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.reactorHttpClient = httpClient(); this.globalEndpointManager = new GlobalEndpointManager(asDatabaseAccountManagerInternal(), this.connectionPolicy, /**/configs); - this.globalPartitionEndpointManager = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); + this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).init(); + ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManagerForCircuitBreaker).init(); this.retryPolicy = new RetryPolicy( this, this.globalEndpointManager, this.connectionPolicy, - this.globalPartitionEndpointManager); + this.globalPartitionEndpointManagerForCircuitBreaker); this.resetSessionTokenRetryPolicy = retryPolicy; CpuMemoryMonitor.register(this); this.queryPlanCache = new ConcurrentHashMap<>(); @@ -600,7 +600,7 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.globalEndpointManager, this.reactorHttpClient, this.apiType, - this.globalPartitionEndpointManager); + this.globalPartitionEndpointManagerForCircuitBreaker); this.globalEndpointManager.init(); this.initializeGatewayConfigurationReader(); @@ -686,7 +686,7 @@ private void initializeDirectConnectivity() { null, this.connectionPolicy, this.apiType, - this.globalPartitionEndpointManager); + this.globalPartitionEndpointManagerForCircuitBreaker); this.storeClientFactory = new StoreClientFactory( this.addressResolver, @@ -1768,9 +1768,6 @@ private Mono getCreateDocumentRequest(DocumentClientRe request.requestContext.setExcludeRegions(options.getExcludeRegions()); } - if (requestRetryPolicy != null) { - requestRetryPolicy.onBeforeSendRequest(request); - } SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); if (serializationDiagnosticsContext != null) { @@ -2104,14 +2101,15 @@ public Mono> createDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Create, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> createDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> createDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, - isRequestHedged), + isRequestHedged, + collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2124,7 +2122,8 @@ private Mono> createDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged) { + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap) { ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); DocumentClientRetryPolicy requestRetryPolicy = @@ -2148,7 +2147,8 @@ private Mono> createDocumentCore( disableAutomaticIdGeneration, finalRetryPolicyInstance, scopedDiagnosticsFactory, - requestReference), + requestReference, + collectionRoutingMap), requestRetryPolicy), scopedDiagnosticsFactory ), requestReference, isRequestHedged); @@ -2161,7 +2161,8 @@ private Mono> createDocumentInternal( boolean disableAutomaticIdGeneration, DocumentClientRetryPolicy requestRetryPolicy, DiagnosticsClientContext clientContextOverride, - AtomicReference documentServiceRequestReference) { + AtomicReference documentServiceRequestReference, + Utils.ValueHolder collectionRoutingMap) { try { logger.debug("Creating a Document. collectionLink: [{}]", collectionLink); @@ -2171,15 +2172,13 @@ private Mono> createDocumentInternal( return requestObs .flatMap(request -> { - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - List unavailableLocationsForPartition = - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + documentServiceRequestReference.set(request); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); - request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); } - documentServiceRequestReference.set(request); return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -2228,9 +2227,9 @@ private Mono handleRegionFeedbackForPointOperation( if (!isRequestHedged) { if (throwable instanceof OperationCancelledException) { - this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); } else if (throwable instanceof ServiceUnavailableException) { - this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); } } }); @@ -2291,8 +2290,8 @@ public Mono> upsertDocument(String collectionLink, Ob return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Upsert, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> upsertDocumentCore( - collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, isRequestHedged), + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> upsertDocumentCore( + collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2305,7 +2304,8 @@ private Mono> upsertDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged) { + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2328,7 +2328,8 @@ private Mono> upsertDocumentCore( disableAutomaticIdGeneration, finalRetryPolicyInstance, scopedDiagnosticsFactory, - requestReference), + requestReference, + collectionRoutingMap), finalRetryPolicyInstance), scopedDiagnosticsFactory), requestReference, isRequestHedged); } @@ -2340,7 +2341,8 @@ private Mono> upsertDocumentInternal( boolean disableAutomaticIdGeneration, DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, - AtomicReference requestReference) { + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap) { try { logger.debug("Upserting a Document. collectionLink: [{}]", collectionLink); @@ -2357,14 +2359,13 @@ private Mono> upsertDocumentInternal( return reqObs .flatMap(request -> { - requestReference.set(request); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - List unavailableLocationsForPartition = - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); - request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + requestReference.set(request); + + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); } return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); @@ -2384,13 +2385,14 @@ public Mono> replaceDocument(String documentLink, Obj return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> replaceDocumentCore( documentLink, document, opt, e2ecfg, clientCtxOverride, - isRequestHedged), + isRequestHedged, + collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2402,7 +2404,8 @@ private Mono> replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged) { + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2427,7 +2430,8 @@ private Mono> replaceDocumentCore( finalRequestRetryPolicy, endToEndPolicyConfig, scopedDiagnosticsFactory, - requestReference), + requestReference, + collectionRoutingMap), requestRetryPolicy), scopedDiagnosticsFactory), requestReference, isRequestHedged); } @@ -2439,7 +2443,8 @@ private Mono> replaceDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - AtomicReference requestReference) { + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2458,7 +2463,8 @@ private Mono> replaceDocumentInternal( options, retryPolicyInstance, clientContextOverride, - requestReference); + requestReference, + collectionRoutingMap); } catch (Exception e) { logger.debug("Failure in replacing a document due to [{}]", e.getMessage()); @@ -2471,12 +2477,13 @@ public Mono> replaceDocument(Document document, Reque return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> replaceDocumentCore( document, opt, e2ecfg, clientCtxOverride, - isRequestHedged), + isRequestHedged, + collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2487,7 +2494,8 @@ private Mono> replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged) { + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap) { DocumentClientRetryPolicy requestRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(clientContextOverride); @@ -2506,7 +2514,8 @@ private Mono> replaceDocumentCore( finalRequestRetryPolicy, endToEndPolicyConfig, clientContextOverride, - requestReference), + requestReference, + collectionRoutingMap), requestRetryPolicy), requestReference, isRequestHedged); } @@ -2516,7 +2525,8 @@ private Mono> replaceDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - AtomicReference requestReference) { + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap) { try { if (document == null) { @@ -2529,7 +2539,8 @@ private Mono> replaceDocumentInternal( options, retryPolicyInstance, clientContextOverride, - requestReference); + requestReference, + collectionRoutingMap); } catch (Exception e) { logger.debug("Failure in replacing a database due to [{}]", e.getMessage()); @@ -2543,7 +2554,8 @@ private Mono> replaceDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, - AtomicReference requestReference) { + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap) { if (document == null) { throw new IllegalArgumentException("document"); @@ -2584,9 +2596,6 @@ private Mono> replaceDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludeRegions()); } - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); @@ -2603,14 +2612,13 @@ private Mono> replaceDocumentInternal( return requestObs .flatMap(req -> { - requestReference.set(req); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - List unavailableLocationsForPartition = - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); - request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + requestReference.set(req); + + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); } return replace(request, retryPolicyInstance); @@ -2655,13 +2663,14 @@ public Mono> patchDocument(String documentLink, return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> patchDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> patchDocumentCore( documentLink, cosmosPatchOperations, opt, e2ecfg, clientCtxOverride, - isRequestHedged), + isRequestHedged, + collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2673,7 +2682,8 @@ private Mono> patchDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged) { + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2692,7 +2702,8 @@ private Mono> patchDocumentCore( nonNullRequestOptions, documentClientRetryPolicy, scopedDiagnosticsFactory, - requestReference), + requestReference, + collectionRoutingMap), documentClientRetryPolicy), scopedDiagnosticsFactory), requestReference, isRequestHedged); } @@ -2703,7 +2714,8 @@ private Mono> patchDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, - AtomicReference requestReference) { + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap) { checkArgument(StringUtils.isNotEmpty(documentLink), "expected non empty documentLink"); checkNotNull(cosmosPatchOperations, "expected non null cosmosPatchOperations"); @@ -2744,9 +2756,6 @@ private Mono> patchDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludeRegions()); } - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); @@ -2767,14 +2776,13 @@ private Mono> patchDocumentInternal( return requestObs .flatMap(req -> { - requestReference.set(req); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - List unavailableLocationsForPartition = - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); - request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + requestReference.set(req); + + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); } return patch(request, retryPolicyInstance); @@ -2787,13 +2795,14 @@ public Mono> deleteDocument(String documentLink, Requ return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> deleteDocumentCore( documentLink, null, opt, e2ecfg, clientCtxOverride, - isRequestHedged), + isRequestHedged, + collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2804,13 +2813,14 @@ public Mono> deleteDocument(String documentLink, Inte return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> deleteDocumentCore( documentLink, internalObjectNode, opt, e2ecfg, clientCtxOverride, - isRequestHedged), + isRequestHedged, + collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2822,7 +2832,8 @@ private Mono> deleteDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged) { + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2841,7 +2852,8 @@ private Mono> deleteDocumentCore( nonNullRequestOptions, requestRetryPolicy, scopedDiagnosticsFactory, - requestReference), + requestReference, + collectionRoutingMap), requestRetryPolicy), scopedDiagnosticsFactory), requestReference, isRequestHedged); } @@ -2852,7 +2864,8 @@ private Mono> deleteDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, - AtomicReference requestReference) { + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2876,10 +2889,6 @@ private Mono> deleteDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludeRegions()); } - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - Mono> collectionObs = collectionCache.resolveCollectionAsync( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); @@ -2890,14 +2899,13 @@ private Mono> deleteDocumentInternal( return requestObs .flatMap(req -> { - requestReference.set(req); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - List unavailableLocationsForPartition = - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); - request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + requestReference.set(req); + + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); } return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); @@ -2961,7 +2969,7 @@ private Mono> readDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Read, - (opt, e2ecfg, clientCtxOverride, isRequestHedged) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, isRequestHedged), + (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap), options, false, innerDiagnosticsFactory, @@ -2973,7 +2981,8 @@ private Mono> readDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged) { + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2990,7 +2999,8 @@ private Mono> readDocumentCore( nonNullRequestOptions, retryPolicyInstance, scopedDiagnosticsFactory, - requestReference), + requestReference, + collectionRoutingMap), retryPolicyInstance), scopedDiagnosticsFactory ), requestReference, isRequestHedged); @@ -3001,7 +3011,8 @@ private Mono> readDocumentInternal( RequestOptions options, DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, - AtomicReference requestReference) { + AtomicReference requestReference, + Utils.ValueHolder collectionRoutingMap) { try { if (StringUtils.isEmpty(documentLink)) { @@ -3019,37 +3030,21 @@ private Mono> readDocumentInternal( () -> request.requestContext.setIsRequestCancelledOnTimeout(new AtomicBoolean(true))); request.requestContext.setExcludeRegions(options.getExcludeRegions()); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - List unavailableLocationsForPartition = - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManager).getUnavailableLocationsForPartition(options.getResolvedPartitionKeyRange()); - - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType())).collect(Collectors.toList()); - - logger.info("Printing unavailable region for partition"); - - for (String unavailableRegionForPartition : unavailableRegionsForPartition) { - logger.info("Unavailable region : {}", unavailableRegionForPartition); - } - - if (!unavailableRegionsForPartition.isEmpty()) { - request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); - } - request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); - } - - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs); return requestObs.flatMap(req -> { + + addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + requestReference.set(req); + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } - return this.read(request, retryPolicyInstance) + return this.read(req, retryPolicyInstance) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); }); @@ -5483,6 +5478,31 @@ static UUID randomUuid(long msb, long lsb) { return new UUID(msb, lsb); } + private void addPartitionLevelUnavailableRegionsForRequest( + RxDocumentServiceRequest request, + RequestOptions options, + CollectionRoutingMap collectionRoutingMap) { + + checkNotNull(options, "options cannot be null!"); + checkNotNull(options.getPartitionKeyDefinition(), "partitionKeyDefinition within options cannot be null!"); + checkNotNull(collectionRoutingMap, "collectionRoutingMap cannot be null!"); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + PartitionKeyDefinition partitionKeyDefinition = options.getPartitionKeyDefinition(); + PartitionKeyInternal partitionKeyInternal = request.getPartitionKeyInternal(); + + String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKeyInternal, partitionKeyDefinition); + PartitionKeyRange partitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + + checkNotNull(partitionKeyRange, "partitionKeyRange cannot be null!"); + checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationsForPartition(partitionKeyRange); + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); + + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + } + private Mono> wrapPointOperationWithAvailabilityStrategy( ResourceType resourceType, OperationType operationType, @@ -5510,38 +5530,6 @@ private Mono> wrapPointOperationWithAvailabilityStrat DiagnosticsClientContext innerDiagnosticsFactory, String collectionLink) { -// checkNotNull(resourceType, "Argument 'resourceType' must not be null."); -// checkNotNull(operationType, "Argument 'operationType' must not be null."); -// checkNotNull(callback, "Argument 'callback' must not be null."); -// -// final RequestOptions nonNullRequestOptions = -// initialRequestOptions != null ? initialRequestOptions : new RequestOptions(); -// -// checkArgument( -// resourceType == ResourceType.Document, -// "This method can only be used for document point operations."); -// -// CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = -// getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); -// -// List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( -// endToEndPolicyConfig, -// resourceType, -// operationType, -// idempotentWriteRetriesEnabled, -// nonNullRequestOptions); -// -// if (orderedApplicableRegionsForSpeculation.size() < 2) { -// // There is at most one applicable region - no hedging possible -// return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false); -// } -// -// ThresholdBasedAvailabilityStrategy availabilityStrategy = -// (ThresholdBasedAvailabilityStrategy) endToEndPolicyConfig.getAvailabilityStrategy(); -// List> monoList = new ArrayList<>(); -// -// final ScopedDiagnosticsFactory diagnosticsFactory = new ScopedDiagnosticsFactory(innerDiagnosticsFactory, false); - return this.collectionCache.resolveByNameAsync(null, collectionLink, null) .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) .flatMap(collectionRoutingMapValueHolder -> { @@ -5565,16 +5553,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat // todo: validate if the below is possible if (collectionRoutingMapValueHolder.v == null) { - // throw new BulkExecutorUtil.CollectionRoutingMapNotFoundException(); - return Mono.error(new IllegalStateException("")); + return Mono.error(new NullPointerException("collectionRoutingMapValueHolder.v cannot be null!")); } - PartitionKeyRange partitionKeyRange = collectionRoutingMapValueHolder.v.getRangeByEffectivePartitionKey( - PartitionKeyInternalHelper.getEffectivePartitionKeyString( - ModelBridgeInternal.getPartitionKeyInternal(partitionKey), - partitionKeyDefinition)); - - nonNullRequestOptions.setResolvedPartitionKeyRange(partitionKeyRange); + nonNullRequestOptions.setPartitionKeyDefinition(collection.getPartitionKey()); List orderedApplicableRegionsForSpeculation = getApplicableRegionsForSpeculation( endToEndPolicyConfig, @@ -5585,7 +5567,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false); + return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false, collectionRoutingMapValueHolder); } ThresholdBasedAvailabilityStrategy availabilityStrategy = @@ -5605,7 +5587,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient Mono initialMonoAcrossAllRegions = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, false) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, false, collectionRoutingMapValueHolder) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isCosmosException, @@ -5633,7 +5615,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat // operator below will complete the composite Mono for both successful values // and non-transient errors Mono regionalCrossRegionRetryMono = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, true) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, true, collectionRoutingMapValueHolder) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isNonTransientCosmosException, @@ -6053,7 +6035,8 @@ Mono> apply( RequestOptions requestOptions, CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged); + boolean isRequestHedged, + Utils.ValueHolder collectionRoutingMap); } private static class NonTransientPointOperationResult { From 84cc95a0b08b8c1ca260e99f3e917612b245c979 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 30 Apr 2024 18:04:32 -0400 Subject: [PATCH 021/140] Adding a way to exclude partition-level unavailable regions for queries. --- .../PartitionLevelCircuitBreakerTests.java | 593 +++++++++++++++++- .../ImplementationBridgeHelpers.java | 9 + .../implementation/RxDocumentClientImpl.java | 64 +- .../DefaultDocumentQueryExecutionContext.java | 13 +- .../query/DocumentProducer.java | 22 +- .../query/IDocumentQueryClient.java | 9 +- ...llelDocumentQueryExecutionContextBase.java | 3 + .../models/CosmosQueryRequestOptions.java | 40 ++ 8 files changed, 737 insertions(+), 16 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index c60fbec055b7..b0104a78d7b8 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -15,7 +15,10 @@ import com.azure.cosmos.models.CosmosContainerProperties; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.models.CosmosPatchItemRequestOptions; +import com.azure.cosmos.models.CosmosPatchOperations; import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.PartitionKey; import com.azure.cosmos.models.ThroughputProperties; import com.azure.cosmos.test.faultinjection.CosmosFaultInjectionHelper; @@ -69,7 +72,7 @@ public void beforeClass() { } @Test(groups = {"multi-master"}) - public void readHits503InPrimaryRegion() { + public void readHits503InFirstPreferredRegion() { List preferredRegions = this.writeRegions; CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); @@ -168,6 +171,594 @@ public void readHits503InPrimaryRegion() { } } + @Test(groups = {"multi-master"}) + public void upsertHits503InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("upsertHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + TestObject testObject = TestObject.create(); + + String itemIdMappingToUnhealthyPartition = testObject.getId(); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.UPSERT_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(13) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) + .block(); + + container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + + for (int i = 1; i <= 15; i++) { + CosmosItemResponse response = container.upsertItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + CosmosItemResponse response = container.upsertItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + logger.info("Sleep for 60 seconds"); + + Thread.sleep(60_000); + + for (int i = 1; i <= 30; i++) { + response = container.upsertItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Upsert operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + + @Test(groups = {"multi-master"}) + public void createHits503InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("createHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + TestObject testObject = TestObject.create(); + + String itemIdMappingToUnhealthyPartition = testObject.getId(); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.CREATE_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(13) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) + .block(); + + container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + + for (int i = 1; i <= 15; i++) { + testObject = TestObject.create(); + CosmosItemResponse response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + testObject = TestObject.create(); + CosmosItemResponse response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + logger.info("Sleep for 60 seconds"); + + Thread.sleep(60_000); + + for (int i = 1; i <= 30; i++) { + testObject = TestObject.create(); + response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Create operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + + @Test(groups = {"multi-master"}) + public void deleteHits503InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("deleteHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.DELETE_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(13) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) + .block(); + + List idAndPks = new ArrayList<>(); + + for (int i = 1; i <= 30; i++) { + TestObject testObject = TestObject.create(); + CosmosItemResponse response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + idAndPks.add(testObject.getId()); + } + + for (int i = 0; i < 15; i++) { + CosmosItemResponse response = container.deleteItem(idAndPks.get(i), new PartitionKey(idAndPks.get(i)), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("Sleeping for a minute!"); + Thread.sleep(60_000); + + for (int i = 15; i < 30; i++) { + CosmosItemResponse response = container.deleteItem(idAndPks.get(i), new PartitionKey(idAndPks.get(i)), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Create operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + + @Test(groups = {"multi-master"}) + public void patchHits503InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("createHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.PATCH_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(13) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) + .block(); + + TestObject testObject = TestObject.create(); + container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + + CosmosPatchOperations patchOperations = CosmosPatchOperations.create().add("/number", 555); + + for (int i = 0; i < 15; i++) { + CosmosItemResponse response = container.patchItem(testObject.getId(), new PartitionKey(testObject.getId()), patchOperations, new CosmosPatchItemRequestOptions(), TestObject.class).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("Sleep for 60 seconds!"); + Thread.sleep(60_000); + + for (int i = 0; i < 15; i++) { + CosmosItemResponse response = container.patchItem(testObject.getId(), new PartitionKey(testObject.getId()), patchOperations, new CosmosPatchItemRequestOptions(), TestObject.class).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Patch operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + + @Test(groups = {"multi-master"}) + public void replaceHits503InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("createHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.REPLACE_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(13) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) + .block(); + + TestObject testObject = TestObject.create(); + container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + + for (int i = 0; i < 15; i++) { + CosmosItemResponse response = container.replaceItem(testObject, testObject.getId(), new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("Sleep for 60 seconds!"); + Thread.sleep(60_000); + + for (int i = 0; i < 15; i++) { + CosmosItemResponse response = container.replaceItem(testObject, testObject.getId(), new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Replace operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + + @Test(groups = {"multi-master"}) + public void queryHits503InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("readHits503InPrimaryRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(6_000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + TestObject testObject = TestObject.create(); + + String itemIdMappingToUnhealthyPartition = testObject.getId(); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.QUERY_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(13) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) + .block(); + + container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + + for (int i = 1; i <= 15; i++) { + FeedResponse response = container.queryItems("SELECT * FROM c", TestObject.class).byPage().blockLast(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getCosmosDiagnostics()).isNotNull(); + + response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + + logger.info("CosmosDiagnostics : {}", response.getCosmosDiagnostics().toString()); + } + + logger.info("Sleep for 60 seconds!"); + Thread.sleep(60_000); + + for (int i = 1; i <= 30; i++) { + FeedResponse response = container.queryItems("SELECT * FROM c", TestObject.class).byPage().blockLast(); + logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + + assertThat(response).isNotNull(); + assertThat(response.getCosmosDiagnostics()).isNotNull(); + + response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Query operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { Iterator locationIterator = writeOnly ? databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 4e17dbe5afc4..468305d63e64 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -62,6 +62,7 @@ import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.ModelBridgeInternal; import com.azure.cosmos.models.PartitionKey; +import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.PriorityLevel; import com.azure.cosmos.models.SqlQuerySpec; import com.azure.cosmos.util.CosmosPagedFlux; @@ -289,6 +290,14 @@ void setCancelledRequestDiagnosticsTracker( Integer getMaxItemCount(CosmosQueryRequestOptions options); String getRequestContinuation(CosmosQueryRequestOptions options); + + void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition); + + PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options); + + void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid); + + String getCollectionRid(CosmosQueryRequestOptions options); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 92eff425d155..a3e3e5380615 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -3610,6 +3610,40 @@ public Mono readFeedAsync(RxDocumentServiceRequest re // TODO Auto-generated method stub return null; } + + @Override + public Mono populateFeedRangeHeader(RxDocumentServiceRequest request) { + + if (RxDocumentClientImpl.this.requiresFeedRangeFiltering(request)) { + return request + .getFeedRange() + .populateFeedRangeFilteringHeaders(RxDocumentClientImpl.this.partitionKeyRangeCache, request, RxDocumentClientImpl.this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request)) + .flatMap(ignore -> Mono.just(request)); + } else { + return Mono.just(request); + } + } + + @Override + public Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions) { + + String collectionRid = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getCollectionRid(queryRequestOptions); + + if (RxDocumentClientImpl.this.requiresFeedRangeFiltering(request)) { + return RxDocumentClientImpl.this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), collectionRid, null, null) + .flatMap(collectionRoutingMapValueHolder -> { + + if (collectionRoutingMapValueHolder.v == null) { + return Mono.error(new NotFoundException("collectionRoutingMap could not be found!")); + } + + RxDocumentClientImpl.this.addPartitionLevelUnavailableRegionsForFeedRequest(request, queryRequestOptions, collectionRoutingMapValueHolder.v); + return Mono.just(request); + }); + } else { + return Mono.just(request); + } + } }; } @@ -5503,6 +5537,33 @@ private void addPartitionLevelUnavailableRegionsForRequest( } } + private void addPartitionLevelUnavailableRegionsForFeedRequest( + RxDocumentServiceRequest request, + CosmosQueryRequestOptions options, + CollectionRoutingMap collectionRoutingMap) { + + checkNotNull(collectionRoutingMap, "collectionRoutingMap cannot be null!"); + + PartitionKeyRange resolvedPartitionKeyRange = null; + + if (request.getPartitionKeyRangeIdentity() != null) { + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByPartitionKeyRangeId(request.getPartitionKeyRangeIdentity().getPartitionKeyRangeId()); + } else if (request.getPartitionKeyInternal() != null) { + String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(request.getPartitionKeyInternal(), ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getPartitionKeyDefinition(options)); + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + } + + checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationsForPartition(resolvedPartitionKeyRange); + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); + + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + } + private Mono> wrapPointOperationWithAvailabilityStrategy( ResourceType resourceType, OperationType operationType, @@ -5549,11 +5610,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); PartitionKeyDefinition partitionKeyDefinition = collection.getPartitionKey(); - PartitionKey partitionKey = nonNullRequestOptions.getPartitionKey(); // todo: validate if the below is possible if (collectionRoutingMapValueHolder.v == null) { - return Mono.error(new NullPointerException("collectionRoutingMapValueHolder.v cannot be null!")); + return Mono.error(new NotFoundException("collectionRoutingMapValueHolder.v cannot be null!")); } nonNullRequestOptions.setPartitionKeyDefinition(collection.getPartitionKey()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index 5767c5c1ecd5..d9304bc8c8bd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -195,9 +195,16 @@ private Mono> executeInternalFuncCore( return BackoffRetryUtility.executeRetry(() -> { this.retries.incrementAndGet(); - return executeRequestAsync( - this.factoryMethod, - req); + return Mono.just(req) + .flatMap(request -> client.populateFeedRangeHeader(request)) + .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions)) + .flatMap(request -> { + + finalRetryPolicyInstance.onBeforeSendRequest(request); + return executeRequestAsync( + this.factoryMethod, + request); + }); }, finalRetryPolicyInstance) .map(tFeedResponse -> { this.fetchSchedulingMetrics.stop(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index 4239f7f59ced..be94f846f4d8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -18,6 +18,7 @@ import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.ImmutablePair; +import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.query.metrics.ClientSideMetrics; import com.azure.cosmos.implementation.query.metrics.FetchExecutionRangeAccumulator; @@ -142,14 +143,19 @@ public DocumentProducer( executeFeedOperationCore = (clientRetryPolicyFactory, request) -> { DocumentClientRetryPolicy finalRetryPolicy = clientRetryPolicyFactory.get(); return ObservableHelper.inlineIfPossibleAsObs( - () -> { - if(finalRetryPolicy != null) { - finalRetryPolicy.onBeforeSendRequest(request); - } - - ++retries; - return executeRequestFunc.apply(request); - }, finalRetryPolicy); + () -> Mono + .just(request) + .flatMap(req -> client.populateFeedRangeHeader(req)) + .flatMap(req -> client.addPartitionLevelUnavailableRegionsOnRequest(req, cosmosQueryRequestOptions)) + .flatMap(req -> { + + if(finalRetryPolicy != null) { + finalRetryPolicy.onBeforeSendRequest(req); + } + ++retries; + return Mono.just(req); + }) + .flatMap(req -> executeRequestFunc.apply(req)), finalRetryPolicy); }; this.correlatedActivityId = correlatedActivityId; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java index 42583fb2d5be..677be0dc8aa0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java @@ -11,6 +11,7 @@ import com.azure.cosmos.implementation.IRetryPolicyFactory; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.RxDocumentServiceResponse; +import com.azure.cosmos.models.CosmosQueryRequestOptions; import reactor.core.publisher.Mono; import java.util.function.BiFunction; @@ -41,13 +42,13 @@ public interface IDocumentQueryClient { /** * TODO: this should be async returning observable - * @return + * @return */ ConsistencyLevel getDefaultConsistencyLevelAsync(); /** * TODO: this should be async returning observable - * @return + * @return */ ConsistencyLevel getDesiredConsistencyLevelAsync(); @@ -85,4 +86,8 @@ enum QueryCompatibilityMode { } Mono readFeedAsync(RxDocumentServiceRequest request); + + Mono populateFeedRangeHeader(RxDocumentServiceRequest request); + + Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java index ecaacb5ec272..1fd3f1e62e79 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java @@ -7,6 +7,7 @@ import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.HttpConstants; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; @@ -84,6 +85,8 @@ protected void initialize( } } + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setPartitionKeyDefinition(cosmosQueryRequestOptions, collection.getPartitionKey()); + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setCollectionRid(cosmosQueryRequestOptions, collection.getResourceId()); return this.createDocumentServiceRequestWithFeedRange(headers, querySpecForInit, partitionKeyInternal, feedRange, collection.getResourceId(), cosmosQueryRequestOptions.getThroughputControlGroupName()); }; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index a82199422205..d0e5a852b22e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -60,6 +60,8 @@ public class CosmosQueryRequestOptions { private CosmosEndToEndOperationLatencyPolicyConfig cosmosEndToEndOperationLatencyPolicyConfig; private List excludeRegions; private List cancelledRequestDiagnosticsTracker = new ArrayList<>(); + private PartitionKeyDefinition partitionKeyDefinition; + private String collectionRid; /** * Instantiates a new query request options. @@ -105,6 +107,8 @@ public CosmosQueryRequestOptions() { this.cosmosEndToEndOperationLatencyPolicyConfig = options.cosmosEndToEndOperationLatencyPolicyConfig; this.excludeRegions = options.excludeRegions; this.cancelledRequestDiagnosticsTracker = options.cancelledRequestDiagnosticsTracker; + this.partitionKeyDefinition = options.partitionKeyDefinition; + this.collectionRid = options.collectionRid; } void setOperationContextAndListenerTuple(OperationContextAndListenerTuple operationContextAndListenerTuple) { @@ -703,6 +707,22 @@ void setCancelledRequestDiagnosticsTracker(List cancelledRequ this.cancelledRequestDiagnosticsTracker = cancelledRequestDiagnosticsTracker; } + PartitionKeyDefinition getPartitionKeyDefinition() { + return partitionKeyDefinition; + } + + void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + this.partitionKeyDefinition = partitionKeyDefinition; + } + + public String getCollectionRid() { + return collectionRid; + } + + public void setCollectionRid(String collectionRid) { + this.collectionRid = collectionRid; + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to access this class outside of this package.// /////////////////////////////////////////////////////////////////////////////////////////// @@ -863,6 +883,26 @@ public String getRequestContinuation(CosmosQueryRequestOptions options) { return options.getRequestContinuation(); } + @Override + public void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition) { + options.setPartitionKeyDefinition(partitionKeyDefinition); + } + + @Override + public PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options) { + return options.getPartitionKeyDefinition(); + } + + @Override + public void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid) { + options.setCollectionRid(collectionRid); + } + + @Override + public String getCollectionRid(CosmosQueryRequestOptions options) { + return options.getCollectionRid(); + } + @Override public List getExcludeRegions(CosmosQueryRequestOptions options) { return options.getExcludedRegions(); From 5e3ca33269cf2f6a82138571943f11da99fee5db Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 1 May 2024 17:29:06 -0400 Subject: [PATCH 022/140] Adding circuit breaking for 408s in point operations. --- ...itionEndpointManagerForCircuitBreaker.java | 57 +++++++++++++++++++ .../implementation/RxDocumentClientImpl.java | 13 ++++- 2 files changed, 67 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index db98349cc735..0f4509c79e93 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -97,6 +97,63 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR return false; } + public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request, URI failedLocation) { + + if (request == null) { + throw new IllegalArgumentException("request cannot be null!"); + } + + if (request.requestContext == null) { + + if (logger.isDebugEnabled()) { + logger.warn("requestContext is null!"); + } + + return false; + } + + PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + + if (partitionKeyRange == null) { + return false; + } + + AtomicBoolean isFailoverPossible = new AtomicBoolean(true); + AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); + + this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + + if (partitionKeyRangeFailoverInfoAsVal == null) { + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); + } + + isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(request)); + + if (isFailureThresholdBreached.get()) { + + UnmodifiableList applicableEndpoints = request.isReadOnly() ? + this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : + this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); + + isFailoverPossible.set( + partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints)); + } + + return partitionKeyRangeFailoverInfoAsVal; + }); + + // set to true if and only if failure threshold exceeded for the region + // and if failover is possible + // a failover is only possible when there are available regions left to fail over to + if (isFailoverPossible.get()) { + return true; + } + + // no regions to fail over to + this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); + return false; + } + @Override public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index a3e3e5380615..01bff726c602 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -17,6 +17,7 @@ import com.azure.cosmos.DirectConnectionConfig; import com.azure.cosmos.SessionRetryOptions; import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; +import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.ImmutablePair; import com.azure.cosmos.implementation.batch.BatchResponseParser; @@ -105,6 +106,7 @@ import java.util.Locale; import java.util.Map; import java.util.NoSuchElementException; +import java.util.Optional; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; @@ -2227,9 +2229,14 @@ private Mono handleRegionFeedbackForPointOperation( if (!isRequestHedged) { if (throwable instanceof OperationCancelledException) { - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); - } else if (throwable instanceof ServiceUnavailableException) { - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get()); + + OperationCancelledException exception = Utils.as(throwable, OperationCancelledException.class); + Optional firstContactedRegion = exception.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().stream().findFirst(); + + UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); } } }); From ebdbd8f664cc14c605567e0acc8329884e6b51f2 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 8 May 2024 08:52:43 -0400 Subject: [PATCH 023/140] Handling 408 cases for queries. --- .../implementation/ClientRetryPolicyTest.java | 20 ++-- .../PartitionLevelCircuitBreakerTests.java | 5 + ...eCollectionAwareClientRetryPolicyTest.java | 8 +- .../RxGatewayStoreModelTest.java | 8 +- .../AddressResolverTest.java | 5 +- .../implementation/query/FetcherTest.java | 7 +- .../implementation/ClientRetryPolicy.java | 6 +- .../DocumentServiceRequestContext.java | 1 + ...itionEndpointManagerForCircuitBreaker.java | 100 ++---------------- .../IGlobalPartitionEndpointManager.java | 1 - .../ImplementationBridgeHelpers.java | 5 + .../cosmos/implementation/RetryPolicy.java | 4 +- .../implementation/RxDocumentClientImpl.java | 16 ++- .../RxDocumentServiceRequest.java | 3 - .../implementation/RxGatewayStoreModel.java | 8 +- .../directconnectivity/AddressResolver.java | 13 ++- .../GlobalAddressResolver.java | 14 +-- .../directconnectivity/IAddressResolver.java | 4 +- .../directconnectivity/StoreClient.java | 5 +- .../rntbd/RntbdRequestManager.java | 1 + .../query/ChangeFeedFetcher.java | 2 +- .../DefaultDocumentQueryExecutionContext.java | 2 +- .../query/DocumentProducer.java | 10 +- .../cosmos/implementation/query/Fetcher.java | 42 +++++++- .../query/IDocumentQueryClient.java | 6 ++ .../implementation/query/Paginator.java | 36 +++++-- ...ServerSideOnlyContinuationFetcherImpl.java | 11 +- .../models/CosmosQueryRequestOptions.java | 23 ++++ 28 files changed, 210 insertions(+), 156 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java index 5492c4a843be..d37574d46891 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java @@ -44,7 +44,7 @@ public static Object[][] operationProvider() { public void networkFailureOnRead() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); @@ -84,7 +84,7 @@ public void shouldRetryOnGatewayTimeout( boolean shouldCrossRegionRetry) throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(true)); @@ -127,7 +127,7 @@ public void shouldRetryOnGatewayTimeout( public void tcpNetworkFailureOnRead() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); @@ -175,7 +175,7 @@ public void tcpNetworkFailureOnRead() throws Exception { public void networkFailureOnWrite() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); @@ -206,7 +206,7 @@ public void networkFailureOnWrite() throws Exception { public void tcpNetworkFailureOnWrite() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); @@ -274,7 +274,7 @@ public void tcpNetworkFailureOnWrite() throws Exception { public void networkFailureOnUpsert() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); @@ -307,7 +307,7 @@ public void networkFailureOnUpsert() throws Exception { public void tcpNetworkFailureOnUpsert() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); @@ -343,7 +343,7 @@ public void tcpNetworkFailureOnUpsert() throws Exception { public void networkFailureOnDelete() throws Exception { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); @@ -377,7 +377,7 @@ public void networkFailureOnDelete() throws Exception { public void tcpNetworkFailureOnDelete() throws Exception { ThrottlingRetryOptions retryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("http://localhost")).when(endpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); @@ -413,7 +413,7 @@ public void tcpNetworkFailureOnDelete() throws Exception { public void onBeforeSendRequestNotInvoked() { ThrottlingRetryOptions throttlingRetryOptions = new ThrottlingRetryOptions(); GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(Mockito.eq(null), Mockito.eq(false)); ClientRetryPolicy clientRetryPolicy = new ClientRetryPolicy(mockDiagnosticsClientContext(), endpointManager, true, throttlingRetryOptions, null, globalPartitionEndpointManager); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index b0104a78d7b8..5472ea3e0ef8 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -759,6 +759,11 @@ public void queryHits503InFirstPreferredRegion() { } } + @Test(groups = {"multi-master"}) + public void readHits408InFirstPreferredRegion() { + + } + private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { Iterator locationIterator = writeOnly ? databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java index d20c623b01f8..e10e7dc36c0c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java @@ -23,7 +23,7 @@ public class RenameCollectionAwareClientRetryPolicyTest { @Test(groups = "unit", timeOut = TIMEOUT) public void onBeforeSendRequestNotInvoked() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); @@ -53,7 +53,7 @@ public void onBeforeSendRequestNotInvoked() { @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithNotFoundStatusCode() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); RxClientCollectionCache rxClientCollectionCache = Mockito.mock(RxClientCollectionCache.class); @@ -80,7 +80,7 @@ public void shouldRetryWithNotFoundStatusCode() { @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithNotFoundStatusCodeAndReadSessionNotAvailableSubStatusCode() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); @@ -119,7 +119,7 @@ public void shouldRetryWithNotFoundStatusCodeAndReadSessionNotAvailableSubStatus @Test(groups = "unit", timeOut = TIMEOUT) public void shouldRetryWithGenericException() { GlobalEndpointManager endpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(Mono.empty()).when(endpointManager).refreshLocationAsync(eq(null), eq(false)); IRetryPolicyFactory retryPolicyFactory = new RetryPolicy(mockDiagnosticsClientContext(), endpointManager, ConnectionPolicy.getDefaultPolicy(), globalPartitionEndpointManager); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index 89fccfa94374..f72c7f1cf8b4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -79,7 +79,7 @@ public void readTimeout() throws Exception { QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; UserAgentContainer userAgentContainer = new UserAgentContainer(); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); @@ -124,7 +124,7 @@ public void serviceUnavailable() throws Exception { QueryCompatibilityMode queryCompatibilityMode = QueryCompatibilityMode.Default; UserAgentContainer userAgentContainer = new UserAgentContainer(); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); HttpClient httpClient = Mockito.mock(HttpClient.class); @@ -178,7 +178,7 @@ public void applySessionToken( Mockito.doReturn(sdkGlobalSessionToken).when(sessionContainer).resolveGlobalSessionToken(any()); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); @@ -250,7 +250,7 @@ public void validateApiType() throws Exception { Mockito.doReturn(sdkGlobalSessionToken).when(sessionContainer).resolveGlobalSessionToken(any()); GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(new URI("https://localhost")) .when(globalEndpointManager).resolveServiceEndpoint(any()); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java index bc40a4a1a1a3..986cdc8a8c94 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java @@ -7,6 +7,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; @@ -66,14 +67,14 @@ public class AddressResolverTest { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache fabricAddressCache; - private IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private int collectionCacheRefreshedCount; private Map routingMapRefreshCount; private Map addressesRefreshCount; @BeforeClass(groups = "unit") public void before_AddressResolverTest() throws Exception { - this.globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + this.globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); this.addressResolver = new AddressResolver(this.globalPartitionEndpointManager); this.collectionCache = Mockito.mock(RxCollectionCache.class); this.collectionRoutingMapCache = Mockito.mock(ICollectionRoutingMapCache.class); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java index 92727105c3cc..9887984654cc 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java @@ -105,7 +105,11 @@ public void query(CosmosQueryRequestOptions options, int top) { ImplementationBridgeHelpers .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() - .getCancelledRequestDiagnosticsTracker(options)); + .getCancelledRequestDiagnosticsTracker(options), + ImplementationBridgeHelpers + .CosmosQueryRequestOptionsHelper + .getCosmosQueryRequestOptionsAccessor() + .getPkRangesWithSuccessfulRequests(options)); validateFetcher(fetcher, options, top, feedResponseList); } @@ -170,6 +174,7 @@ public void changeFeed() { .CosmosChangeFeedRequestOptionsHelper .getCosmosChangeFeedRequestOptionsAccessor() .getOperationContext(options), + null, null); validateFetcher(fetcher, options, feedResponseList); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 8261c16dd100..1e342bd457e7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -52,14 +52,14 @@ public class ClientRetryPolicy extends DocumentClientRetryPolicy { private RxDocumentServiceRequest request; private RxCollectionCache rxCollectionCache; private final FaultInjectionRequestContext faultInjectionRequestContext; - private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, GlobalEndpointManager globalEndpointManager, boolean enableEndpointDiscovery, ThrottlingRetryOptions throttlingRetryOptions, RxCollectionCache rxCollectionCache, - IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { this.globalEndpointManager = globalEndpointManager; this.failoverRetryCount = 0; @@ -322,7 +322,7 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( // if partition-level circuit breaker is enabled if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(this.request); + this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); } // The request has failed with 503, SDK need to decide whether it is safe to retry for write operations diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index ef3bd6fd8668..c22bd3443d36 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -51,6 +51,7 @@ public class DocumentServiceRequestContext implements Cloneable { private volatile List excludeRegions; private volatile List unavailableRegionsForPartition; private volatile boolean isRequestHedged = false; + public volatile boolean isRequestSendingStarted = false; // For cancelled rntbd requests, track the response as OperationCancelledException which later will be used to populate the cosmosDiagnostics public final Map rntbdCancelledRequestMap = new ConcurrentHashMap<>(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 0f4509c79e93..09113489486a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -22,7 +22,7 @@ import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; -public class GlobalPartitionEndpointManagerForCircuitBreaker implements IGlobalPartitionEndpointManager { +public class GlobalPartitionEndpointManagerForCircuitBreaker { private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); @@ -38,65 +38,6 @@ public void init() { this.updateStaleLocationInfo().subscribeOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE).subscribe(); } - @Override - public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request) { - - if (request == null) { - throw new IllegalArgumentException("request cannot be null!"); - } - - if (request.requestContext == null) { - - if (logger.isDebugEnabled()) { - logger.warn("requestContext is null!"); - } - - return false; - } - - PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; - URI failedLocation = request.requestContext.locationEndpointToRoute; - - if (partitionKeyRange == null) { - return false; - } - - AtomicBoolean isFailoverPossible = new AtomicBoolean(true); - AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - - this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { - - if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); - } - - isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(request)); - - if (isFailureThresholdBreached.get()) { - - UnmodifiableList applicableEndpoints = request.isReadOnly() ? - this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : - this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); - - isFailoverPossible.set( - partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints)); - } - - return partitionKeyRangeFailoverInfoAsVal; - }); - - // set to true if and only if failure threshold exceeded for the region - // and if failover is possible - // a failover is only possible when there are available regions left to fail over to - if (isFailoverPossible.get()) { - return true; - } - - // no regions to fail over to - this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); - return false; - } - public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request, URI failedLocation) { if (request == null) { @@ -127,7 +68,7 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); } - isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(request)); + isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(failedLocation)); if (isFailureThresholdBreached.get()) { @@ -154,7 +95,6 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR return false; } - @Override public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request) { if (request == null) { @@ -251,39 +191,21 @@ private static class PartitionLevelLocationUnavailabilityInfo { this.locationEndpointToFailureMetricsForPartition = new ConcurrentHashMap<>(); } - public boolean isFailureThresholdBreachedForLocation(RxDocumentServiceRequest request) { + public boolean isFailureThresholdBreachedForLocation(URI locationWithFailure) { AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - if (request.locationLevelCircuitBreakerRequestContext == null) { - return false; - } - - if (!request.locationLevelCircuitBreakerRequestContext.getFailuresForAllLocations().isEmpty()) { - - ConcurrentHashMap> failuresForAllLocations - = request.locationLevelCircuitBreakerRequestContext.getFailuresForAllLocations(); - - for (Map.Entry> failuresPerLocation : failuresForAllLocations.entrySet()) { - - URI location = failuresPerLocation.getKey(); - ConcurrentHashMap errorCounts = failuresPerLocation.getValue(); + this.locationEndpointToFailureMetricsForPartition.compute(locationWithFailure, (locationAsKey, failureMetricsForPartitionAsVal) -> { - this.locationEndpointToFailureMetricsForPartition.compute(location, (locationAsKey, failureMetricsForPartitionAsVal) -> { - - if (failureMetricsForPartitionAsVal == null) { - failureMetricsForPartitionAsVal = new FailureMetricsForPartition(); - } + if (failureMetricsForPartitionAsVal == null) { + failureMetricsForPartitionAsVal = new FailureMetricsForPartition(); + } - for (Map.Entry countForError : errorCounts.entrySet()) { - failureMetricsForPartitionAsVal.handleFailure(countForError.getValue()); - } + failureMetricsForPartitionAsVal.handleFailure(1); - isFailureThresholdBreached.set(failureMetricsForPartitionAsVal.isFailureThresholdBreached()); - return failureMetricsForPartitionAsVal; - }); - } - } + isFailureThresholdBreached.set(failureMetricsForPartitionAsVal.isFailureThresholdBreached()); + return failureMetricsForPartitionAsVal; + }); return isFailureThresholdBreached.get(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java index 8c1c327fb004..994e67bcbd0b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java @@ -4,6 +4,5 @@ package com.azure.cosmos.implementation; public interface IGlobalPartitionEndpointManager { - boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request); boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 468305d63e64..30df715c8267 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -81,6 +81,7 @@ import java.util.EnumSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.concurrent.Callable; import java.util.concurrent.ConcurrentMap; @@ -298,6 +299,10 @@ void setCancelledRequestDiagnosticsTracker( void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid); String getCollectionRid(CosmosQueryRequestOptions options); + + void setPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options, Set pkRangesWithSuccessfulRequests); + + Set getPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java index 4a11d2919c19..f26cfc76a4b1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java @@ -15,7 +15,7 @@ public class RetryPolicy implements IRetryPolicyFactory { private final DiagnosticsClientContext diagnosticsClientContext; private final GlobalEndpointManager globalEndpointManager; - private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private final boolean enableEndpointDiscovery; private final ThrottlingRetryOptions throttlingRetryOptions; private RxCollectionCache rxCollectionCache; @@ -24,7 +24,7 @@ public RetryPolicy( DiagnosticsClientContext diagnosticsClientContext, GlobalEndpointManager globalEndpointManager, ConnectionPolicy connectionPolicy, - IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { this.diagnosticsClientContext = diagnosticsClientContext; this.enableEndpointDiscovery = connectionPolicy.isEndpointDiscoveryEnabled(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 01bff726c602..489dfcc0559e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -732,7 +732,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, ApiType apiType, - IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { return new RxGatewayStoreModel( this, sessionContainer, @@ -3651,6 +3651,16 @@ public Mono addPartitionLevelUnavailableRegionsOnReque return Mono.just(request); } } + + @Override + public GlobalEndpointManager getGlobalEndpointManager() { + return RxDocumentClientImpl.this.getGlobalEndpointManager(); + } + + @Override + public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { + return RxDocumentClientImpl.this.globalPartitionEndpointManagerForCircuitBreaker; + } }; } @@ -5145,7 +5155,9 @@ private Flux> nonDocumentReadFeedInternal( nonNullOptions, createRequestFunc, executeFunc, - maxPageSize); + maxPageSize, + this.globalEndpointManager, + this.globalPartitionEndpointManagerForCircuitBreaker); } @Override diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java index 69c2a3e52efc..df0bf4b865a0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java @@ -57,7 +57,6 @@ public class RxDocumentServiceRequest implements Cloneable { public DocumentServiceRequestContext requestContext; public FaultInjectionRequestContext faultInjectionRequestContext; - public LocationLevelCircuitBreakerRequestContext locationLevelCircuitBreakerRequestContext; // has the non serialized value of the partition-key private PartitionKeyInternal partitionKeyInternal; @@ -175,7 +174,6 @@ private RxDocumentServiceRequest(DiagnosticsClientContext clientContext, this.authorizationTokenType = authorizationTokenType; this.requestContext = new DocumentServiceRequestContext(); this.faultInjectionRequestContext = new FaultInjectionRequestContext(); - this.locationLevelCircuitBreakerRequestContext = new LocationLevelCircuitBreakerRequestContext(false); if (StringUtils.isNotEmpty(this.headers.get(WFConstants.BackendHeaders.PARTITION_KEY_RANGE_ID))) this.partitionKeyRangeIdentity = PartitionKeyRangeIdentity.fromHeader(this.headers.get(WFConstants.BackendHeaders.PARTITION_KEY_RANGE_ID)); } @@ -196,7 +194,6 @@ private RxDocumentServiceRequest(DiagnosticsClientContext clientContext, this.clientContext = clientContext; this.requestContext = new DocumentServiceRequestContext(); this.faultInjectionRequestContext = new FaultInjectionRequestContext(); - this.locationLevelCircuitBreakerRequestContext = new LocationLevelCircuitBreakerRequestContext(false); this.operationType = operationType; this.resourceType = resourceType; this.requestContext.sessionToken = null; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 7d9999adf593..6849acd0bb75 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -61,7 +61,7 @@ public class RxGatewayStoreModel implements RxStoreModel { private final HttpClient httpClient; private final QueryCompatibilityMode queryCompatibilityMode; private final GlobalEndpointManager globalEndpointManager; - private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private ConsistencyLevel defaultConsistencyLevel; private ISessionContainer sessionContainer; private ThroughputControlStore throughputControlStore; @@ -80,7 +80,7 @@ public RxGatewayStoreModel( GlobalEndpointManager globalEndpointManager, HttpClient httpClient, ApiType apiType, - IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { this.clientContext = clientContext; this.defaultHeaders = new HashMap<>(); this.defaultHeaders.put(HttpConstants.HttpHeaders.CACHE_CONTROL, @@ -248,7 +248,9 @@ public Mono performRequestInternal(RxDocumentServiceR HttpHeaders httpHeaders = this.getHttpRequestHeaders(request.getHeaders()); - Flux contentAsByteArray = request.getContentAsByteArrayFlux(); + Flux contentAsByteArray = request.getContentAsByteArrayFlux().doOnSubscribe(ignore -> { + request.requestContext.isRequestSendingStarted = true; + }); HttpRequest httpRequest = new HttpRequest(method, requestUri, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index 3f2ffc9109f4..e987044ac366 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -9,9 +9,9 @@ import com.azure.cosmos.implementation.BadRequestException; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.Exceptions; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; -import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.InternalServerErrorException; import com.azure.cosmos.implementation.InvalidPartitionException; import com.azure.cosmos.implementation.NotFoundException; @@ -22,7 +22,6 @@ import com.azure.cosmos.implementation.ResourceId; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; -import com.azure.cosmos.implementation.ServiceUnavailableException; import com.azure.cosmos.implementation.Strings; import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.NotImplementedException; @@ -56,10 +55,10 @@ public class AddressResolver implements IAddressResolver { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache addressCache; - private IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; - public AddressResolver(IGlobalPartitionEndpointManager globalPartitionEndpointManager) { - this.globalPartitionEndpointManager = globalPartitionEndpointManager; + public AddressResolver(GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { + this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; } public void initializeCaches( @@ -101,8 +100,8 @@ public void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proact } @Override - public IGlobalPartitionEndpointManager getGlobalPartitionEndpointManager() { - return this.globalPartitionEndpointManager; + public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { + return this.globalPartitionEndpointManagerForCircuitBreaker; } private static boolean isSameCollection(PartitionKeyRange initiallyResolved, PartitionKeyRange newlyResolved) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index 32eb60f9b121..49ff2bd28dd3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -11,8 +11,8 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; -import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.RxDocumentServiceRequest; @@ -48,7 +48,7 @@ public class GlobalAddressResolver implements IAddressResolver { private final static int MaxBackupReadRegions = 3; private final DiagnosticsClientContext diagnosticsClientContext; private final GlobalEndpointManager endpointManager; - private final IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; private final Protocol protocol; private final IAuthorizationTokenProvider tokenProvider; private final UserAgentContainer userAgentContainer; @@ -76,7 +76,7 @@ public GlobalAddressResolver( GatewayServiceConfigurationReader serviceConfigReader, ConnectionPolicy connectionPolicy, ApiType apiType, - IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { this.diagnosticsClientContext = diagnosticsClientContext; this.httpClient = httpClient; this.endpointManager = endpointManager; @@ -93,7 +93,7 @@ public GlobalAddressResolver( this.maxEndpoints = maxBackupReadEndpoints + 2; // for write and alternate write getEndpoint (during failover) this.addressCacheByEndpoint = new ConcurrentHashMap<>(); this.apiType = apiType; - this.globalPartitionEndpointManager = globalPartitionEndpointManager; + this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; for (URI endpoint : endpointManager.getWriteEndpoints()) { this.getOrAddEndpoint(endpoint); @@ -250,8 +250,8 @@ public void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proact } @Override - public IGlobalPartitionEndpointManager getGlobalPartitionEndpointManager() { - return this.globalPartitionEndpointManager; + public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { + return this.globalPartitionEndpointManagerForCircuitBreaker; } @Override @@ -299,7 +299,7 @@ private EndpointCache getOrAddEndpoint(URI endpoint) { this.connectionPolicy, this.proactiveOpenConnectionsProcessor, this.gatewayServerErrorInjector); - AddressResolver addressResolver = new AddressResolver(this.globalPartitionEndpointManager); + AddressResolver addressResolver = new AddressResolver(this.globalPartitionEndpointManagerForCircuitBreaker); addressResolver.initializeCaches(this.collectionCache, this.routingMapProvider, gatewayAddressCache); EndpointCache cache = new EndpointCache(); cache.addressCache = gatewayAddressCache; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java index e36a299a1053..b719a350ef7d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java @@ -4,7 +4,7 @@ package com.azure.cosmos.implementation.directconnectivity; import com.azure.cosmos.CosmosContainerProactiveInitConfig; -import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.directconnectivity.rntbd.ProactiveOpenConnectionsProcessor; @@ -34,5 +34,5 @@ Mono resolveAsync( */ void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proactiveOpenConnectionsProcessor); - IGlobalPartitionEndpointManager getGlobalPartitionEndpointManager(); + GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index 7b3ab26723d7..fec82c4b0e05 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -12,6 +12,7 @@ import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.Exceptions; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; @@ -192,8 +193,8 @@ private RxDocumentServiceResponse completeResponse( new RxDocumentServiceResponse(this.diagnosticsClientContext, storeResponse); rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = addressResolver.getGlobalPartitionEndpointManager(); - globalPartitionEndpointManager.tryBookmarkRegionSuccessForPartitionKeyRange(request); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = addressResolver.getGlobalPartitionEndpointManagerForCircuitBreaker(); + globalPartitionEndpointManagerForCircuitBreaker.tryBookmarkRegionSuccessForPartitionKeyRange(request); return rxDocumentServiceResponse; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java index 6d270bc21ff4..3bb49746e35f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java @@ -636,6 +636,7 @@ public void write(final ChannelHandlerContext context, final Object message, fin if (!record.isCancelled()) { record.setSendingRequestHasStarted(); + record.args().serviceRequest().requestContext.isRequestSendingStarted = true; this.timestamps.channelWriteAttempted(); if (this.serverErrorInjector != null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java index df8f92710910..1109c4a2c85c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java @@ -48,7 +48,7 @@ public ChangeFeedFetcher( int maxItemCount, boolean isSplitHandlingDisabled, OperationContextAndListenerTuple operationContext) { - super(executeFunc, true, top, maxItemCount, operationContext, null); + super(executeFunc, true, top, maxItemCount, operationContext, null, null, null, null); checkNotNull(client, "Argument 'client' must not be null."); checkNotNull(createRequestFunc, "Argument 'createRequestFunc' must not be null."); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index d9304bc8c8bd..c549cf8d586d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -125,7 +125,7 @@ public Flux> executeAsync() { return Paginator .getPaginatedQueryResultAsObservable( - newCosmosQueryRequestOptions, createRequestFunc, executeFunc, maxPageSize); + newCosmosQueryRequestOptions, createRequestFunc, executeFunc, maxPageSize, null, null); } public Mono> getTargetPartitionKeyRanges(String resourceId, List> queryRanges) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index be94f846f4d8..5453be4040cd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -211,8 +211,14 @@ public Flux produceAsync() { ImplementationBridgeHelpers .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() - .getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions) - ) + .getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), + ImplementationBridgeHelpers + .CosmosQueryRequestOptionsHelper + .getCosmosQueryRequestOptionsAccessor() + .getPkRangesWithSuccessfulRequests(cosmosQueryRequestOptions), + client.getGlobalEndpointManager(), + client.getGlobalPartitionEndpointManagerForCircuitBreaker() + ) .map(rsp -> { this.lastResponseContinuationToken = rsp.getContinuationToken(); this.fetchExecutionRangeAccumulator.endFetchRange(rsp.getActivityId(), diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 6a113b8209c1..7f4168ff9eb4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -3,8 +3,14 @@ package com.azure.cosmos.implementation.query; +import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.implementation.Configs; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.ModelBridgeInternal; @@ -13,11 +19,15 @@ import reactor.core.publisher.Mono; import reactor.core.publisher.SignalType; +import java.net.URI; import java.util.List; +import java.util.Optional; +import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.function.Supplier; +import java.util.stream.Collectors; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -33,6 +43,9 @@ abstract class Fetcher { private final AtomicInteger maxItemCount; private final AtomicInteger top; private final List cancelledRequestDiagnosticsTracker; + private final Set pkRangesWithSuccessfulRequests; + private final GlobalEndpointManager globalEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; public Fetcher( Function>> executeFunc, @@ -40,7 +53,10 @@ public Fetcher( int top, int maxItemCount, OperationContextAndListenerTuple operationContext, - List cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + Set pkRangesWithSuccessfulRequests, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { checkNotNull(executeFunc, "Argument 'executeFunc' must not be null."); @@ -64,6 +80,9 @@ public Fetcher( } this.shouldFetchMore = new AtomicBoolean(true); this.cancelledRequestDiagnosticsTracker = cancelledRequestDiagnosticsTracker; + this.pkRangesWithSuccessfulRequests = pkRangesWithSuccessfulRequests; + this.globalEndpointManager = globalEndpointManager; + this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; } public final boolean shouldFetchMore() { @@ -154,7 +173,10 @@ private Mono> nextPage(RxDocumentServiceRequest request) { updateState(rsp, request); return rsp; }) - .doOnNext(response -> completed.set(true)) + .doOnNext(response -> { + completed.set(true); + this.pkRangesWithSuccessfulRequests.add(request.requestContext.resolvedPartitionKeyRange); + }) .doOnError(throwable -> completed.set(true)) .doFinally(signalType -> { // If the signal type is not cancel(which means success or error), we do not need to tracking the diagnostics here @@ -169,6 +191,22 @@ private Mono> nextPage(RxDocumentServiceRequest request) { return; } + if (request.requestContext.isRequestCancelledOnTimeout().get() && + Configs.isPartitionLevelCircuitBreakerEnabled() && + !request.requestContext.isRequestHedged() && +// request.requestContext.isRequestSendingStarted && + !this.pkRangesWithSuccessfulRequests.contains(request.requestContext.resolvedPartitionKeyRange)) { + + if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { + Optional firstContactedRegion = request.requestContext.cosmosDiagnostics.getDiagnosticsContext().getContactedRegionNames().stream().findFirst(); + + UnmodifiableList endpoints = request.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(request, filteredEndpoint.get(0)); + } + } + if (request.requestContext != null && request.requestContext.cosmosDiagnostics != null) { this.cancelledRequestDiagnosticsTracker.add(request.requestContext.cosmosDiagnostics); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java index 677be0dc8aa0..ed9c81eaf64a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java @@ -3,6 +3,8 @@ package com.azure.cosmos.implementation.query; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.caches.IPartitionKeyRangeCache; @@ -90,4 +92,8 @@ enum QueryCompatibilityMode { Mono populateFeedRangeHeader(RxDocumentServiceRequest request); Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions); + + GlobalEndpointManager getGlobalEndpointManager(); + + GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java index 99f1d708f443..3557f7ac866a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java @@ -3,7 +3,10 @@ package com.azure.cosmos.implementation.query; import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; @@ -19,6 +22,7 @@ import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.Supplier; @@ -35,7 +39,9 @@ public static Flux> getPaginatedQueryResultAsObservable( CosmosQueryRequestOptions cosmosQueryRequestOptions, BiFunction createRequestFunc, Function>> executeFunc, - int maxPageSize) { + int maxPageSize, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { int top = -1; return getPaginatedQueryResultAsObservable( @@ -52,7 +58,13 @@ public static Flux> getPaginatedQueryResultAsObservable( ImplementationBridgeHelpers .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() - .getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions)); + .getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), + ImplementationBridgeHelpers + .CosmosQueryRequestOptionsHelper + .getCosmosQueryRequestOptionsAccessor() + .getPkRangesWithSuccessfulRequests(cosmosQueryRequestOptions), + globalEndpointManager, + globalPartitionEndpointManagerForCircuitBreaker); } public static Flux> getPaginatedQueryResultAsObservable( @@ -63,7 +75,10 @@ public static Flux> getPaginatedQueryResultAsObservable( int maxPageSize, int maxPreFetchCount, OperationContextAndListenerTuple operationContext, - List cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + Set pkRangesWithSuccessfulRequests, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { return getPaginatedQueryResultAsObservable( continuationToken, @@ -74,7 +89,10 @@ public static Flux> getPaginatedQueryResultAsObservable( maxPreFetchCount, false, operationContext, - cancelledRequestDiagnosticsTracker); + cancelledRequestDiagnosticsTracker, + pkRangesWithSuccessfulRequests, + globalEndpointManager, + globalPartitionEndpointManagerForCircuitBreaker); } public static Flux> getChangeFeedQueryResultAsObservable( @@ -137,7 +155,10 @@ private static Flux> getPaginatedQueryResultAsObservable( int preFetchCount, boolean isChangeFeed, OperationContextAndListenerTuple operationContext, - List cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + Set pkRangesWithSuccessfulRequests, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { return getPaginatedQueryResultAsObservable( () -> new ServerSideOnlyContinuationFetcherImpl<>( @@ -148,7 +169,10 @@ private static Flux> getPaginatedQueryResultAsObservable( top, maxPageSize, operationContext, - cancelledRequestDiagnosticsTracker), + cancelledRequestDiagnosticsTracker, + pkRangesWithSuccessfulRequests, + globalEndpointManager, + globalPartitionEndpointManagerForCircuitBreaker), preFetchCount); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java index bb08e3b55235..d9c3b8c6f394 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java @@ -5,6 +5,9 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosDiagnostics; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; @@ -12,6 +15,7 @@ import reactor.core.publisher.Mono; import java.util.List; +import java.util.Set; import java.util.function.BiFunction; import java.util.function.Function; @@ -29,9 +33,12 @@ public ServerSideOnlyContinuationFetcherImpl(BiFunction cancelledRequestDiagnosticsTracker) { + List cancelledRequestDiagnosticsTracker, + Set pkRangesWithSuccessfulRequests, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { - super(executeFunc, isChangeFeed, top, maxItemCount, operationContext, cancelledRequestDiagnosticsTracker); + super(executeFunc, isChangeFeed, top, maxItemCount, operationContext, cancelledRequestDiagnosticsTracker, pkRangesWithSuccessfulRequests, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); checkNotNull(createRequestFunc, "Argument 'createRequestFunc' must not be null."); this.createRequestFunc = createRequestFunc; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index d0e5a852b22e..55623377b384 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -9,6 +9,7 @@ import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RequestOptions; import com.azure.cosmos.implementation.Strings; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; @@ -18,8 +19,10 @@ import java.time.Duration; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.function.Function; @@ -62,6 +65,7 @@ public class CosmosQueryRequestOptions { private List cancelledRequestDiagnosticsTracker = new ArrayList<>(); private PartitionKeyDefinition partitionKeyDefinition; private String collectionRid; + private Set pkRangesWithSuccessfulRequests = new HashSet<>(); /** * Instantiates a new query request options. @@ -109,6 +113,7 @@ public CosmosQueryRequestOptions() { this.cancelledRequestDiagnosticsTracker = options.cancelledRequestDiagnosticsTracker; this.partitionKeyDefinition = options.partitionKeyDefinition; this.collectionRid = options.collectionRid; + this.pkRangesWithSuccessfulRequests = options.pkRangesWithSuccessfulRequests; } void setOperationContextAndListenerTuple(OperationContextAndListenerTuple operationContextAndListenerTuple) { @@ -723,6 +728,14 @@ public void setCollectionRid(String collectionRid) { this.collectionRid = collectionRid; } + public Set getPkRangesWithSuccessfulRequests() { + return pkRangesWithSuccessfulRequests; + } + + public void setPkRangesWithSuccessfulRequests(Set pkRangesWithSuccessfulRequests) { + this.pkRangesWithSuccessfulRequests = pkRangesWithSuccessfulRequests; + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to access this class outside of this package.// /////////////////////////////////////////////////////////////////////////////////////////// @@ -903,6 +916,16 @@ public String getCollectionRid(CosmosQueryRequestOptions options) { return options.getCollectionRid(); } + @Override + public void setPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options, Set pkRangesWithSuccessfulRequests) { + options.setPkRangesWithSuccessfulRequests(pkRangesWithSuccessfulRequests); + } + + @Override + public Set getPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options) { + return options.getPkRangesWithSuccessfulRequests(); + } + @Override public List getExcludeRegions(CosmosQueryRequestOptions options) { return options.getExcludedRegions(); From c835a7759969f56d691d7c303b6c8ead60982bdc Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 11 May 2024 12:50:55 -0400 Subject: [PATCH 024/140] Adding shared state for point and query operations with availability strategy enabled. --- .../implementation/query/FetcherTest.java | 7 +- .../DocumentServiceRequestContext.java | 20 ++ .../implementation/FeedOperationContext.java | 33 ++++ .../implementation/PointOperationContext.java | 35 ++++ .../implementation/RxDocumentClientImpl.java | 175 +++++++++++------- .../query/DocumentProducer.java | 4 - .../cosmos/implementation/query/Fetcher.java | 16 +- .../implementation/query/Paginator.java | 8 - ...ServerSideOnlyContinuationFetcherImpl.java | 3 +- 9 files changed, 210 insertions(+), 91 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java index 9887984654cc..8b2bf9c07d2a 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java @@ -106,10 +106,8 @@ public void query(CosmosQueryRequestOptions options, int top) { .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() .getCancelledRequestDiagnosticsTracker(options), - ImplementationBridgeHelpers - .CosmosQueryRequestOptionsHelper - .getCosmosQueryRequestOptionsAccessor() - .getPkRangesWithSuccessfulRequests(options)); + null, + null); validateFetcher(fetcher, options, top, feedResponseList); } @@ -175,6 +173,7 @@ public void changeFeed() { .getCosmosChangeFeedRequestOptionsAccessor() .getOperationContext(options), null, + null, null); validateFetcher(fetcher, options, feedResponseList); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index c22bd3443d36..43cd25094e1e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -56,6 +56,10 @@ public class DocumentServiceRequestContext implements Cloneable { // For cancelled rntbd requests, track the response as OperationCancelledException which later will be used to populate the cosmosDiagnostics public final Map rntbdCancelledRequestMap = new ConcurrentHashMap<>(); + private PointOperationContext pointOperationContext; + + private FeedOperationContext feedOperationContext; + public DocumentServiceRequestContext() {} /** @@ -180,5 +184,21 @@ public List getUnavailableRegionsForPartition() { public void setUnavailableRegionsForPartition(List unavailableRegionsForPartition) { this.unavailableRegionsForPartition = unavailableRegionsForPartition; } + + public PointOperationContext getPointOperationContext() { + return pointOperationContext; + } + + public void setPointOperationContext(PointOperationContext pointOperationContext) { + this.pointOperationContext = pointOperationContext; + } + + public FeedOperationContext getFeedOperationContext() { + return feedOperationContext; + } + + public void setFeedOperationContext(FeedOperationContext feedOperationContext) { + this.feedOperationContext = feedOperationContext; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java new file mode 100644 index 000000000000..8544f101062f --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java @@ -0,0 +1,33 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import java.util.Set; + +public class FeedOperationContext { + + private final Set partitionKeyRangesWithSuccess; + + private boolean isRequestHedged; + + public FeedOperationContext(Set partitionKeyRangesWithSuccess) { + this.partitionKeyRangesWithSuccess = partitionKeyRangesWithSuccess; + } + + public void setIsRequestHedged(boolean isRequestHedged) { + this.isRequestHedged = isRequestHedged; + } + + public boolean getIsRequestHedged() { + return this.isRequestHedged; + } + + public void addPartitionKeyRangeWithSuccess(PartitionKeyRange partitionKeyRange) { + this.partitionKeyRangesWithSuccess.add(partitionKeyRange); + } + + public boolean hasPartitionKeyRangeSeenSuccess(PartitionKeyRange partitionKeyRange) { + return this.partitionKeyRangesWithSuccess.contains(partitionKeyRange); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java new file mode 100644 index 000000000000..6a8be5b76594 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java @@ -0,0 +1,35 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicBoolean; + +public class PointOperationContext { + + private final AtomicBoolean hasOperationSeenSuccess; + + private boolean isRequestHedged; + + public PointOperationContext(AtomicBoolean hasOperationSeenSuccess) { + this.hasOperationSeenSuccess = hasOperationSeenSuccess; + } + + public void setIsRequestHedged(boolean isRequestHedged) { + this.isRequestHedged = isRequestHedged; + } + + public boolean getIsRequestHedged() { + return this.isRequestHedged; + } + + public void setHasOperationSeenSuccess() { + this.hasOperationSeenSuccess.set(true); + } + + public boolean getHasOperationSeenSuccess() { + return hasOperationSeenSuccess.get(); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 489dfcc0559e..f54d02bf2aca 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -107,6 +107,7 @@ import java.util.Map; import java.util.NoSuchElementException; import java.util.Optional; +import java.util.Set; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; @@ -2103,14 +2104,14 @@ public Mono> createDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Create, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> createDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> createDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, - isRequestHedged, + pointOperationContext, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2124,7 +2125,7 @@ private Mono> createDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap) { ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2150,10 +2151,11 @@ private Mono> createDocumentCore( finalRetryPolicyInstance, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap), + collectionRoutingMap, + pointOperationContext), requestRetryPolicy), scopedDiagnosticsFactory - ), requestReference, isRequestHedged); + ), requestReference); } private Mono> createDocumentInternal( @@ -2164,7 +2166,8 @@ private Mono> createDocumentInternal( DocumentClientRetryPolicy requestRetryPolicy, DiagnosticsClientContext clientContextOverride, AtomicReference documentServiceRequestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { try { logger.debug("Creating a Document. collectionLink: [{}]", collectionLink); @@ -2176,6 +2179,7 @@ private Mono> createDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); documentServiceRequestReference.set(request); + request.requestContext.setPointOperationContext(pointOperationContext); if (requestRetryPolicy != null) { requestRetryPolicy.onBeforeSendRequest(request); @@ -2222,24 +2226,32 @@ private static Mono getPointOperationResponseMonoWithE2ETimeout( private Mono handleRegionFeedbackForPointOperation( Mono response, - AtomicReference requestReference, - boolean isRequestHedged) { + AtomicReference requestReference) { - return response.doOnError(throwable -> { + return response + .doOnSuccess(ignore -> { + RxDocumentServiceRequest succeededRequest = requestReference.get(); - if (!isRequestHedged) { + PointOperationContext pointOperationContext = succeededRequest.requestContext.getPointOperationContext(); + pointOperationContext.setHasOperationSeenSuccess(); + }) + .doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { - OperationCancelledException exception = Utils.as(throwable, OperationCancelledException.class); - Optional firstContactedRegion = exception.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().stream().findFirst(); + RxDocumentServiceRequest failedRequest = requestReference.get(); + PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); - UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + OperationCancelledException exception = Utils.as(throwable, OperationCancelledException.class); + Optional firstContactedRegion = exception.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().stream().findFirst(); - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); + UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); + } } - } - }); + }); } private static Throwable getCancellationExceptionForPointOperations( @@ -2297,8 +2309,8 @@ public Mono> upsertDocument(String collectionLink, Ob return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Upsert, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> upsertDocumentCore( - collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap), + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> upsertDocumentCore( + collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2311,7 +2323,7 @@ private Mono> upsertDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -2336,9 +2348,10 @@ private Mono> upsertDocumentCore( finalRetryPolicyInstance, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap), + collectionRoutingMap, + pointOperationContext), finalRetryPolicyInstance), - scopedDiagnosticsFactory), requestReference, isRequestHedged); + scopedDiagnosticsFactory), requestReference); } private Mono> upsertDocumentInternal( @@ -2349,7 +2362,8 @@ private Mono> upsertDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { try { logger.debug("Upserting a Document. collectionLink: [{}]", collectionLink); @@ -2369,6 +2383,7 @@ private Mono> upsertDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + request.requestContext.setPointOperationContext(pointOperationContext); requestReference.set(request); if (retryPolicyInstance != null) { @@ -2392,13 +2407,13 @@ public Mono> replaceDocument(String documentLink, Obj return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> replaceDocumentCore( documentLink, document, opt, e2ecfg, clientCtxOverride, - isRequestHedged, + pointOperationContext, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2411,7 +2426,7 @@ private Mono> replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -2438,9 +2453,10 @@ private Mono> replaceDocumentCore( endToEndPolicyConfig, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap), + collectionRoutingMap, + pointOperationContext), requestRetryPolicy), - scopedDiagnosticsFactory), requestReference, isRequestHedged); + scopedDiagnosticsFactory), requestReference); } private Mono> replaceDocumentInternal( @@ -2451,7 +2467,8 @@ private Mono> replaceDocumentInternal( CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2471,7 +2488,8 @@ private Mono> replaceDocumentInternal( retryPolicyInstance, clientContextOverride, requestReference, - collectionRoutingMap); + collectionRoutingMap, + pointOperationContext); } catch (Exception e) { logger.debug("Failure in replacing a document due to [{}]", e.getMessage()); @@ -2484,12 +2502,12 @@ public Mono> replaceDocument(Document document, Reque return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> replaceDocumentCore( document, opt, e2ecfg, clientCtxOverride, - isRequestHedged, + pointOperationContext, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2501,7 +2519,7 @@ private Mono> replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap) { DocumentClientRetryPolicy requestRetryPolicy = @@ -2522,8 +2540,9 @@ private Mono> replaceDocumentCore( endToEndPolicyConfig, clientContextOverride, requestReference, - collectionRoutingMap), - requestRetryPolicy), requestReference, isRequestHedged); + collectionRoutingMap, + pointOperationContext), + requestRetryPolicy), requestReference); } private Mono> replaceDocumentInternal( @@ -2533,7 +2552,8 @@ private Mono> replaceDocumentInternal( CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { try { if (document == null) { @@ -2547,7 +2567,8 @@ private Mono> replaceDocumentInternal( retryPolicyInstance, clientContextOverride, requestReference, - collectionRoutingMap); + collectionRoutingMap, + pointOperationContext); } catch (Exception e) { logger.debug("Failure in replacing a database due to [{}]", e.getMessage()); @@ -2562,7 +2583,8 @@ private Mono> replaceDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { if (document == null) { throw new IllegalArgumentException("document"); @@ -2622,6 +2644,7 @@ private Mono> replaceDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + req.requestContext.setPointOperationContext(pointOperationContext); requestReference.set(req); if (retryPolicyInstance != null) { @@ -2670,13 +2693,13 @@ public Mono> patchDocument(String documentLink, return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> patchDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> patchDocumentCore( documentLink, cosmosPatchOperations, opt, e2ecfg, clientCtxOverride, - isRequestHedged, + pointOperationContext, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2689,7 +2712,7 @@ private Mono> patchDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -2710,9 +2733,10 @@ private Mono> patchDocumentCore( documentClientRetryPolicy, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap), + collectionRoutingMap, + pointOperationContext), documentClientRetryPolicy), - scopedDiagnosticsFactory), requestReference, isRequestHedged); + scopedDiagnosticsFactory), requestReference); } private Mono> patchDocumentInternal( @@ -2722,7 +2746,8 @@ private Mono> patchDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { checkArgument(StringUtils.isNotEmpty(documentLink), "expected non empty documentLink"); checkNotNull(cosmosPatchOperations, "expected non null cosmosPatchOperations"); @@ -2786,6 +2811,7 @@ private Mono> patchDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + req.requestContext.setPointOperationContext(pointOperationContext); requestReference.set(req); if (retryPolicyInstance != null) { @@ -2802,13 +2828,13 @@ public Mono> deleteDocument(String documentLink, Requ return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> deleteDocumentCore( documentLink, null, opt, e2ecfg, clientCtxOverride, - isRequestHedged, + pointOperationContext, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2820,13 +2846,13 @@ public Mono> deleteDocument(String documentLink, Inte return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> deleteDocumentCore( documentLink, internalObjectNode, opt, e2ecfg, clientCtxOverride, - isRequestHedged, + pointOperationContext, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2839,7 +2865,7 @@ private Mono> deleteDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -2860,9 +2886,10 @@ private Mono> deleteDocumentCore( requestRetryPolicy, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap), + collectionRoutingMap, + pointOperationContext), requestRetryPolicy), - scopedDiagnosticsFactory), requestReference, isRequestHedged); + scopedDiagnosticsFactory), requestReference); } private Mono> deleteDocumentInternal( @@ -2872,7 +2899,8 @@ private Mono> deleteDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2976,7 +3004,7 @@ private Mono> readDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Read, - (opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, isRequestHedged, collectionRoutingMap), + (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap), options, false, innerDiagnosticsFactory, @@ -2988,7 +3016,7 @@ private Mono> readDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -3007,10 +3035,11 @@ private Mono> readDocumentCore( retryPolicyInstance, scopedDiagnosticsFactory, requestReference, - collectionRoutingMap), + collectionRoutingMap, + pointOperationContext), retryPolicyInstance), scopedDiagnosticsFactory - ), requestReference, isRequestHedged); + ), requestReference); } private Mono> readDocumentInternal( @@ -3019,7 +3048,8 @@ private Mono> readDocumentInternal( DocumentClientRetryPolicy retryPolicyInstance, DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, - Utils.ValueHolder collectionRoutingMap) { + Utils.ValueHolder collectionRoutingMap, + PointOperationContext pointOperationContext) { try { if (StringUtils.isEmpty(documentLink)) { @@ -3045,6 +3075,7 @@ private Mono> readDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); + req.requestContext.setPointOperationContext(pointOperationContext); requestReference.set(req); if (retryPolicyInstance != null) { @@ -5628,8 +5659,6 @@ private Mono> wrapPointOperationWithAvailabilityStrat CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); - PartitionKeyDefinition partitionKeyDefinition = collection.getPartitionKey(); - // todo: validate if the below is possible if (collectionRoutingMapValueHolder.v == null) { return Mono.error(new NotFoundException("collectionRoutingMapValueHolder.v cannot be null!")); @@ -5644,9 +5673,13 @@ private Mono> wrapPointOperationWithAvailabilityStrat idempotentWriteRetriesEnabled, nonNullRequestOptions); + AtomicBoolean isOperationSuccessful = new AtomicBoolean(false); + if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, false, collectionRoutingMapValueHolder); + PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful); + pointOperationContextForMainRequest.setIsRequestHedged(false); + return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForMainRequest, collectionRoutingMapValueHolder); } ThresholdBasedAvailabilityStrategy availabilityStrategy = @@ -5665,8 +5698,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient + PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful); + pointOperationContextForMainRequest.setIsRequestHedged(false); Mono initialMonoAcrossAllRegions = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, false, collectionRoutingMapValueHolder) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForMainRequest, collectionRoutingMapValueHolder) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isCosmosException, @@ -5693,8 +5728,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors + PointOperationContext pointOperationContextForHedgedRequest = new PointOperationContext(isOperationSuccessful); + pointOperationContextForHedgedRequest.setIsRequestHedged(true); Mono regionalCrossRegionRetryMono = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, true, collectionRoutingMapValueHolder) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForHedgedRequest, collectionRoutingMapValueHolder) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isNonTransientCosmosException, @@ -5978,8 +6015,13 @@ private Mono executeFeedOperationWithAvailabilityStrategy( false, initialExcludedRegions); + Set partitionKeyRangesWithSuccess = ConcurrentHashMap.newKeySet(); + if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible + FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess); + feedOperationContextForMainRequest.setIsRequestHedged(false); + req.requestContext.setFeedOperationContext(feedOperationContextForMainRequest); return feedOperation.apply(retryPolicyFactory, req); } @@ -5997,6 +6039,9 @@ private Mono executeFeedOperationWithAvailabilityStrategy( // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient + FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess); + feedOperationContextForMainRequest.setIsRequestHedged(false); + clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForMainRequest); Mono> initialMonoAcrossAllRegions = feedOperation.apply(retryPolicyFactory, clonedRequest) .map(NonTransientFeedOperationResult::new) @@ -6022,7 +6067,9 @@ private Mono executeFeedOperationWithAvailabilityStrategy( region) ); - clonedRequest.requestContext.setIsRequestHedged(true); + FeedOperationContext feedOperationContextForHedgedRequest = new FeedOperationContext(partitionKeyRangesWithSuccess); + feedOperationContextForHedgedRequest.setIsRequestHedged(true); + clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForHedgedRequest); // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values @@ -6114,7 +6161,7 @@ Mono> apply( RequestOptions requestOptions, CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, DiagnosticsClientContext clientContextOverride, - boolean isRequestHedged, + PointOperationContext pointOperationContext, Utils.ValueHolder collectionRoutingMap); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index 5453be4040cd..594d4240c8fd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -212,10 +212,6 @@ public Flux produceAsync() { .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() .getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), - ImplementationBridgeHelpers - .CosmosQueryRequestOptionsHelper - .getCosmosQueryRequestOptionsAccessor() - .getPkRangesWithSuccessfulRequests(cosmosQueryRequestOptions), client.getGlobalEndpointManager(), client.getGlobalPartitionEndpointManagerForCircuitBreaker() ) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 7f4168ff9eb4..78cf42e34c9c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -6,6 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.implementation.Configs; +import com.azure.cosmos.implementation.FeedOperationContext; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.PartitionKeyRange; @@ -43,7 +44,6 @@ abstract class Fetcher { private final AtomicInteger maxItemCount; private final AtomicInteger top; private final List cancelledRequestDiagnosticsTracker; - private final Set pkRangesWithSuccessfulRequests; private final GlobalEndpointManager globalEndpointManager; private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; @@ -54,7 +54,6 @@ public Fetcher( int maxItemCount, OperationContextAndListenerTuple operationContext, List cancelledRequestDiagnosticsTracker, - Set pkRangesWithSuccessfulRequests, GlobalEndpointManager globalEndpointManager, GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { @@ -80,7 +79,6 @@ public Fetcher( } this.shouldFetchMore = new AtomicBoolean(true); this.cancelledRequestDiagnosticsTracker = cancelledRequestDiagnosticsTracker; - this.pkRangesWithSuccessfulRequests = pkRangesWithSuccessfulRequests; this.globalEndpointManager = globalEndpointManager; this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; } @@ -175,7 +173,9 @@ private Mono> nextPage(RxDocumentServiceRequest request) { }) .doOnNext(response -> { completed.set(true); - this.pkRangesWithSuccessfulRequests.add(request.requestContext.resolvedPartitionKeyRange); + + FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); + feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange); }) .doOnError(throwable -> completed.set(true)) .doFinally(signalType -> { @@ -191,11 +191,9 @@ private Mono> nextPage(RxDocumentServiceRequest request) { return; } - if (request.requestContext.isRequestCancelledOnTimeout().get() && - Configs.isPartitionLevelCircuitBreakerEnabled() && - !request.requestContext.isRequestHedged() && -// request.requestContext.isRequestSendingStarted && - !this.pkRangesWithSuccessfulRequests.contains(request.requestContext.resolvedPartitionKeyRange)) { + FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); + + if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange)) { if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { Optional firstContactedRegion = request.requestContext.cosmosDiagnostics.getDiagnosticsContext().getContactedRegionNames().stream().findFirst(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java index 3557f7ac866a..ecf7b08b62f5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java @@ -59,10 +59,6 @@ public static Flux> getPaginatedQueryResultAsObservable( .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() .getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), - ImplementationBridgeHelpers - .CosmosQueryRequestOptionsHelper - .getCosmosQueryRequestOptionsAccessor() - .getPkRangesWithSuccessfulRequests(cosmosQueryRequestOptions), globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); } @@ -76,7 +72,6 @@ public static Flux> getPaginatedQueryResultAsObservable( int maxPreFetchCount, OperationContextAndListenerTuple operationContext, List cancelledRequestDiagnosticsTracker, - Set pkRangesWithSuccessfulRequests, GlobalEndpointManager globalEndpointManager, GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { @@ -90,7 +85,6 @@ public static Flux> getPaginatedQueryResultAsObservable( false, operationContext, cancelledRequestDiagnosticsTracker, - pkRangesWithSuccessfulRequests, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); } @@ -156,7 +150,6 @@ private static Flux> getPaginatedQueryResultAsObservable( boolean isChangeFeed, OperationContextAndListenerTuple operationContext, List cancelledRequestDiagnosticsTracker, - Set pkRangesWithSuccessfulRequests, GlobalEndpointManager globalEndpointManager, GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { @@ -170,7 +163,6 @@ private static Flux> getPaginatedQueryResultAsObservable( maxPageSize, operationContext, cancelledRequestDiagnosticsTracker, - pkRangesWithSuccessfulRequests, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker), preFetchCount); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java index d9c3b8c6f394..9fddc3f795cc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java @@ -34,11 +34,10 @@ public ServerSideOnlyContinuationFetcherImpl(BiFunction cancelledRequestDiagnosticsTracker, - Set pkRangesWithSuccessfulRequests, GlobalEndpointManager globalEndpointManager, GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { - super(executeFunc, isChangeFeed, top, maxItemCount, operationContext, cancelledRequestDiagnosticsTracker, pkRangesWithSuccessfulRequests, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); + super(executeFunc, isChangeFeed, top, maxItemCount, operationContext, cancelledRequestDiagnosticsTracker, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); checkNotNull(createRequestFunc, "Argument 'createRequestFunc' must not be null."); this.createRequestFunc = createRequestFunc; From 20e2f5c7475e707ee528df2f7cc2070c8932db63 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 13 May 2024 07:41:11 -0400 Subject: [PATCH 025/140] Implementing 408 handling for partition-level circuit breaker. --- .../PartitionLevelCircuitBreakerTests.java | 274 +++++++++++++++++- .../SpyClientUnderTestFactory.java | 2 +- .../GlobalAddressResolverTest.java | 3 +- .../query/DocumentProducerTest.java | 3 +- .../implementation/FeedOperationContext.java | 9 +- .../implementation/PointOperationContext.java | 9 +- .../implementation/RxDocumentClientImpl.java | 50 +++- .../GoneAndRetryWithRetryPolicy.java | 4 - .../query/ChangeFeedFetcher.java | 2 +- .../cosmos/implementation/query/Fetcher.java | 29 +- 10 files changed, 356 insertions(+), 29 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 5472ea3e0ef8..ae402f2b62c0 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -9,7 +9,10 @@ import com.azure.cosmos.CosmosAsyncContainer; import com.azure.cosmos.CosmosAsyncDatabase; import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; +import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; import com.azure.cosmos.TestObject; +import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; import com.azure.cosmos.faultinjection.FaultInjectionTestBase; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.models.CosmosContainerProperties; @@ -17,6 +20,7 @@ import com.azure.cosmos.models.CosmosItemResponse; import com.azure.cosmos.models.CosmosPatchItemRequestOptions; import com.azure.cosmos.models.CosmosPatchOperations; +import com.azure.cosmos.models.CosmosQueryRequestOptions; import com.azure.cosmos.models.FeedRange; import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.PartitionKey; @@ -36,7 +40,10 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Factory; import org.testng.annotations.Test; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; +import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; @@ -140,13 +147,12 @@ public void readHits503InFirstPreferredRegion() { ); } - CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); logger.info("Sleep for 60 seconds"); Thread.sleep(60_000); for (int i = 1; i <= 30; i++) { - response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); + CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); assertThat(response).isNotNull(); @@ -760,8 +766,270 @@ public void queryHits503InFirstPreferredRegion() { } @Test(groups = {"multi-master"}) - public void readHits408InFirstPreferredRegion() { + public void queryWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("queryWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build(); + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + TestObject testObject = TestObject.create(); + + String itemIdMappingToUnhealthyPartition = testObject.getId(); + + container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.QUERY_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.GONE) + .build(); + + FaultInjectionRule goneExceptionRule = new FaultInjectionRuleBuilder("gone-exception-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(Duration.ofSeconds(45)) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(goneExceptionRule)) + .block(); + + String sqlQuery = "SELECT * FROM C"; + + for (int i = 1; i <= 15; i++) { + FeedResponse response = container + .queryItems( + sqlQuery, + new CosmosQueryRequestOptions() + .setPartitionKey(new PartitionKey(itemIdMappingToUnhealthyPartition)) + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), + TestObject.class) + .byPage() + .onErrorResume(throwable -> { + if (throwable instanceof OperationCancelledException) { + logger.error("OperationCancelledException thrown!"); + } + + return Flux.empty(); + }) + .blockLast(); + + logger.info("Hit count : {}", goneExceptionRule.getHitCount()); + + if (response != null) { + assertThat(response).isNotNull(); + assertThat(response.getCosmosDiagnostics()).isNotNull(); + + response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } + + logger.info("Sleep for 120 seconds"); + Thread.sleep(120_000); + + for (int i = 1; i <= 30; i++) { + FeedResponse response = container + .queryItems( + sqlQuery, + new CosmosQueryRequestOptions() + .setPartitionKey(new PartitionKey(itemIdMappingToUnhealthyPartition)) + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), + TestObject.class) + .byPage() + .onErrorResume(throwable -> { + if (throwable instanceof OperationCancelledException) { + logger.error("OperationCancelledException thrown!"); + } + + return Flux.empty(); + }) + .blockLast(); + + logger.info("Hit count : {}", goneExceptionRule.getHitCount()); + + if (response != null) { + assertThat(response).isNotNull(); + assertThat(response.getCosmosDiagnostics()).isNotNull(); + + response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Query operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + + @Test(groups = {"multi-master"}) + public void readWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion() { + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("readWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build(); + + CosmosAsyncClient client = clientBuilder.endToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg).buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + TestObject testObject = TestObject.create(); + + String itemIdMappingToUnhealthyPartition = testObject.getId(); + + container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.READ_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.GONE) + .build(); + FaultInjectionRule goneExceptionRule = new FaultInjectionRuleBuilder("gone-exception-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(Duration.ofSeconds(45)) + .build(); + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(goneExceptionRule)) + .block(); + + for (int i = 1; i <= 15; i++) { + CosmosItemResponse response = container.readItem( + itemIdMappingToUnhealthyPartition, + new PartitionKey(itemIdMappingToUnhealthyPartition), + new CosmosItemRequestOptions().setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), + TestObject.class + ) + .onErrorResume(throwable -> { + if (throwable instanceof OperationCancelledException) { + logger.error("OperationCancelledException thrown!"); + } + + return Mono.empty(); + }) + .block(); + + logger.info("Hit count : {}", goneExceptionRule.getHitCount()); + + if (response != null) { + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } + + logger.info("Sleep for 120 seconds"); + + Thread.sleep(120_000); + + for (int i = 1; i <= 30; i++) { + CosmosItemResponse response = container.readItem( + itemIdMappingToUnhealthyPartition, + new PartitionKey(itemIdMappingToUnhealthyPartition), + new CosmosItemRequestOptions().setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), + TestObject.class + ) + .onErrorResume(throwable -> { + if (throwable instanceof OperationCancelledException) { + logger.error("OperationCancelledException thrown!"); + } + + return Mono.empty(); + }) + .block(); + + logger.info("Hit count : {}", goneExceptionRule.getHitCount()); + + if (response != null) { + assertThat(response).isNotNull(); + assertThat(response.getDiagnostics()).isNotNull(); + + response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Read operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } } private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index dcb7e03cf866..2ef222c39410 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -121,7 +121,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, GlobalEndpointManager globalEndpointManager, HttpClient rxClient, ApiType apiType, - IGlobalPartitionEndpointManager globalPartitionEndpointManager) { + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { this.origRxGatewayStoreModel = super.createRxGatewayProxy( sessionContainer, consistencyLevel, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java index 414e3efd2110..f80f8b33ba50 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java @@ -12,6 +12,7 @@ import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.OpenConnectionResponse; @@ -52,7 +53,7 @@ public class GlobalAddressResolverTest { private HttpClient httpClient; private GlobalEndpointManager endpointManager; - private IGlobalPartitionEndpointManager globalPartitionEndpointManager; + private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private IAuthorizationTokenProvider authorizationTokenProvider; private UserAgentContainer userAgentContainer; private RxCollectionCache collectionCache; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 25fc6a0c66a9..712eacad2ad0 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -10,6 +10,7 @@ import com.azure.cosmos.implementation.Document; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.IRetryPolicyFactory; @@ -119,7 +120,7 @@ private IRetryPolicyFactory mockDocumentClientIRetryPolicyFactory() { } GlobalEndpointManager globalEndpointManager = Mockito.mock(GlobalEndpointManager.class); - IGlobalPartitionEndpointManager globalPartitionEndpointManager = Mockito.mock(IGlobalPartitionEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); Mockito.doReturn(url).when(globalEndpointManager).resolveServiceEndpoint(Mockito.any(RxDocumentServiceRequest.class)); doReturn(false).when(globalEndpointManager).isClosed(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java index 8544f101062f..aa55670d81a8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java @@ -9,10 +9,13 @@ public class FeedOperationContext { private final Set partitionKeyRangesWithSuccess; + private final boolean isThresholdBasedAvailabilityStrategyEnabled; + private boolean isRequestHedged; - public FeedOperationContext(Set partitionKeyRangesWithSuccess) { + public FeedOperationContext(Set partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { this.partitionKeyRangesWithSuccess = partitionKeyRangesWithSuccess; + this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; } public void setIsRequestHedged(boolean isRequestHedged) { @@ -30,4 +33,8 @@ public void addPartitionKeyRangeWithSuccess(PartitionKeyRange partitionKeyRange) public boolean hasPartitionKeyRangeSeenSuccess(PartitionKeyRange partitionKeyRange) { return this.partitionKeyRangesWithSuccess.contains(partitionKeyRange); } + + public boolean isThresholdBasedAvailabilityStrategyEnabled() { + return isThresholdBasedAvailabilityStrategyEnabled; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java index 6a8be5b76594..1857666c033c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java @@ -11,10 +11,13 @@ public class PointOperationContext { private final AtomicBoolean hasOperationSeenSuccess; + private final boolean isThresholdBasedAvailabilityStrategyEnabled; + private boolean isRequestHedged; - public PointOperationContext(AtomicBoolean hasOperationSeenSuccess) { + public PointOperationContext(AtomicBoolean hasOperationSeenSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { this.hasOperationSeenSuccess = hasOperationSeenSuccess; + this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; } public void setIsRequestHedged(boolean isRequestHedged) { @@ -32,4 +35,8 @@ public void setHasOperationSeenSuccess() { public boolean getHasOperationSeenSuccess() { return hasOperationSeenSuccess.get(); } + + public boolean isThresholdBasedAvailabilityStrategyEnabled() { + return isThresholdBasedAvailabilityStrategyEnabled; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index f54d02bf2aca..51a7f5b10c60 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2241,15 +2241,49 @@ private Mono handleRegionFeedbackForPointOperation( RxDocumentServiceRequest failedRequest = requestReference.get(); PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); - if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { + + if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + OperationCancelledException exception = Utils.as(throwable, OperationCancelledException.class); + Optional firstContactedRegion = exception.getDiagnostics().getContactedRegionNames().stream().findFirst(); + + UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); + } + } else { OperationCancelledException exception = Utils.as(throwable, OperationCancelledException.class); - Optional firstContactedRegion = exception.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().stream().findFirst(); + Optional firstContactedRegion = exception.getDiagnostics().getContactedRegionNames().stream().findFirst(); + + UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); + } + } + }) + .doOnCancel(() -> { + RxDocumentServiceRequest failedRequest = requestReference.get(); + PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); + + if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { + + if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + Optional firstContactedRegion = failedRequest.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); } + } else { + Optional firstContactedRegion = failedRequest.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); + + UnmodifiableList endpoints = failedRequest.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, failedRequest.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(failedRequest, filteredEndpoint.get(0)); } }); } @@ -5677,7 +5711,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful); + PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful, false); pointOperationContextForMainRequest.setIsRequestHedged(false); return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForMainRequest, collectionRoutingMapValueHolder); } @@ -5698,7 +5732,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient - PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful); + PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful, true); pointOperationContextForMainRequest.setIsRequestHedged(false); Mono initialMonoAcrossAllRegions = callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForMainRequest, collectionRoutingMapValueHolder) @@ -5728,7 +5762,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors - PointOperationContext pointOperationContextForHedgedRequest = new PointOperationContext(isOperationSuccessful); + PointOperationContext pointOperationContextForHedgedRequest = new PointOperationContext(isOperationSuccessful, true); pointOperationContextForHedgedRequest.setIsRequestHedged(true); Mono regionalCrossRegionRetryMono = callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForHedgedRequest, collectionRoutingMapValueHolder) @@ -6019,7 +6053,7 @@ private Mono executeFeedOperationWithAvailabilityStrategy( if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess); + FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess, false); feedOperationContextForMainRequest.setIsRequestHedged(false); req.requestContext.setFeedOperationContext(feedOperationContextForMainRequest); return feedOperation.apply(retryPolicyFactory, req); @@ -6039,7 +6073,7 @@ private Mono executeFeedOperationWithAvailabilityStrategy( // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient - FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess); + FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess, true); feedOperationContextForMainRequest.setIsRequestHedged(false); clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForMainRequest); Mono> initialMonoAcrossAllRegions = @@ -6067,7 +6101,7 @@ private Mono executeFeedOperationWithAvailabilityStrategy( region) ); - FeedOperationContext feedOperationContextForHedgedRequest = new FeedOperationContext(partitionKeyRangesWithSuccess); + FeedOperationContext feedOperationContextForHedgedRequest = new FeedOperationContext(partitionKeyRangesWithSuccess, true); feedOperationContextForHedgedRequest.setIsRequestHedged(true); clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForHedgedRequest); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java index dac552283e99..1380b772eac3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java @@ -322,10 +322,6 @@ private static boolean bookmarkException(RxDocumentServiceRequest request, Excep if (request.requestContext == null) { return false; } - - if (request.locationLevelCircuitBreakerRequestContext != null) { - return request.locationLevelCircuitBreakerRequestContext.tryRecordRegionScopedFailure(request.requestContext.locationEndpointToRoute, cosmosException.getStatusCode(), cosmosException.getSubStatusCode()); - } } return false; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java index 1109c4a2c85c..466454b8e3b5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java @@ -48,7 +48,7 @@ public ChangeFeedFetcher( int maxItemCount, boolean isSplitHandlingDisabled, OperationContextAndListenerTuple operationContext) { - super(executeFunc, true, top, maxItemCount, operationContext, null, null, null, null); + super(executeFunc, true, top, maxItemCount, operationContext, null, null, null); checkNotNull(client, "Argument 'client' must not be null."); checkNotNull(createRequestFunc, "Argument 'createRequestFunc' must not be null."); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 78cf42e34c9c..7ee80c807a82 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -10,6 +10,7 @@ import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; @@ -174,8 +175,10 @@ private Mono> nextPage(RxDocumentServiceRequest request) { .doOnNext(response -> { completed.set(true); - FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); - feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange); + if (request.getResourceType() == ResourceType.Document) { + FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); + feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange); + } }) .doOnError(throwable -> completed.set(true)) .doFinally(signalType -> { @@ -193,18 +196,28 @@ private Mono> nextPage(RxDocumentServiceRequest request) { FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); - if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange)) { + if (feedOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { + if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange)) { - if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { - Optional firstContactedRegion = request.requestContext.cosmosDiagnostics.getDiagnosticsContext().getContactedRegionNames().stream().findFirst(); + if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { + Optional firstContactedRegion = request.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); - UnmodifiableList endpoints = request.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + UnmodifiableList endpoints = request.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(request, filteredEndpoint.get(0)); + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(request, filteredEndpoint.get(0)); + } } + } else { + Optional firstContactedRegion = request.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); + + UnmodifiableList endpoints = request.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); + List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(request, filteredEndpoint.get(0)); } + if (request.requestContext != null && request.requestContext.cosmosDiagnostics != null) { this.cancelledRequestDiagnosticsTracker.add(request.requestContext.cosmosDiagnostics); } From 9998068082e4bf0d6b9c6354229364f1e7a285f3 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 13 May 2024 08:29:36 -0400 Subject: [PATCH 026/140] Implementing 408 handling for partition-level circuit breaker. --- .../GlobalPartitionEndpointManagerForCircuitBreaker.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 09113489486a..682fd4f1f52c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -406,11 +406,6 @@ public boolean isRegionAvailableToProcessRequests() { return this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.Available || this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable; } - - public boolean isRegionUnavailableToProcessRequest() { - return this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable; - - } } private enum PartitionScopedRegionUnavailabilityStatus { From 5007f9db138dacc2feacbb5fe0d9c7256d9120e1 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 13 May 2024 10:29:16 -0400 Subject: [PATCH 027/140] Implementing 408 handling for partition-level circuit breaker. --- ...itionEndpointManagerForCircuitBreaker.java | 66 +++++++++++-------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 682fd4f1f52c..1bafe2b4ae2a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -28,9 +28,11 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final GlobalEndpointManager globalEndpointManager; private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; + private final ConcurrentHashMap partitionsWithPossibleUnavailableRegions; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); + this.partitionsWithPossibleUnavailableRegions = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; } @@ -68,7 +70,7 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); } - isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(failedLocation)); + isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(partitionKeyRangeAsKey, failedLocation)); if (isFailureThresholdBreached.get()) { @@ -77,7 +79,7 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); isFailoverPossible.set( - partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints)); + partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeAsKey, applicableEndpoints)); } return partitionKeyRangeFailoverInfoAsVal; @@ -125,7 +127,7 @@ public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceReq partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); } - partitionKeyRangeFailoverInfoAsVal.bookmarkSuccess(succeededLocation); + partitionKeyRangeFailoverInfoAsVal.bookmarkSuccess(partitionKeyRange, succeededLocation); return partitionKeyRangeFailoverInfoAsVal; }); } @@ -141,6 +143,7 @@ public List getUnavailableLocationsForPartition(PartitionKeyRange partition this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); List unavailableLocations = new ArrayList<>(); + boolean doesPartitionHaveUnavailableLocations = false; if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { Map locationEndpointToFailureMetricsForPartition = @@ -152,35 +155,42 @@ public List getUnavailableLocationsForPartition(PartitionKeyRange partition if (failureMetricsForPartition.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable) { unavailableLocations.add(location); + doesPartitionHaveUnavailableLocations = true; + } else if (failureMetricsForPartition.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable) { + doesPartitionHaveUnavailableLocations = true; } } } + if (!doesPartitionHaveUnavailableLocations) { + this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); + } + return UnmodifiableList.unmodifiableList(unavailableLocations); } private Flux updateStaleLocationInfo() { - return Mono.just(1) + return Flux.fromIterable(this.partitionsWithPossibleUnavailableRegions.values()) .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) - .repeat() - .delayElements(Duration.ofSeconds(60)) - .flatMap(ignore -> { + .flatMap(partitionKeyRange -> { logger.info("Background updateStaleLocationInfo kicking in..."); - for (Map.Entry pkRangeToFailoverInfo : this.partitionKeyRangeToFailoverInfo.entrySet()) { - - PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = pkRangeToFailoverInfo.getValue(); + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + if (partitionLevelLocationUnavailabilityInfo != null) { for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToFailureMetricsForPartition.entrySet()) { - FailureMetricsForPartition failureMetricsForPartition = locationToLocationLevelMetrics.getValue(); - failureMetricsForPartition.handleSuccess(false); + failureMetricsForPartition.handleSuccess(false, partitionKeyRange); } + } else { + this.partitionsWithPossibleUnavailableRegions.remove(partitionKeyRange); } return Mono.empty(); - }); + }) + .repeat() + .delayElements(Duration.ofSeconds(60)); } private static class PartitionLevelLocationUnavailabilityInfo { @@ -191,7 +201,7 @@ private static class PartitionLevelLocationUnavailabilityInfo { this.locationEndpointToFailureMetricsForPartition = new ConcurrentHashMap<>(); } - public boolean isFailureThresholdBreachedForLocation(URI locationWithFailure) { + public boolean isFailureThresholdBreachedForLocation(PartitionKeyRange partitionKeyRange, URI locationWithFailure) { AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); @@ -201,7 +211,7 @@ public boolean isFailureThresholdBreachedForLocation(URI locationWithFailure) { failureMetricsForPartitionAsVal = new FailureMetricsForPartition(); } - failureMetricsForPartitionAsVal.handleFailure(1); + failureMetricsForPartitionAsVal.handleFailure(partitionKeyRange); isFailureThresholdBreached.set(failureMetricsForPartitionAsVal.isFailureThresholdBreached()); return failureMetricsForPartitionAsVal; @@ -210,18 +220,18 @@ public boolean isFailureThresholdBreachedForLocation(URI locationWithFailure) { return isFailureThresholdBreached.get(); } - public void bookmarkSuccess(URI succeededLocation) { + public void bookmarkSuccess(PartitionKeyRange partitionKeyRange, URI succeededLocation) { this.locationEndpointToFailureMetricsForPartition.compute(succeededLocation, (locationAsKey, failureMetricsForPartitionAsVal) -> { if (failureMetricsForPartitionAsVal != null) { - failureMetricsForPartitionAsVal.handleSuccess(false);; + failureMetricsForPartitionAsVal.handleSuccess(false, partitionKeyRange);; } return failureMetricsForPartitionAsVal; }); } - public boolean areLocationsAvailableForPartitionKeyRange(List availableLocationsAtAccountLevel) { + public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRange partitionKeyRange, List availableLocationsAtAccountLevel) { for (URI availableLocation : availableLocationsAtAccountLevel) { if (!this.locationEndpointToFailureMetricsForPartition.containsKey(availableLocation)) { @@ -255,7 +265,7 @@ public boolean areLocationsAvailableForPartitionKeyRange(List availableLoca } if (locationLevelFailureMetadataForMostStaleLocation != null) { - locationLevelFailureMetadataForMostStaleLocation.handleSuccess(true); + locationLevelFailureMetadataForMostStaleLocation.handleSuccess(true, partitionKeyRange); return true; } @@ -270,7 +280,7 @@ private static class FailureMetricsForPartition { private final AtomicReference partitionScopedRegionUnavailabilityStatus = new AtomicReference<>(PartitionScopedRegionUnavailabilityStatus.Available); private final AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - public void handleSuccess(boolean forceStateChange) { + public void handleSuccess(boolean forceStateChange, PartitionKeyRange partitionKeyRange) { PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); @@ -289,7 +299,7 @@ public void handleSuccess(boolean forceStateChange) { successCount.incrementAndGet(); if (successCount.get() > 10 && (double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); - logger.info("Partition marked as Available"); + logger.info("Partition {}-{} marked as Available from StaleUnavailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); } } break; @@ -297,11 +307,11 @@ public void handleSuccess(boolean forceStateChange) { if (!forceStateChange) { if (Duration.between(this.unavailableSince.get(), Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition marked as StaleUnavailable"); + logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); } } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition marked as StaleUnavailable"); + logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); } break; default: @@ -309,7 +319,7 @@ public void handleSuccess(boolean forceStateChange) { } } - public void handleFailure(int errorCount) { + public void handleFailure(PartitionKeyRange partitionKeyRange) { PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); @@ -318,18 +328,18 @@ public void handleFailure(int errorCount) { switch (currentStatusSnapshot) { case Available: if (failureCount.get() < allowedFailureCount) { - failureCount.addAndGet(errorCount); + failureCount.incrementAndGet(); } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); - logger.info("Partition marked as FreshUnavailable from Available"); + logger.info("Partition {}-{} marked as FreshUnavailable from Available", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); } break; case StaleUnavailable: if (failureCount.get() < allowedFailureCount) { - failureCount.addAndGet(errorCount); + failureCount.incrementAndGet(); } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); - logger.info("Partition marked as FreshUnavailable from StaleUnavailable"); + logger.info("Partition {}-{} marked as FreshUnavailable from StaleUnavailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); } break; default: From 205d837aece04033d2067c5c7ccdf406f241938d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 13 May 2024 13:47:45 -0400 Subject: [PATCH 028/140] Implementing 408 handling for partition-level circuit breaker. --- ...itionEndpointManagerForCircuitBreaker.java | 46 ++++++++++++------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 1bafe2b4ae2a..a4e6b9eaf89e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -158,6 +158,8 @@ public List getUnavailableLocationsForPartition(PartitionKeyRange partition doesPartitionHaveUnavailableLocations = true; } else if (failureMetricsForPartition.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable) { doesPartitionHaveUnavailableLocations = true; + } else if (failureMetricsForPartition.failureCount.get() >= 1) { + doesPartitionHaveUnavailableLocations = true; } } } @@ -170,30 +172,33 @@ public List getUnavailableLocationsForPartition(PartitionKeyRange partition } private Flux updateStaleLocationInfo() { - return Flux.fromIterable(this.partitionsWithPossibleUnavailableRegions.values()) + return Mono.just(1) + .delayElement(Duration.ofSeconds(60)) + .repeat() + .flatMap(ignore -> Flux.fromIterable(this.partitionsWithPossibleUnavailableRegions.entrySet())) .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) - .flatMap(partitionKeyRange -> { + .flatMap(partitionKeyRangeToPartitionKeyRangePair -> { logger.info("Background updateStaleLocationInfo kicking in..."); + PartitionKeyRange partitionKeyRange = partitionKeyRangeToPartitionKeyRangePair.getKey(); + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); if (partitionLevelLocationUnavailabilityInfo != null) { for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToFailureMetricsForPartition.entrySet()) { FailureMetricsForPartition failureMetricsForPartition = locationToLocationLevelMetrics.getValue(); - failureMetricsForPartition.handleSuccess(false, partitionKeyRange); + failureMetricsForPartition.handleSuccess(false, locationToLocationLevelMetrics.getKey(), partitionKeyRange); } } else { this.partitionsWithPossibleUnavailableRegions.remove(partitionKeyRange); } return Mono.empty(); - }) - .repeat() - .delayElements(Duration.ofSeconds(60)); + }); } - private static class PartitionLevelLocationUnavailabilityInfo { + private class PartitionLevelLocationUnavailabilityInfo { private final ConcurrentHashMap locationEndpointToFailureMetricsForPartition; @@ -224,7 +229,7 @@ public void bookmarkSuccess(PartitionKeyRange partitionKeyRange, URI succeededLo this.locationEndpointToFailureMetricsForPartition.compute(succeededLocation, (locationAsKey, failureMetricsForPartitionAsVal) -> { if (failureMetricsForPartitionAsVal != null) { - failureMetricsForPartitionAsVal.handleSuccess(false, partitionKeyRange);; + failureMetricsForPartitionAsVal.handleSuccess(false, succeededLocation, partitionKeyRange);; } return failureMetricsForPartitionAsVal; @@ -247,6 +252,7 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRange parti Instant mostStaleUnavailableTimeAcrossRegions = Instant.MAX; FailureMetricsForPartition locationLevelFailureMetadataForMostStaleLocation = null; + URI mostStaleUnavailableLocation = null; // find region with most 'stale' unavailability for (Map.Entry uriToLocationLevelFailureMetadata : this.locationEndpointToFailureMetricsForPartition.entrySet()) { @@ -260,12 +266,13 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRange parti if (mostStaleUnavailableTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { mostStaleUnavailableTimeAcrossRegions = unavailableSinceSnapshot; + mostStaleUnavailableLocation = uriToLocationLevelFailureMetadata.getKey(); locationLevelFailureMetadataForMostStaleLocation = failureMetricsForPartition; } } if (locationLevelFailureMetadataForMostStaleLocation != null) { - locationLevelFailureMetadataForMostStaleLocation.handleSuccess(true, partitionKeyRange); + locationLevelFailureMetadataForMostStaleLocation.handleSuccess(true, mostStaleUnavailableLocation, partitionKeyRange); return true; } @@ -273,14 +280,16 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRange parti } } - private static class FailureMetricsForPartition { + private class FailureMetricsForPartition { private final AtomicInteger failureCount = new AtomicInteger(0); private final AtomicInteger successCount = new AtomicInteger(0); private final AtomicReference unavailableSince = new AtomicReference<>(Instant.MAX); private final AtomicReference partitionScopedRegionUnavailabilityStatus = new AtomicReference<>(PartitionScopedRegionUnavailabilityStatus.Available); private final AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - public void handleSuccess(boolean forceStateChange, PartitionKeyRange partitionKeyRange) { + public void handleSuccess(boolean forceStateChange, URI location, PartitionKeyRange partitionKeyRange) { + + logger.info("Handling success"); PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); @@ -299,7 +308,7 @@ public void handleSuccess(boolean forceStateChange, PartitionKeyRange partitionK successCount.incrementAndGet(); if (successCount.get() > 10 && (double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); - logger.info("Partition {}-{} marked as Available from StaleUnavailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); + logger.info("Partition {}-{} marked as Available from StaleUnavailable for location : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } } break; @@ -307,11 +316,11 @@ public void handleSuccess(boolean forceStateChange, PartitionKeyRange partitionK if (!forceStateChange) { if (Duration.between(this.unavailableSince.get(), Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); + logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for location : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); + logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for location : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } break; default: @@ -321,6 +330,8 @@ public void handleSuccess(boolean forceStateChange, PartitionKeyRange partitionK public void handleFailure(PartitionKeyRange partitionKeyRange) { + logger.error("Handling failure"); + PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); int allowedFailureCount = getAllowedFailureCountByStatus(currentStatusSnapshot); @@ -328,15 +339,18 @@ public void handleFailure(PartitionKeyRange partitionKeyRange) { switch (currentStatusSnapshot) { case Available: if (failureCount.get() < allowedFailureCount) { - failureCount.incrementAndGet(); + failureCount.addAndGet(1); + logger.error("Failure count : {}", failureCount.get()); + logger.error("Allowed failure count : {}", allowedFailureCount); } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); + GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRange, partitionKeyRange); logger.info("Partition {}-{} marked as FreshUnavailable from Available", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); } break; case StaleUnavailable: if (failureCount.get() < allowedFailureCount) { - failureCount.incrementAndGet(); + failureCount.addAndGet(1); } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); logger.info("Partition {}-{} marked as FreshUnavailable from StaleUnavailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); From 2a443361049f2d9b47f4742d656f3f982dc0ef23 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 14 May 2024 18:06:41 -0400 Subject: [PATCH 029/140] Code refactor. --- .../com/azure/cosmos/CosmosDiagnostics.java | 29 +++++++ .../ClientSideRequestStatistics.java | 76 ++++++++++++++++++- ...itionEndpointManagerForCircuitBreaker.java | 14 ++-- .../ImplementationBridgeHelpers.java | 5 ++ .../implementation/RxDocumentClientImpl.java | 67 +++++++--------- .../cosmos/implementation/query/Fetcher.java | 39 +++++----- 6 files changed, 167 insertions(+), 63 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java index 4c255a79fcfb..6b67640c7d19 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java @@ -7,6 +7,7 @@ import com.azure.cosmos.implementation.FeedResponseDiagnostics; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.util.Beta; import com.fasterxml.jackson.core.JsonProcessingException; @@ -322,6 +323,14 @@ CosmosDiagnostics setSamplingRateSnapshot(double samplingRate) { return this; } + String getFirstContactedRegion() { + return this.clientSideRequestStatistics.getFirstContactedRegion(); + } + + URI getFirstContactedLocationEndpoint() { + return this.clientSideRequestStatistics.getFirstContactedLocationEndpoint(); + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to access this class outside of this package.// /////////////////////////////////////////////////////////////////////////////////////////// @@ -464,6 +473,26 @@ public void setDiagnosticsContext(CosmosDiagnostics cosmosDiagnostics, CosmosDia cosmosDiagnostics.setDiagnosticsContext(ctx); } + + @Override + public String getFirstContactedRegion(CosmosDiagnostics cosmosDiagnostics) { + + if (cosmosDiagnostics == null) { + return StringUtils.EMPTY; + } + + return cosmosDiagnostics.getFirstContactedRegion(); + } + + @Override + public URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics) { + + if (cosmosDiagnostics == null) { + return null; + } + + return cosmosDiagnostics.getFirstContactedLocationEndpoint(); + } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index 5113bd321a2d..986489e505ad 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -26,8 +26,10 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.NavigableSet; import java.util.Objects; import java.util.Set; +import java.util.TreeSet; import java.util.UUID; import java.util.concurrent.ConcurrentLinkedDeque; import java.util.stream.Collectors; @@ -46,6 +48,7 @@ public class ClientSideRequestStatistics { private Instant requestStartTimeUTC; private Instant requestEndTimeUTC; private Set regionsContacted; + private NavigableSet regionsContactedWithContext; private Set locationEndpointsContacted; private RetryContext retryContext; private FaultInjectionRequestContext requestContext; @@ -68,6 +71,7 @@ public ClientSideRequestStatistics(DiagnosticsClientContext diagnosticsClientCon this.contactedReplicas = Collections.synchronizedList(new ArrayList<>()); this.failedReplicas = Collections.synchronizedSet(new HashSet<>()); this.regionsContacted = Collections.synchronizedSet(new HashSet<>()); + this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>()); this.locationEndpointsContacted = Collections.synchronizedSet(new HashSet<>()); this.metadataDiagnosticsContext = new MetadataDiagnosticsContext(); this.serializationDiagnosticsContext = new SerializationDiagnosticsContext(); @@ -88,6 +92,7 @@ public ClientSideRequestStatistics(ClientSideRequestStatistics toBeCloned) { this.contactedReplicas = Collections.synchronizedList(new ArrayList<>(toBeCloned.contactedReplicas)); this.failedReplicas = Collections.synchronizedSet(new HashSet<>(toBeCloned.failedReplicas)); this.regionsContacted = Collections.synchronizedSet(new HashSet<>(toBeCloned.regionsContacted)); + this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>(toBeCloned.regionsContactedWithContext)); this.locationEndpointsContacted = Collections.synchronizedSet( new HashSet<>(toBeCloned.locationEndpointsContacted)); this.metadataDiagnosticsContext = new MetadataDiagnosticsContext(toBeCloned.metadataDiagnosticsContext); @@ -176,6 +181,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost globalEndpointManager.getRegionName(locationEndPoint, request.getOperationType()); this.regionsContacted.add(storeResponseStatistics.regionName); this.locationEndpointsContacted.add(locationEndPoint); + this.regionsContactedWithContext.add(new RegionWithContext(storeResponseStatistics.regionName, locationEndPoint)); } if (storeResponseStatistics.requestOperationType == OperationType.Head @@ -206,8 +212,13 @@ public void recordGatewayResponse( this.recordRetryContextEndTime(); if (locationEndPoint != null) { - this.regionsContacted.add(globalEndpointManager.getRegionName(locationEndPoint, rxDocumentServiceRequest.getOperationType())); + + String regionName = globalEndpointManager.getRegionName(locationEndPoint, rxDocumentServiceRequest.getOperationType()); + + this.regionsContacted.add(regionName); this.locationEndpointsContacted.add(locationEndPoint); + + this.regionsContactedWithContext.add(new RegionWithContext(regionName, locationEndPoint)); } GatewayStatistics gatewayStatistics = new GatewayStatistics(); @@ -397,6 +408,21 @@ private void mergeLocationEndpointsContacted(Set other) { } } + private void mergeRegionWithContextSet(NavigableSet other) { + if (other == null) { + return; + } + + if (this.regionsContactedWithContext == null || this.regionsContactedWithContext.isEmpty()) { + this.regionsContactedWithContext = other; + return; + } + + for (RegionWithContext regionWithContext : other) { + this.regionsContactedWithContext.add(regionWithContext); + } + } + private void mergeRegionsContacted(Set other) { if (other == null) { return; @@ -458,6 +484,7 @@ public void mergeClientSideRequestStatistics(ClientSideRequestStatistics other) this.mergeFailedReplica(other.failedReplicas); this.mergeLocationEndpointsContacted(other.locationEndpointsContacted); this.mergeRegionsContacted(other.regionsContacted); + this.mergeRegionWithContextSet(other.regionsContactedWithContext); this.mergeStartTime(other.requestStartTimeUTC); this.mergeEndTime(other.requestEndTimeUTC); this.mergeSupplementalResponses(other.supplementalResponseStatisticsList); @@ -584,6 +611,22 @@ public ClientSideRequestStatistics setSamplingRateSnapshot(double samplingRateSn return this; } + public String getFirstContactedRegion() { + if (this.regionsContactedWithContext == null || this.regionsContactedWithContext.isEmpty()) { + return StringUtils.EMPTY; + } + + return this.regionsContactedWithContext.first().regionContacted; + } + + public URI getFirstContactedLocationEndpoint() { + if (this.regionsContactedWithContext == null || this.regionsContactedWithContext.isEmpty()) { + return null; + } + + return this.regionsContactedWithContext.first().locationEndpointsContacted; + } + public static class StoreResponseStatistics { @JsonSerialize(using = StoreResultDiagnostics.StoreResultDiagnosticsSerializer.class) private StoreResultDiagnostics storeResult; @@ -910,4 +953,35 @@ public static CosmosDiagnosticsSystemUsageSnapshot fetchSystemInformation() { (maxMemory - (totalMemory - freeMemory)) + " KB", runtime.availableProcessors()); } + + static class RegionWithContext implements Comparable { + + private final String regionContacted; + private final URI locationEndpointsContacted; + private final long recordedTimestamp; + + RegionWithContext(String regionContacted, URI locationEndpointsContacted) { + this.regionContacted = regionContacted; + this.locationEndpointsContacted = locationEndpointsContacted; + this.recordedTimestamp = System.currentTimeMillis(); + } + + public String getRegionContacted() { + return regionContacted; + } + + @Override + public int compareTo(RegionWithContext o) { + + if (o == null || this.recordedTimestamp > o.recordedTimestamp) { + return 1; + } + + if (this.recordedTimestamp == o.recordedTimestamp) { + return 0; + } + + return -1; + } + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index a4e6b9eaf89e..29fa078ce158 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -216,7 +216,7 @@ public boolean isFailureThresholdBreachedForLocation(PartitionKeyRange partition failureMetricsForPartitionAsVal = new FailureMetricsForPartition(); } - failureMetricsForPartitionAsVal.handleFailure(partitionKeyRange); + failureMetricsForPartitionAsVal.handleFailure(partitionKeyRange, locationAsKey); isFailureThresholdBreached.set(failureMetricsForPartitionAsVal.isFailureThresholdBreached()); return failureMetricsForPartitionAsVal; @@ -308,7 +308,7 @@ public void handleSuccess(boolean forceStateChange, URI location, PartitionKeyRa successCount.incrementAndGet(); if (successCount.get() > 10 && (double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); - logger.info("Partition {}-{} marked as Available from StaleUnavailable for location : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + logger.info("Partition {}-{} marked as Available from StaleUnavailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } } break; @@ -316,11 +316,11 @@ public void handleSuccess(boolean forceStateChange, URI location, PartitionKeyRa if (!forceStateChange) { if (Duration.between(this.unavailableSince.get(), Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for location : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for location : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } break; default: @@ -328,7 +328,7 @@ public void handleSuccess(boolean forceStateChange, URI location, PartitionKeyRa } } - public void handleFailure(PartitionKeyRange partitionKeyRange) { + public void handleFailure(PartitionKeyRange partitionKeyRange, URI location) { logger.error("Handling failure"); @@ -345,7 +345,7 @@ public void handleFailure(PartitionKeyRange partitionKeyRange) { } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRange, partitionKeyRange); - logger.info("Partition {}-{} marked as FreshUnavailable from Available", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); + logger.info("Partition {}-{} marked as FreshUnavailable from Available for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } break; case StaleUnavailable: @@ -353,7 +353,7 @@ public void handleFailure(PartitionKeyRange partitionKeyRange) { failureCount.addAndGet(1); } else { this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); - logger.info("Partition {}-{} marked as FreshUnavailable from StaleUnavailable", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive()); + logger.info("Partition {}-{} marked as FreshUnavailable from StaleUnavailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); } break; default: diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 30df715c8267..947b9808a31f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -76,6 +76,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import java.net.URI; import java.time.Duration; import java.util.Collection; import java.util.EnumSet; @@ -771,6 +772,10 @@ void recordAddressResolutionEnd( boolean isNotEmpty(CosmosDiagnostics cosmosDiagnostics); void setDiagnosticsContext(CosmosDiagnostics cosmosDiagnostics, CosmosDiagnosticsContext ctx); + + String getFirstContactedRegion(CosmosDiagnostics cosmosDiagnostics); + + URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 51a7f5b10c60..de2bcdc8afac 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -17,7 +17,6 @@ import com.azure.cosmos.DirectConnectionConfig; import com.azure.cosmos.SessionRetryOptions; import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; -import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.ImmutablePair; import com.azure.cosmos.implementation.batch.BatchResponseParser; @@ -86,6 +85,7 @@ import reactor.core.Exceptions; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import reactor.core.publisher.SignalType; import reactor.util.concurrent.Queues; import java.io.IOException; @@ -106,7 +106,6 @@ import java.util.Locale; import java.util.Map; import java.util.NoSuchElementException; -import java.util.Optional; import java.util.Set; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; @@ -2238,52 +2237,35 @@ private Mono handleRegionFeedbackForPointOperation( .doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { - RxDocumentServiceRequest failedRequest = requestReference.get(); - PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + RxDocumentServiceRequest failedRequest = requestReference.get(); + PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); - if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { + if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { - OperationCancelledException exception = Utils.as(throwable, OperationCancelledException.class); - Optional firstContactedRegion = exception.getDiagnostics().getContactedRegionNames().stream().findFirst(); - - UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); - - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); + if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); + } + } else { + this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); } - } else { - OperationCancelledException exception = Utils.as(throwable, OperationCancelledException.class); - Optional firstContactedRegion = exception.getDiagnostics().getContactedRegionNames().stream().findFirst(); - - UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); - - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); } } }) - .doOnCancel(() -> { - RxDocumentServiceRequest failedRequest = requestReference.get(); - PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); + .doFinally(signalType -> { - if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { - - if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { - Optional firstContactedRegion = failedRequest.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); + if (signalType == SignalType.CANCEL && Configs.isPartitionLevelCircuitBreakerEnabled()) { + RxDocumentServiceRequest failedRequest = requestReference.get(); + PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); - UnmodifiableList endpoints = requestReference.get().isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, requestReference.get().getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(requestReference.get(), filteredEndpoint.get(0)); + if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); + } + } else { + this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); } - } else { - Optional firstContactedRegion = failedRequest.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); - - UnmodifiableList endpoints = failedRequest.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, failedRequest.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); - - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(failedRequest, filteredEndpoint.get(0)); } }); } @@ -6189,6 +6171,15 @@ private Mono executeFeedOperationWithAvailabilityStrategy( }); } + private void tryMarkPartitionKeyRangeAsUnavailableForRegion(RxDocumentServiceRequest failedRequest) { + + URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); + + if (firstContactedLocationEndpoint != null) { + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); + } + } + @FunctionalInterface private interface DocumentPointOperation { Mono> apply( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 7ee80c807a82..4d68ff850502 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.FeedOperationContext; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; @@ -36,6 +37,10 @@ abstract class Fetcher { private final static Logger logger = LoggerFactory.getLogger(Fetcher.class); + private final static + ImplementationBridgeHelpers.CosmosDiagnosticsHelper.CosmosDiagnosticsAccessor diagnosticsAccessor = + ImplementationBridgeHelpers.CosmosDiagnosticsHelper.getCosmosDiagnosticsAccessor(); + private final Function>> executeFunc; private final boolean isChangeFeed; private final OperationContextAndListenerTuple operationContext; @@ -194,33 +199,33 @@ private Mono> nextPage(RxDocumentServiceRequest request) { return; } - FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); - - if (feedOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange)) { + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { - Optional firstContactedRegion = request.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); + FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); - UnmodifiableList endpoints = request.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); + if (feedOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { + if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange)) { - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(request, filteredEndpoint.get(0)); + if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { + this.tryMarkPartitionKeyRangeAsUnavailable(request); + } } + } else { + this.tryMarkPartitionKeyRangeAsUnavailable(request); } - } else { - Optional firstContactedRegion = request.requestContext.cosmosDiagnostics.getContactedRegionNames().stream().findFirst(); - - UnmodifiableList endpoints = request.isReadOnly() ? this.globalEndpointManager.getReadEndpoints() : this.globalEndpointManager.getWriteEndpoints(); - List filteredEndpoint = endpoints.stream().filter(uri -> this.globalEndpointManager.getRegionName(uri, request.getOperationType()).equals(firstContactedRegion.get())).collect(Collectors.toList()); - - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(request, filteredEndpoint.get(0)); } - if (request.requestContext != null && request.requestContext.cosmosDiagnostics != null) { this.cancelledRequestDiagnosticsTracker.add(request.requestContext.cosmosDiagnostics); } }); } + + private void tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest failedRequest) { + URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); + + if (firstContactedLocationEndpoint != null) { + this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); + } + } } From 247c9a78a1790ee98bb641a8fddeed6af932aa3f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 17 May 2024 21:27:10 -0400 Subject: [PATCH 030/140] Test changes and multi-container fixes. --- .../PartitionLevelCircuitBreakerTests.java | 1044 +++++++++-------- .../implementation/ClientRetryPolicy.java | 2 +- ...itionEndpointManagerForCircuitBreaker.java | 518 ++++---- .../implementation/RxDocumentClientImpl.java | 7 +- .../implementation/RxGatewayStoreModel.java | 6 +- .../directconnectivity/StoreClient.java | 6 +- .../cosmos/implementation/query/Fetcher.java | 8 +- 7 files changed, 863 insertions(+), 728 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index ae402f2b62c0..71c3d12ebaa0 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -11,10 +11,13 @@ import com.azure.cosmos.CosmosClientBuilder; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; +import com.azure.cosmos.CosmosException; import com.azure.cosmos.TestObject; import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; import com.azure.cosmos.faultinjection.FaultInjectionTestBase; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; +import com.azure.cosmos.implementation.guava25.base.Function; +import com.azure.cosmos.implementation.throughputControl.TestItem; import com.azure.cosmos.models.CosmosContainerProperties; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; @@ -38,6 +41,7 @@ import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType; import org.testng.SkipException; import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; import org.testng.annotations.Factory; import org.testng.annotations.Test; import reactor.core.publisher.Flux; @@ -78,8 +82,36 @@ public void beforeClass() { } } - @Test(groups = {"multi-master"}) - public void readHits503InFirstPreferredRegion() { + @DataProvider(name = "partitionLevelCircuitBreakerTestConfigs") + public Object[][] partitionLevelCircuitBreakerTestConfigs() { + return new Object[][] { +// {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, +// {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, +// {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, +// {FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, +// {FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, +// {FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, +// {FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false} + }; + } + + @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs") + public void operationHitsTerminalExceptionInFirstPreferredRegion( + FaultInjectionOperationType faultInjectionOperationType, + FaultInjectionServerErrorType faultInjectionServerErrorType, + int faultInjectionHitCount, + Duration faultInjectionDuration, + boolean shouldEndToEndTimeoutBeInjected, + boolean shouldThresholdBasedAvailabilityStrategyBeEnabled) { + + logger.info("Checking circuit breaking behavior for {}", faultInjectionOperationType); List preferredRegions = this.writeRegions; CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); @@ -87,7 +119,7 @@ public void readHits503InFirstPreferredRegion() { ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("readHits503InPrimaryRegion test is not applicable to GATEWAY connectivity mode!"); + throw new SkipException("Test is not applicable to GATEWAY connectivity mode!"); } CosmosAsyncClient client = clientBuilder.buildAsyncClient(); @@ -97,7 +129,7 @@ public void readHits503InFirstPreferredRegion() { CosmosAsyncContainer container = null; CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(getProvisionedThroughputForContainer(faultInjectionOperationType)); try { @@ -108,260 +140,142 @@ public void readHits503InFirstPreferredRegion() { Thread.sleep(10_000); - TestObject testObject = TestObject.create(); - - String itemIdMappingToUnhealthyPartition = testObject.getId(); - - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.READ_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) - .region(preferredRegions.get(0)) - .build(); - - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); - - FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(13) - .build(); - - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) - .block(); - - container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); - - for (int i = 1; i <= 15; i++) { - CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); - - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); + int testObjCountToBootstrapFrom = getTestObjectCountToBootstrapFrom(faultInjectionOperationType, 15); + List testObjects = new ArrayList<>(); - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { + TestObject testObject = TestObject.create(); + testObjects.add(testObject); + container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); } - logger.info("Sleep for 60 seconds"); - - Thread.sleep(60_000); - - for (int i = 1; i <= 30; i++) { - CosmosItemResponse response = container.readItem(itemIdMappingToUnhealthyPartition, new PartitionKey(itemIdMappingToUnhealthyPartition), TestObject.class).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + FeedRange faultyFeedRange; - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + if (testObjects.size() != 1) { + faultyFeedRange = FeedRange.forFullRange(); + } else { + faultyFeedRange = FeedRange.forLogicalPartition(new PartitionKey(testObjects.get(0).getId())); } - - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Read operations should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); - } - } - - @Test(groups = {"multi-master"}) - public void upsertHits503InFirstPreferredRegion() { - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("upsertHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); - } - - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - - String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; - - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); - - try { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); - - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); - - Thread.sleep(10_000); - - TestObject testObject = TestObject.create(); - - String itemIdMappingToUnhealthyPartition = testObject.getId(); - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.UPSERT_ITEM) + .operationType(faultInjectionOperationType) .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) + .endpoints(new FaultInjectionEndpointBuilder(faultyFeedRange).build()) .region(preferredRegions.get(0)) .build(); FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); - - FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(13) + .getResultBuilder(faultInjectionServerErrorType) .build(); - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) - .block(); - - container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); - - for (int i = 1; i <= 15; i++) { - CosmosItemResponse response = container.upsertItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); - - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - - CosmosItemResponse response = container.upsertItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); - logger.info("Sleep for 60 seconds"); - - Thread.sleep(60_000); - - for (int i = 1; i <= 30; i++) { - response = container.upsertItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); - - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + FaultInjectionRule faultInjectionRule = null; + + if (faultInjectionServerErrorType == FaultInjectionServerErrorType.GONE) { + faultInjectionRule = new FaultInjectionRuleBuilder("gone-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(faultInjectionDuration) + .build(); + } else if (faultInjectionServerErrorType == FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) { + faultInjectionRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(faultInjectionHitCount) + .build(); } + if (faultInjectionRule != null) { - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Upsert operations should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); - } - } - - @Test(groups = {"multi-master"}) - public void createHits503InFirstPreferredRegion() { - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("createHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); - } - - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - - String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; - - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); - - try { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); - - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); - - Thread.sleep(10_000); - - TestObject testObject = TestObject.create(); - - String itemIdMappingToUnhealthyPartition = testObject.getId(); - - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.CREATE_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) - .region(preferredRegions.get(0)) - .build(); - - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(faultInjectionRule)) + .block(); - FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(13) - .build(); + Function> faultInjectedFunc = + generateOperation(faultInjectionOperationType); - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) - .block(); + assertThat(faultInjectedFunc).isNotNull().as("faultInjectedFunc cannot be null!"); - container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + operationInvocationParamsWrapper.cosmosAsyncContainer = container; - for (int i = 1; i <= 15; i++) { - testObject = TestObject.create(); - CosmosItemResponse response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + if (shouldEndToEndTimeoutBeInjected) { - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = (shouldThresholdBasedAvailabilityStrategyBeEnabled) ? + new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) + .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()).build() : + new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build(); - testObject = TestObject.create(); - CosmosItemResponse response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - logger.info("Sleep for 60 seconds"); + operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); - Thread.sleep(60_000); + operationInvocationParamsWrapper.queryRequestOptions = new CosmosQueryRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); - for (int i = 1; i <= 30; i++) { - testObject = TestObject.create(); - response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + operationInvocationParamsWrapper.patchItemRequestOptions = new CosmosPatchItemRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); + for (int i = 1; i <= 15; i++) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); + OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); + logger.info("Hit count : {}", faultInjectionRule.getHitCount()); + + if (response.cosmosItemResponse != null) { + assertThat(response.cosmosItemResponse).isNotNull(); + assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + + response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response.feedResponse != null) { + assertThat(response.feedResponse).isNotNull(); + assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + + response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response.cosmosException != null) { + assertThat(response.cosmosException).isNotNull(); + assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + + response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + logger.info("Sleep for 120 seconds"); + Thread.sleep(120_000); + + for (int i = 16; i <= 30; i++) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); + OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); + + logger.info("Hit count : {}", faultInjectionRule.getHitCount()); + + if (response.cosmosItemResponse != null) { + assertThat(response.cosmosItemResponse).isNotNull(); + assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + + response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response.feedResponse != null) { + assertThat(response.feedResponse).isNotNull(); + assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + + response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response.cosmosException != null) { + assertThat(response.cosmosException).isNotNull(); + assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + + response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } } logger.info("End test"); @@ -369,7 +283,7 @@ public void createHits503InFirstPreferredRegion() { fail("InterruptedException should not have been thrown!"); } catch (Exception ex) { logger.error("Exception thrown :", ex); - fail("Create operations should have passed!"); + fail("Test should have passed!"); } finally { System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); safeDeleteCollection(container); @@ -378,140 +292,58 @@ public void createHits503InFirstPreferredRegion() { } @Test(groups = {"multi-master"}) - public void deleteHits503InFirstPreferredRegion() { + public void operationHitsTerminalExceptionInMultipleContainers() { + logger.info("Checking circuit breaking behavior for {}", FaultInjectionOperationType.READ_ITEM); + List preferredRegions = this.writeRegions; CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("deleteHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + throw new SkipException("Test is not applicable to GATEWAY connectivity mode!"); } CosmosAsyncClient client = clientBuilder.buildAsyncClient(); CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; + String multiPartitionContainerId1 = UUID.randomUUID() + "-multi-partition-test-container"; + String multiPartitionContainerId2 = UUID.randomUUID() + "-multi-partition-test-container"; - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); + CosmosAsyncContainer container1 = null; + CosmosContainerProperties containerProperties1 = new CosmosContainerProperties(multiPartitionContainerId1, "/id"); + ThroughputProperties throughputProperties1 = ThroughputProperties.createManualThroughput(12_000); + + CosmosAsyncContainer container2 = null; + CosmosContainerProperties containerProperties2 = new CosmosContainerProperties(multiPartitionContainerId2, "/id"); + ThroughputProperties throughputProperties2 = ThroughputProperties.createManualThroughput(12_000); try { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); + database.createContainerIfNotExists(containerProperties1, throughputProperties1).block(); + container1 = database.getContainer(multiPartitionContainerId1); - Thread.sleep(10_000); + database.createContainerIfNotExists(containerProperties2, throughputProperties2).block(); + container2 = database.getContainer(multiPartitionContainerId2); - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.DELETE_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) - .region(preferredRegions.get(0)) - .build(); - - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); - - FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(13) - .build(); - - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) - .block(); + Thread.sleep(10_000); - List idAndPks = new ArrayList<>(); + int testObjCountToBootstrapFrom = 2; + List testObjects1 = new ArrayList<>(); - for (int i = 1; i <= 30; i++) { + for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { TestObject testObject = TestObject.create(); - CosmosItemResponse response = container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - idAndPks.add(testObject.getId()); - } - - for (int i = 0; i < 15; i++) { - CosmosItemResponse response = container.deleteItem(idAndPks.get(i), new PartitionKey(idAndPks.get(i)), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); - - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + testObjects1.add(testObject); + container1.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + container2.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); } - logger.info("Sleeping for a minute!"); - Thread.sleep(60_000); - - for (int i = 15; i < 30; i++) { - CosmosItemResponse response = container.deleteItem(idAndPks.get(i), new PartitionKey(idAndPks.get(i)), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); - - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Create operations should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); - } - } - - @Test(groups = {"multi-master"}) - public void patchHits503InFirstPreferredRegion() { - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("createHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); - } - - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - - String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; - - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); - - try { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); - - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); - - Thread.sleep(10_000); - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.PATCH_ITEM) + .operationType(FaultInjectionOperationType.READ_ITEM) .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(testObjects1.get(0).getId()))).build()) .region(preferredRegions.get(0)) .build(); @@ -519,46 +351,153 @@ public void patchHits503InFirstPreferredRegion() { .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) .build(); - FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + FaultInjectionRule faultInjectionRule1 = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) .condition(faultInjectionCondition) .result(faultInjectionServerErrorResult) - .hitLimit(13) + .hitLimit(11) .build(); - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) - .block(); - - TestObject testObject = TestObject.create(); - container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - - CosmosPatchOperations patchOperations = CosmosPatchOperations.create().add("/number", 555); - - for (int i = 0; i < 15; i++) { - CosmosItemResponse response = container.patchItem(testObject.getId(), new PartitionKey(testObject.getId()), patchOperations, new CosmosPatchItemRequestOptions(), TestObject.class).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + FaultInjectionRule faultInjectionRule2 = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(11) + .build(); - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); + if (faultInjectionRule1 != null && faultInjectionRule2 != null) { - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - - logger.info("Sleep for 60 seconds!"); - Thread.sleep(60_000); + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container1, Arrays.asList(faultInjectionRule1)) + .block(); - for (int i = 0; i < 15; i++) { - CosmosItemResponse response = container.patchItem(testObject.getId(), new PartitionKey(testObject.getId()), patchOperations, new CosmosPatchItemRequestOptions(), TestObject.class).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container2, Arrays.asList(faultInjectionRule2)) + .block(); - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); + OperationInvocationParamsWrapper paramsWrapper1 = new OperationInvocationParamsWrapper(); + OperationInvocationParamsWrapper paramsWrapper2 = new OperationInvocationParamsWrapper(); + + Function> faultInjectedFunc = generateOperation(FaultInjectionOperationType.READ_ITEM); + + for (int i = 1; i <= 15; i++) { + paramsWrapper1.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); + paramsWrapper1.cosmosAsyncContainer = container1; + + paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); + paramsWrapper2.cosmosAsyncContainer = container2; + + OperationExecutionResult response1 = faultInjectedFunc.apply(paramsWrapper1); + OperationExecutionResult response2 = faultInjectedFunc.apply(paramsWrapper2); + + logger.info("Hit count : {}", faultInjectionRule1.getHitCount()); + logger.info("Hit count : {}", faultInjectionRule2.getHitCount()); + + if (response1.cosmosItemResponse != null) { + assertThat(response1.cosmosItemResponse).isNotNull(); + assertThat(response1.cosmosItemResponse.getDiagnostics()).isNotNull(); + + response1.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response1.feedResponse != null) { + assertThat(response1.feedResponse).isNotNull(); + assertThat(response1.feedResponse.getCosmosDiagnostics()).isNotNull(); + + response1.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response1.cosmosException != null) { + assertThat(response1.cosmosException).isNotNull(); + assertThat(response1.cosmosException.getDiagnostics()).isNotNull(); + + response1.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + if (response2.cosmosItemResponse != null) { + assertThat(response2.cosmosItemResponse).isNotNull(); + assertThat(response2.cosmosItemResponse.getDiagnostics()).isNotNull(); + + response2.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response2.feedResponse != null) { + assertThat(response2.feedResponse).isNotNull(); + assertThat(response2.feedResponse.getCosmosDiagnostics()).isNotNull(); + + response2.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response2.cosmosException != null) { + assertThat(response2.cosmosException).isNotNull(); + assertThat(response2.cosmosException.getDiagnostics()).isNotNull(); + + response2.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + logger.info("Sleep for 120 seconds"); + Thread.sleep(120_000); + + for (int i = 16; i <= 30; i++) { + paramsWrapper1.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); + paramsWrapper1.cosmosAsyncContainer = container1; + + paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); + paramsWrapper2.cosmosAsyncContainer = container2; + + OperationExecutionResult response1 = faultInjectedFunc.apply(paramsWrapper1); + OperationExecutionResult response2 = faultInjectedFunc.apply(paramsWrapper2); + + logger.info("Hit count : {}", faultInjectionRule1.getHitCount()); + logger.info("Hit count : {}", faultInjectionRule2.getHitCount()); + + if (response1.cosmosItemResponse != null) { + assertThat(response1.cosmosItemResponse).isNotNull(); + assertThat(response1.cosmosItemResponse.getDiagnostics()).isNotNull(); + + response1.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response1.feedResponse != null) { + assertThat(response1.feedResponse).isNotNull(); + assertThat(response1.feedResponse.getCosmosDiagnostics()).isNotNull(); + + response1.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response1.cosmosException != null) { + assertThat(response1.cosmosException).isNotNull(); + assertThat(response1.cosmosException.getDiagnostics()).isNotNull(); + + response1.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + if (response2.cosmosItemResponse != null) { + assertThat(response2.cosmosItemResponse).isNotNull(); + assertThat(response2.cosmosItemResponse.getDiagnostics()).isNotNull(); + + response2.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response2.feedResponse != null) { + assertThat(response2.feedResponse).isNotNull(); + assertThat(response2.feedResponse.getCosmosDiagnostics()).isNotNull(); + + response2.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response2.cosmosException != null) { + assertThat(response2.cosmosException).isNotNull(); + assertThat(response2.cosmosException.getDiagnostics()).isNotNull(); + + response2.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } } logger.info("End test"); @@ -566,205 +505,247 @@ public void patchHits503InFirstPreferredRegion() { fail("InterruptedException should not have been thrown!"); } catch (Exception ex) { logger.error("Exception thrown :", ex); - fail("Patch operations should have passed!"); + fail("Test should have passed!"); } finally { System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); + safeDeleteCollection(container1); + safeDeleteCollection(container2); safeClose(client); } } - @Test(groups = {"multi-master"}) - public void replaceHits503InFirstPreferredRegion() { - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("createHits503InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + private static int getTestObjectCountToBootstrapFrom(FaultInjectionOperationType faultInjectionOperationType, int opCount) { + switch (faultInjectionOperationType) { + case READ_ITEM: + case UPSERT_ITEM: + case REPLACE_ITEM: + case QUERY_ITEM: + case PATCH_ITEM: + return 1; + case DELETE_ITEM: + return 2 * opCount; + case CREATE_ITEM: + return 0; + default: + throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); } + } - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + private static Function> generateOperation(FaultInjectionOperationType faultInjectionOperationType) { - String multiPartitionContainerId = UUID.randomUUID() + "-single-partition-test-container"; + switch (faultInjectionOperationType) { + case READ_ITEM: + return (paramsWrapper) -> { - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(5000); + CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; - try { + try { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + CosmosItemResponse readItemResponse = asyncContainer.readItem( + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + itemRequestOptions, + TestObject.class) + .block(); - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); + return new OperationExecutionResult<>(readItemResponse); + } catch (Exception ex) { - Thread.sleep(10_000); + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.REPLACE_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forFullRange()).build()) - .region(preferredRegions.get(0)) - .build(); + throw ex; + } + }; + case UPSERT_ITEM: + return (paramsWrapper) -> { - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); + CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; - FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(13) - .build(); + try { - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) - .block(); + CosmosItemResponse upsertItemResponse = asyncContainer.upsertItem( + createdTestObject, + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + .block(); - TestObject testObject = TestObject.create(); - container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + return new OperationExecutionResult<>(upsertItemResponse); + } catch (Exception ex) { - for (int i = 0; i < 15; i++) { - CosmosItemResponse response = container.replaceItem(testObject, testObject.getId(), new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); + throw ex; + } + }; + case CREATE_ITEM: + return (paramsWrapper) -> { - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } + CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + TestObject createdTestObject = TestObject.create(); + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; - logger.info("Sleep for 60 seconds!"); - Thread.sleep(60_000); + try { - for (int i = 0; i < 15; i++) { - CosmosItemResponse response = container.replaceItem(testObject, testObject.getId(), new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + CosmosItemResponse createItemResponse = asyncContainer.createItem( + createdTestObject, + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + .block(); - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); + return new OperationExecutionResult<>(createItemResponse); + } catch (Exception ex) { - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Replace operations should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); - } - } + throw ex; + } + }; + case DELETE_ITEM: + return (paramsWrapper) -> { - @Test(groups = {"multi-master"}) - public void queryHits503InFirstPreferredRegion() { - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + try { - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("readHits503InPrimaryRegion test is not applicable to GATEWAY connectivity mode!"); - } + CosmosItemResponse deleteItemResponse = asyncContainer.deleteItem( + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + .block(); - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + return new OperationExecutionResult<>(deleteItemResponse); + } catch (Exception ex) { - String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(6_000); + throw ex; + } + }; + case PATCH_ITEM: + return (paramsWrapper) -> { - try { + CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosPatchItemRequestOptions patchItemRequestOptions = (CosmosPatchItemRequestOptions) paramsWrapper.patchItemRequestOptions; - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + CosmosPatchOperations patchOperations = CosmosPatchOperations.create().add("/number", 555); - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); + try { - Thread.sleep(10_000); + CosmosItemResponse patchItemResponse = asyncContainer.patchItem( + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + patchOperations, + patchItemRequestOptions, + TestObject.class) + .block(); - TestObject testObject = TestObject.create(); + return new OperationExecutionResult<>(patchItemResponse); + } catch (Exception ex) { - String itemIdMappingToUnhealthyPartition = testObject.getId(); + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.QUERY_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) - .region(preferredRegions.get(0)) - .build(); + throw ex; + } + }; + case QUERY_ITEM: + return (paramsWrapper) -> { - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); + CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; - FaultInjectionRule serviceUnavailableRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(13) - .build(); + try { - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(serviceUnavailableRule)) - .block(); + FeedResponse queryItemResponse = asyncContainer.queryItems( + "SELECT * FROM C", + queryRequestOptions, + TestObject.class) + .byPage() + .blockLast(); - container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); + return new OperationExecutionResult<>(queryItemResponse); + } catch (Exception ex) { - for (int i = 1; i <= 15; i++) { - FeedResponse response = container.queryItems("SELECT * FROM c", TestObject.class).byPage().blockLast(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } - assertThat(response).isNotNull(); - assertThat(response.getCosmosDiagnostics()).isNotNull(); + throw ex; + } + }; + case REPLACE_ITEM: + return (paramsWrapper) -> { - response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + TestObject createdTestObject = paramsWrapper.createdTestObject; + CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; - logger.info("CosmosDiagnostics : {}", response.getCosmosDiagnostics().toString()); - } + try { - logger.info("Sleep for 60 seconds!"); - Thread.sleep(60_000); + CosmosItemResponse deleteItemResponse = asyncContainer.replaceItem( + createdTestObject, + createdTestObject.getId(), + new PartitionKey(createdTestObject.getId()), + itemRequestOptions) + .block(); - for (int i = 1; i <= 30; i++) { - FeedResponse response = container.queryItems("SELECT * FROM c", TestObject.class).byPage().blockLast(); - logger.info("Hit count : {}", serviceUnavailableRule.getHitCount()); + return new OperationExecutionResult<>(deleteItemResponse); + } catch (Exception ex) { - assertThat(response).isNotNull(); - assertThat(response.getCosmosDiagnostics()).isNotNull(); + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } - response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } + throw ex; + } + }; + default: + throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); + } + } - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Query operations should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); + private static int getProvisionedThroughputForContainer(FaultInjectionOperationType faultInjectionOperationType) { + switch (faultInjectionOperationType) { + case READ_ITEM: + case UPSERT_ITEM: + case REPLACE_ITEM: + case QUERY_ITEM: + case PATCH_ITEM: + return 12_000; + case DELETE_ITEM: + case CREATE_ITEM: + return 6_000; + default: + throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); } } + @Test(groups = {"multi-master"}) + public void operationHitsServiceUnavailableInSecondPreferredRegion() {} + @Test(groups = {"multi-master"}) public void queryWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion() { List preferredRegions = this.writeRegions; @@ -921,7 +902,7 @@ public void readWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredR CosmosAsyncContainer container = null; CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(12_000); try { @@ -1032,6 +1013,39 @@ public void readWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredR } } + static class OperationExecutionResult { + + private final CosmosItemResponse cosmosItemResponse; + private final CosmosException cosmosException; + private final FeedResponse feedResponse; + + OperationExecutionResult(FeedResponse feedResponse) { + this.feedResponse = feedResponse; + this.cosmosException = null; + this.cosmosItemResponse = null; + } + + OperationExecutionResult(CosmosItemResponse cosmosItemResponse) { + this.cosmosItemResponse = cosmosItemResponse; + this.cosmosException = null; + this.feedResponse = null; + } + + OperationExecutionResult(CosmosException cosmosException) { + this.cosmosException = cosmosException; + this.cosmosItemResponse = null; + this.feedResponse = null; + } + } + + static class OperationInvocationParamsWrapper { + public CosmosAsyncContainer cosmosAsyncContainer; + public TestObject createdTestObject; + public CosmosItemRequestOptions itemRequestOptions; + public CosmosQueryRequestOptions queryRequestOptions; + public CosmosItemRequestOptions patchItemRequestOptions; + } + private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { Iterator locationIterator = writeOnly ? databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 1e342bd457e7..b7ab9010ab63 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -322,7 +322,7 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( // if partition-level circuit breaker is enabled if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - this.globalPartitionEndpointManager.tryMarkRegionAsUnavailableForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); + this.globalPartitionEndpointManager.handleLocationExceptionForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); } // The request has failed with 503, SDK need to decide whether it is safe to retry for write operations diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 29fa078ce158..8b879cd4147a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -15,10 +15,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.atomic.AtomicReference; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -27,50 +26,67 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); private final GlobalEndpointManager globalEndpointManager; - private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; - private final ConcurrentHashMap partitionsWithPossibleUnavailableRegions; + private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; + private final ConcurrentHashMap partitionsWithPossibleUnavailableRegions; + private final LocationContextTransitionHandler locationContextTransitionHandler; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); this.partitionsWithPossibleUnavailableRegions = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; + this.locationContextTransitionHandler = new LocationContextTransitionHandler(); } public void init() { this.updateStaleLocationInfo().subscribeOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE).subscribe(); } - public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceRequest request, URI failedLocation) { + public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest request, URI failedLocation) { - if (request == null) { - throw new IllegalArgumentException("request cannot be null!"); - } - - if (request.requestContext == null) { - - if (logger.isDebugEnabled()) { - logger.warn("requestContext is null!"); - } - - return false; - } + checkNotNull(request, "request cannot be null!"); + checkNotNull(request.requestContext, "requestContext cannot be null!"); PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + String resourceId = request.getResourceId(); if (partitionKeyRange == null) { - return false; + return; } + checkNotNull(resourceId, "resourceId cannot be null!"); + + logger.info("Handling exception : {}", resourceId); + + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + AtomicBoolean isFailoverPossible = new AtomicBoolean(true); AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); - if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); - } + if (partitionLevelLocationUnavailabilityInfoSnapshot == null) { + this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + + if (partitionKeyRangeFailoverInfoAsVal == null) { + partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); + } + + isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.handleException(partitionKeyRangeWrapperAsKey, failedLocation)); - isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.isFailureThresholdBreachedForLocation(partitionKeyRangeAsKey, failedLocation)); + if (isFailureThresholdBreached.get()) { + + UnmodifiableList applicableEndpoints = request.isReadOnly() ? + this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : + this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); + + isFailoverPossible.set( + partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapperAsKey, applicableEndpoints)); + } + + return partitionKeyRangeFailoverInfoAsVal; + }); + } else { + isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoSnapshot.handleException(partitionKeyRangeWrapper, failedLocation)); if (isFailureThresholdBreached.get()) { @@ -79,93 +95,83 @@ public boolean tryMarkRegionAsUnavailableForPartitionKeyRange(RxDocumentServiceR this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); isFailoverPossible.set( - partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeAsKey, applicableEndpoints)); + partitionLevelLocationUnavailabilityInfoSnapshot.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapper, applicableEndpoints)); } - - return partitionKeyRangeFailoverInfoAsVal; - }); + } // set to true if and only if failure threshold exceeded for the region // and if failover is possible // a failover is only possible when there are available regions left to fail over to if (isFailoverPossible.get()) { - return true; + return; } // no regions to fail over to - this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); - return false; + this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRangeWrapper); } - public boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request) { + public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest request) { - if (request == null) { - throw new IllegalArgumentException("request cannot be null!"); - } - - if (request.requestContext == null) { - - if (logger.isDebugEnabled()) { - logger.warn("requestContext is null!"); - } - - return false; - } + checkNotNull(request, "request cannot be null!"); + checkNotNull(request.requestContext, "requestContext cannot be null!"); PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; if (partitionKeyRange == null) { - return false; + return; } - URI succeededLocation = request.requestContext.locationEndpointToRoute; + String resourceId = request.getResourceId(); + logger.info("Handling success : {}", resourceId); - if (this.partitionKeyRangeToFailoverInfo.containsKey(partitionKeyRange)) { - this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRange, (partitionKeyRangeAsKey, partitionKeyRangeFailoverInfoAsVal) -> { + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); - if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); - } + URI succeededLocation = request.requestContext.locationEndpointToRoute; - partitionKeyRangeFailoverInfoAsVal.bookmarkSuccess(partitionKeyRange, succeededLocation); - return partitionKeyRangeFailoverInfoAsVal; - }); - } + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot + = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); - return false; + if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { + partitionLevelLocationUnavailabilityInfoSnapshot.handleSuccess(partitionKeyRangeWrapper, succeededLocation); + } } - public List getUnavailableLocationsForPartition(PartitionKeyRange partitionKeyRange) { + public List getUnavailableLocationEndpointsForPartitionKeyRange(String resourceId, PartitionKeyRange partitionKeyRange) { checkNotNull(partitionKeyRange, "Supplied partitionKeyRange cannot be null!"); + checkNotNull(resourceId, "Supplied resourceId cannot be null!"); + + logger.info("Fetching unavailable regions for resource address : {}", resourceId); + + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = - this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); List unavailableLocations = new ArrayList<>(); boolean doesPartitionHaveUnavailableLocations = false; if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { - Map locationEndpointToFailureMetricsForPartition = - partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToFailureMetricsForPartition; + Map locationEndpointToFailureMetricsForPartition = + partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition; - for (Map.Entry pair : locationEndpointToFailureMetricsForPartition.entrySet()) { + for (Map.Entry pair : locationEndpointToFailureMetricsForPartition.entrySet()) { URI location = pair.getKey(); - FailureMetricsForPartition failureMetricsForPartition = pair.getValue(); + LocationSpecificContext locationSpecificContext = pair.getValue(); - if (failureMetricsForPartition.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.FreshUnavailable) { + if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.FreshUnavailable) { unavailableLocations.add(location); doesPartitionHaveUnavailableLocations = true; - } else if (failureMetricsForPartition.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable) { + } else if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.StaleUnavailable) { doesPartitionHaveUnavailableLocations = true; - } else if (failureMetricsForPartition.failureCount.get() >= 1) { + } else if (locationSpecificContext.exceptionCount >= 1) { doesPartitionHaveUnavailableLocations = true; } } } if (!doesPartitionHaveUnavailableLocations) { - this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRange); + this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRangeWrapper); } return UnmodifiableList.unmodifiableList(unavailableLocations); @@ -177,21 +183,30 @@ private Flux updateStaleLocationInfo() { .repeat() .flatMap(ignore -> Flux.fromIterable(this.partitionsWithPossibleUnavailableRegions.entrySet())) .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) - .flatMap(partitionKeyRangeToPartitionKeyRangePair -> { + .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> { logger.info("Background updateStaleLocationInfo kicking in..."); - PartitionKeyRange partitionKeyRange = partitionKeyRangeToPartitionKeyRangePair.getKey(); + PartitionKeyRangeWrapper partitionKeyRangeWrapper = partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair.getKey(); - PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRange); + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); if (partitionLevelLocationUnavailabilityInfo != null) { - for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToFailureMetricsForPartition.entrySet()) { - FailureMetricsForPartition failureMetricsForPartition = locationToLocationLevelMetrics.getValue(); - failureMetricsForPartition.handleSuccess(false, locationToLocationLevelMetrics.getKey(), partitionKeyRange); + for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.entrySet()) { + + URI locationWithStaleUnavailabilityInfo = locationToLocationLevelMetrics.getKey(); + + partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { + + if (locationSpecificContextAsVal != null) { + locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationContextTransitionHandler.handleSuccess(locationSpecificContextAsVal, partitionKeyRangeWrapper, locationWithStaleUnavailabilityInfoAsKey, false); + } + + return locationSpecificContextAsVal; + }); } } else { - this.partitionsWithPossibleUnavailableRegions.remove(partitionKeyRange); + this.partitionsWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper); } return Mono.empty(); @@ -200,247 +215,352 @@ private Flux updateStaleLocationInfo() { private class PartitionLevelLocationUnavailabilityInfo { - private final ConcurrentHashMap locationEndpointToFailureMetricsForPartition; + private final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; PartitionLevelLocationUnavailabilityInfo() { - this.locationEndpointToFailureMetricsForPartition = new ConcurrentHashMap<>(); + this.locationEndpointToLocationSpecificContextForPartition = new ConcurrentHashMap<>(); } - public boolean isFailureThresholdBreachedForLocation(PartitionKeyRange partitionKeyRange, URI locationWithFailure) { + public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException) { - AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); + AtomicBoolean isExceptionThresholdBreached = new AtomicBoolean(false); - this.locationEndpointToFailureMetricsForPartition.compute(locationWithFailure, (locationAsKey, failureMetricsForPartitionAsVal) -> { + this.locationEndpointToLocationSpecificContextForPartition.compute(locationWithException, (locationAsKey, locationSpecificContextAsVal) -> { - if (failureMetricsForPartitionAsVal == null) { - failureMetricsForPartitionAsVal = new FailureMetricsForPartition(); + if (locationSpecificContextAsVal == null) { + locationSpecificContextAsVal = new LocationSpecificContext(0, 0, Instant.MAX, LocationUnavailabilityStatus.Available, false); } - failureMetricsForPartitionAsVal.handleFailure(partitionKeyRange, locationAsKey); + LocationSpecificContext locationSpecificContextAfterTransition = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationContextTransitionHandler.handleException(locationSpecificContextAsVal, partitionKeyRangeWrapper, locationWithException); - isFailureThresholdBreached.set(failureMetricsForPartitionAsVal.isFailureThresholdBreached()); - return failureMetricsForPartitionAsVal; + isExceptionThresholdBreached.set(locationSpecificContextAfterTransition.isExceptionThresholdBreached()); + return locationSpecificContextAfterTransition; }); - return isFailureThresholdBreached.get(); + return isExceptionThresholdBreached.get(); } - public void bookmarkSuccess(PartitionKeyRange partitionKeyRange, URI succeededLocation) { - this.locationEndpointToFailureMetricsForPartition.compute(succeededLocation, (locationAsKey, failureMetricsForPartitionAsVal) -> { + public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation) { + this.locationEndpointToLocationSpecificContextForPartition.compute(succeededLocation, (locationAsKey, locationSpecificContextAsVal) -> { - if (failureMetricsForPartitionAsVal != null) { - failureMetricsForPartitionAsVal.handleSuccess(false, succeededLocation, partitionKeyRange);; + if (locationSpecificContextAsVal != null) { + locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationContextTransitionHandler.handleSuccess(locationSpecificContextAsVal, partitionKeyRangeWrapper, succeededLocation, false); } - return failureMetricsForPartitionAsVal; + return locationSpecificContextAsVal; }); } - public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRange partitionKeyRange, List availableLocationsAtAccountLevel) { + public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper, List availableLocationsAtAccountLevel) { for (URI availableLocation : availableLocationsAtAccountLevel) { - if (!this.locationEndpointToFailureMetricsForPartition.containsKey(availableLocation)) { + if (!this.locationEndpointToLocationSpecificContextForPartition.containsKey(availableLocation)) { return true; } else { - FailureMetricsForPartition failureMetricsForPartition = this.locationEndpointToFailureMetricsForPartition.get(availableLocation); + LocationSpecificContext locationSpecificContextSnapshot = this.locationEndpointToLocationSpecificContextForPartition.get(availableLocation); - if (failureMetricsForPartition.isRegionAvailableToProcessRequests()) { + if (locationSpecificContextSnapshot.isRegionAvailableToProcessRequests()) { return true; } } } Instant mostStaleUnavailableTimeAcrossRegions = Instant.MAX; - FailureMetricsForPartition locationLevelFailureMetadataForMostStaleLocation = null; + LocationSpecificContext locationLevelFailureMetadataForMostStaleLocation = null; URI mostStaleUnavailableLocation = null; // find region with most 'stale' unavailability - for (Map.Entry uriToLocationLevelFailureMetadata : this.locationEndpointToFailureMetricsForPartition.entrySet()) { - FailureMetricsForPartition failureMetricsForPartition = uriToLocationLevelFailureMetadata.getValue(); + for (Map.Entry uriToLocationLevelFailureMetadata : this.locationEndpointToLocationSpecificContextForPartition.entrySet()) { + LocationSpecificContext locationSpecificContext = uriToLocationLevelFailureMetadata.getValue(); - if (failureMetricsForPartition.isRegionAvailableToProcessRequests()) { + if (locationSpecificContext.isRegionAvailableToProcessRequests()) { return true; } - Instant unavailableSinceSnapshot = failureMetricsForPartition.unavailableSince.get(); + Instant unavailableSinceSnapshot = locationSpecificContext.unavailableSince; if (mostStaleUnavailableTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { mostStaleUnavailableTimeAcrossRegions = unavailableSinceSnapshot; mostStaleUnavailableLocation = uriToLocationLevelFailureMetadata.getKey(); - locationLevelFailureMetadataForMostStaleLocation = failureMetricsForPartition; + locationLevelFailureMetadataForMostStaleLocation = locationSpecificContext; } } if (locationLevelFailureMetadataForMostStaleLocation != null) { - locationLevelFailureMetadataForMostStaleLocation.handleSuccess(true, mostStaleUnavailableLocation, partitionKeyRange); - return true; + this.locationEndpointToLocationSpecificContextForPartition.compute(mostStaleUnavailableLocation, (mostStaleUnavailableLocationAsKey, locationSpecificStatusAsVal) -> { + + if (locationSpecificStatusAsVal != null) { + locationSpecificStatusAsVal = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationContextTransitionHandler.handleSuccess(locationSpecificStatusAsVal, partitionKeyRangeWrapper, mostStaleUnavailableLocationAsKey, true); + } + + return locationSpecificStatusAsVal; + }); } return false; } } - private class FailureMetricsForPartition { - private final AtomicInteger failureCount = new AtomicInteger(0); - private final AtomicInteger successCount = new AtomicInteger(0); - private final AtomicReference unavailableSince = new AtomicReference<>(Instant.MAX); - private final AtomicReference partitionScopedRegionUnavailabilityStatus = new AtomicReference<>(PartitionScopedRegionUnavailabilityStatus.Available); - private final AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); + private static class LocationSpecificContext { + private final int exceptionCount; + private final int successCount; + private final Instant unavailableSince; + private final LocationUnavailabilityStatus locationUnavailabilityStatus; + private final boolean isExceptionThresholdBreached; + + public LocationSpecificContext( + int successCount, + int exceptionCount, + Instant unavailableSince, + LocationUnavailabilityStatus locationUnavailabilityStatus, + boolean isExceptionThresholdBreached) { + + this.successCount = successCount; + this.exceptionCount = exceptionCount; + this.unavailableSince = unavailableSince; + this.locationUnavailabilityStatus = locationUnavailabilityStatus; + this.isExceptionThresholdBreached = isExceptionThresholdBreached; + } - public void handleSuccess(boolean forceStateChange, URI location, PartitionKeyRange partitionKeyRange) { + public boolean isExceptionThresholdBreached() { + return this.isExceptionThresholdBreached; + } - logger.info("Handling success"); + public boolean isRegionAvailableToProcessRequests() { + return this.locationUnavailabilityStatus == LocationUnavailabilityStatus.Available || + this.locationUnavailabilityStatus == LocationUnavailabilityStatus.StaleUnavailable; + } + } - PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); + private class LocationContextTransitionHandler { + + public LocationSpecificContext handleSuccess( + LocationSpecificContext locationSpecificContext, + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + URI locationWithSuccess, + boolean forceStatusChange) { + + logger.info("Handling success"); + LocationUnavailabilityStatus currentStatusSnapshot = locationSpecificContext.locationUnavailabilityStatus; double allowedFailureRatio = getAllowedFailureRatioByStatus(currentStatusSnapshot); + int exceptionCountActual = locationSpecificContext.exceptionCount; + int successCountActual = locationSpecificContext.successCount; + switch (currentStatusSnapshot) { case Available: - if (!forceStateChange) { - if (failureCount.get() > 0) { - failureCount.decrementAndGet(); + if (!forceStatusChange) { + if (exceptionCountActual > 0) { + exceptionCountActual -= 1; + return new LocationSpecificContext( + locationSpecificContext.successCount, + exceptionCountActual, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached + ); } } break; case StaleUnavailable: - if (!forceStateChange) { - successCount.incrementAndGet(); - if (successCount.get() > 10 && (double) failureCount.get() / (double) successCount.get() < allowedFailureRatio) { - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.Available); - logger.info("Partition {}-{} marked as Available from StaleUnavailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + if (!forceStatusChange) { + successCountActual += 1; + logger.info("Try to switch to Available but actual success count : {}", successCountActual); + if (successCountActual > 10 && (double) exceptionCountActual / (double) successCountActual < allowedFailureRatio) { + logger.info("Partition {}-{} of collection : {} marked as Available from StaleUnavailable for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithSuccess, OperationType.Read)); + return this.transitionHealthStatus(LocationUnavailabilityStatus.Available); + } else { + return new LocationSpecificContext( + successCountActual, + exceptionCountActual, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); } } break; case FreshUnavailable: - if (!forceStateChange) { - if (Duration.between(this.unavailableSince.get(), Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + Instant unavailableSinceActual = locationSpecificContext.unavailableSince; + if (!forceStatusChange) { + if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { + logger.info("Partition {}-{} of collection : {} marked as StaleUnavailable from FreshUnavailable for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithSuccess, OperationType.Read)); + + return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); } } else { - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.StaleUnavailable); - logger.info("Partition {}-{} marked as StaleUnavailable from FreshAvailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + logger.info("Partition {}-{} of collection : {} marked as StaleUnavailable from FreshAvailable for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithSuccess, OperationType.Read)); + return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); } break; default: throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); } + + return locationSpecificContext; } - public void handleFailure(PartitionKeyRange partitionKeyRange, URI location) { + public LocationSpecificContext handleException( + LocationSpecificContext locationSpecificContext, + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + URI locationWithException) { - logger.error("Handling failure"); + logger.warn("Handling exception"); - PartitionScopedRegionUnavailabilityStatus currentStatusSnapshot = this.partitionScopedRegionUnavailabilityStatus.get(); + LocationUnavailabilityStatus currentStatusSnapshot = locationSpecificContext.locationUnavailabilityStatus; + int allowedExceptionCount = getAllowedFailureCountByStatus(currentStatusSnapshot); - int allowedFailureCount = getAllowedFailureCountByStatus(currentStatusSnapshot); + int exceptionCountActual = locationSpecificContext.exceptionCount; switch (currentStatusSnapshot) { case Available: - if (failureCount.get() < allowedFailureCount) { - failureCount.addAndGet(1); - logger.error("Failure count : {}", failureCount.get()); - logger.error("Allowed failure count : {}", allowedFailureCount); + if (exceptionCountActual < allowedExceptionCount) { + exceptionCountActual++; + logger.info("Exception count : {}", exceptionCountActual); + return new LocationSpecificContext( + locationSpecificContext.successCount, + exceptionCountActual, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); } else { - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); - GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRange, partitionKeyRange); - logger.info("Partition {}-{} marked as FreshUnavailable from Available for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + GlobalPartitionEndpointManagerForCircuitBreaker + .this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); + logger.info("Partition {}-{} of collection : {} marked as FreshUnavailable from Available for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, OperationType.Read)); + return this.transitionHealthStatus(LocationUnavailabilityStatus.FreshUnavailable); } - break; case StaleUnavailable: - if (failureCount.get() < allowedFailureCount) { - failureCount.addAndGet(1); + if (exceptionCountActual < allowedExceptionCount) { + exceptionCountActual++; + return new LocationSpecificContext( + locationSpecificContext.successCount, + exceptionCountActual, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); } else { - this.setHealthStatus(PartitionScopedRegionUnavailabilityStatus.FreshUnavailable); - logger.info("Partition {}-{} marked as FreshUnavailable from StaleUnavailable for region : {}", partitionKeyRange.getMinInclusive(), partitionKeyRange.getMaxExclusive(), GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager.getRegionName(location, OperationType.Read)); + logger.info("Partition {}-{} of collection : {} marked as FreshUnavailable from StaleUnavailable for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, OperationType.Read)); + return this.transitionHealthStatus(LocationUnavailabilityStatus.FreshUnavailable); } - break; default: throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); } } - public void setHealthStatus(PartitionScopedRegionUnavailabilityStatus status) { - this.partitionScopedRegionUnavailabilityStatus.updateAndGet(previousStatus -> { - - PartitionScopedRegionUnavailabilityStatus newStatus; + public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStatus newStatus) { - switch (status) { - case Available: - if (previousStatus == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable) { - this.failureCount.set(0); - this.successCount.set(0); - this.unavailableSince.set(Instant.MAX); - this.isFailureThresholdBreached.set(false); - } - newStatus = status; - break; - case FreshUnavailable: - if (previousStatus == PartitionScopedRegionUnavailabilityStatus.Available) { - this.failureCount.set(0); - this.successCount.set(0); - this.unavailableSince.set(Instant.now()); - this.isFailureThresholdBreached.set(true); - } - newStatus = status; - break; - case StaleUnavailable: - this.failureCount.set(0); - this.successCount.set(0); - this.unavailableSince.set(Instant.MAX); - this.isFailureThresholdBreached.set(false); - newStatus = status; - break; - default: - throw new IllegalStateException("Unsupported health status: " + status); - } - - return newStatus; - }); - } - - private static double getAllowedFailureRatioByStatus(PartitionScopedRegionUnavailabilityStatus status) { - switch (status) { + switch (newStatus) { case Available: - return 0.3d; + return new LocationSpecificContext( + 0, + 0, + Instant.MAX, + LocationUnavailabilityStatus.Available, + false + ); + case FreshUnavailable: + return new LocationSpecificContext( + 0, + 0, + Instant.now(), + LocationUnavailabilityStatus.FreshUnavailable, + true + ); case StaleUnavailable: - return 0.1d; + return new LocationSpecificContext( + 0, + 0, + Instant.MAX, + LocationUnavailabilityStatus.StaleUnavailable, + false + ); default: - return 0d; + throw new IllegalStateException("Unsupported health status: " + newStatus); } } + } - private static int getAllowedFailureCountByStatus(PartitionScopedRegionUnavailabilityStatus status) { - switch (status) { - case Available: - return 10; - case StaleUnavailable: - return 5; - default: - throw new IllegalStateException("Unsupported health status: " + status); - } + private static class PartitionKeyRangeWrapper { + final PartitionKeyRange partitionKeyRange; + final String resourceId; + + private PartitionKeyRangeWrapper(PartitionKeyRange partitionKeyRange, String resourceId) { + this.partitionKeyRange = partitionKeyRange; + this.resourceId = resourceId; } - public boolean isFailureThresholdBreached() { - return this.isFailureThresholdBreached.get(); + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PartitionKeyRangeWrapper that = (PartitionKeyRangeWrapper) o; + return Objects.equals(partitionKeyRange, that.partitionKeyRange) && Objects.equals(resourceId, that.resourceId); } - public boolean isRegionAvailableToProcessRequests() { - return this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.Available || - this.partitionScopedRegionUnavailabilityStatus.get() == PartitionScopedRegionUnavailabilityStatus.StaleUnavailable; + @Override + public int hashCode() { + return Objects.hash(partitionKeyRange, resourceId); } } - private enum PartitionScopedRegionUnavailabilityStatus { + private enum LocationUnavailabilityStatus { Available(100), FreshUnavailable(200), StaleUnavailable(300); private int priority; - PartitionScopedRegionUnavailabilityStatus(int priority) { + LocationUnavailabilityStatus(int priority) { this.priority = priority; } } + + private static double getAllowedFailureRatioByStatus(LocationUnavailabilityStatus status) { + switch (status) { + case Available: + return 0.3d; + case StaleUnavailable: + return 0.1d; + default: + return 0d; + } + } + + private static int getAllowedFailureCountByStatus(LocationUnavailabilityStatus status) { + switch (status) { + case Available: + return 10; + case StaleUnavailable: + return 5; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index de2bcdc8afac..70be375cd66b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2953,6 +2953,7 @@ private Mono> deleteDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); + req.requestContext.setPointOperationContext(pointOperationContext); requestReference.set(req); if (retryPolicyInstance != null) { @@ -5596,7 +5597,7 @@ private void addPartitionLevelUnavailableRegionsForRequest( checkNotNull(partitionKeyRange, "partitionKeyRange cannot be null!"); checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); - List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationsForPartition(partitionKeyRange); + List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), partitionKeyRange); List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); @@ -5623,7 +5624,7 @@ private void addPartitionLevelUnavailableRegionsForFeedRequest( if (Configs.isPartitionLevelCircuitBreakerEnabled()) { checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); - List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationsForPartition(resolvedPartitionKeyRange); + List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), resolvedPartitionKeyRange); List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); @@ -6176,7 +6177,7 @@ private void tryMarkPartitionKeyRangeAsUnavailableForRegion(RxDocumentServiceReq URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); if (firstContactedLocationEndpoint != null) { - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 6849acd0bb75..0467f5284f2d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -546,7 +546,11 @@ private Mono invokeAsyncInternal(RxDocumentServiceReq private Mono invokeAsync(RxDocumentServiceRequest request) { - Callable> funcDelegate = () -> invokeAsyncInternal(request).single().doOnSuccess(ignore -> this.globalPartitionEndpointManager.tryBookmarkRegionSuccessForPartitionKeyRange(request)); + Callable> funcDelegate = () -> invokeAsyncInternal(request).single().doOnSuccess(ignore -> { + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + this.globalPartitionEndpointManager.handleLocationSuccessForPartitionKeyRange(request); + } + }); MetadataRequestRetryPolicy metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(this.globalEndpointManager); metadataRequestRetryPolicy.onBeforeSendRequest(request); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index fec82c4b0e05..d065ee549caf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -15,7 +15,6 @@ import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; -import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.IRetryPolicy; import com.azure.cosmos.implementation.ISessionToken; import com.azure.cosmos.implementation.InternalServerErrorException; @@ -194,7 +193,10 @@ private RxDocumentServiceResponse completeResponse( rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = addressResolver.getGlobalPartitionEndpointManagerForCircuitBreaker(); - globalPartitionEndpointManagerForCircuitBreaker.tryBookmarkRegionSuccessForPartitionKeyRange(request); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); + } return rxDocumentServiceResponse; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 4d68ff850502..4a2ed7c27aaa 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -3,17 +3,14 @@ package com.azure.cosmos.implementation.query; -import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.FeedOperationContext; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; -import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; -import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.ModelBridgeInternal; @@ -24,13 +21,10 @@ import java.net.URI; import java.util.List; -import java.util.Optional; -import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Function; import java.util.function.Supplier; -import java.util.stream.Collectors; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -225,7 +219,7 @@ private void tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest fail URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); if (firstContactedLocationEndpoint != null) { - this.globalPartitionEndpointManagerForCircuitBreaker.tryMarkRegionAsUnavailableForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); } } } From 47cc306a40a4022e5475d1dddcd007780e62858d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 20 May 2024 12:42:35 -0400 Subject: [PATCH 031/140] Integrate readMany for partition-level circuit breaker. --- .../PartitionLevelCircuitBreakerTests.java | 300 ++++++++++++++++-- .../query/ReadManySplitTest.java | 7 +- .../implementation/FeedOperationContext.java | 19 +- ...itionEndpointManagerForCircuitBreaker.java | 16 +- .../implementation/RxDocumentClientImpl.java | 47 ++- .../caches/RxPartitionKeyRangeCache.java | 1 + .../DocumentQueryExecutionContextBase.java | 1 + .../DocumentQueryExecutionContextFactory.java | 4 +- .../cosmos/implementation/query/Fetcher.java | 4 +- ...ParallelDocumentQueryExecutionContext.java | 4 +- ...llelDocumentQueryExecutionContextBase.java | 8 +- .../query/PipelinedQueryExecutionContext.java | 4 +- 12 files changed, 337 insertions(+), 78 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 71c3d12ebaa0..396b0befb8cb 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -15,10 +15,14 @@ import com.azure.cosmos.TestObject; import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; import com.azure.cosmos.faultinjection.FaultInjectionTestBase; +import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.guava25.base.Function; -import com.azure.cosmos.implementation.throughputControl.TestItem; +import com.azure.cosmos.models.CosmosBatch; +import com.azure.cosmos.models.CosmosBatchResponse; +import com.azure.cosmos.models.CosmosContainerIdentity; import com.azure.cosmos.models.CosmosContainerProperties; +import com.azure.cosmos.models.CosmosItemIdentity; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; import com.azure.cosmos.models.CosmosPatchItemRequestOptions; @@ -50,12 +54,14 @@ import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; +import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.fail; @@ -64,6 +70,15 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { private List writeRegions; + private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) + .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) + .build(); + + private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_TIMEOUT + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) + .build(); + @Factory(dataProvider = "clientBuildersWithDirectTcpSession") public PartitionLevelCircuitBreakerTests(CosmosClientBuilder cosmosClientBuilder) { super(cosmosClientBuilder); @@ -85,13 +100,14 @@ public void beforeClass() { @DataProvider(name = "partitionLevelCircuitBreakerTestConfigs") public Object[][] partitionLevelCircuitBreakerTestConfigs() { return new Object[][] { -// {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, -// {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, -// {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, -// {FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, -// {FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, -// {FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, -// {FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.BATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, @@ -102,6 +118,16 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { }; } + @DataProvider(name = "readManyTestConfigs") + public Object[][] readManyTestConfigs() { + return new Object[][] { + {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofMinutes(6), false, false}, + {FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, true}, + }; + } + @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs") public void operationHitsTerminalExceptionInFirstPreferredRegion( FaultInjectionOperationType faultInjectionOperationType, @@ -131,6 +157,8 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(getProvisionedThroughputForContainer(faultInjectionOperationType)); + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + try { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); @@ -186,24 +214,16 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( if (faultInjectionRule != null) { - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(faultInjectionRule)) - .block(); - Function> faultInjectedFunc = generateOperation(faultInjectionOperationType); assertThat(faultInjectedFunc).isNotNull().as("faultInjectedFunc cannot be null!"); - OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); - operationInvocationParamsWrapper.cosmosAsyncContainer = container; - if (shouldEndToEndTimeoutBeInjected) { CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = (shouldThresholdBasedAvailabilityStrategyBeEnabled) ? - new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) - .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()).build() : - new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build(); + TWO_SECOND_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY : + TWO_SECOND_TIMEOUT; operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions() .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); @@ -215,6 +235,12 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); } + operationInvocationParamsWrapper.asyncContainer = container; + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(operationInvocationParamsWrapper.asyncContainer, Arrays.asList(faultInjectionRule)) + .block(); + for (int i = 1; i <= 15; i++) { operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); @@ -241,6 +267,13 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( regionContacted -> logger.info("Region contacted : {}", regionContacted) ); + } else if (response.batchResponse != null) { + assertThat(response.batchResponse).isNotNull(); + assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + + response.batchResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); } } @@ -274,6 +307,13 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( regionContacted -> logger.info("Region contacted : {}", regionContacted) ); + } else if (response.batchResponse != null) { + assertThat(response.batchResponse).isNotNull(); + assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + + response.batchResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); } } } @@ -291,6 +331,169 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( } } + @Test(groups = {"multi-master"}, dataProvider = "readManyTestConfigs") + public void readManyOperationHitsTerminalExceptionInFirstPreferredRegion( + FaultInjectionServerErrorType faultInjectionServerErrorType, + int faultInjectionHitCount, + Duration faultInjectionDuration, + boolean shouldEndToEndTimeoutBeInjected, + boolean shouldThresholdBasedAvailabilityStrategyBeEnabled) { + + List preferredRegions = this.writeRegions; + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); + + if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("queryWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + + String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + + CosmosAsyncContainer container = null; + CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/mypk"); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(12_000); + + try { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + + database.createContainerIfNotExists(containerProperties, throughputProperties).block(); + container = database.getContainer(multiPartitionContainerId); + + Thread.sleep(10_000); + + List feedRanges = container.getFeedRanges().block(); + + assertThat(feedRanges).isNotNull().as("feedRanges is not expected to be null!"); + assertThat(feedRanges).isNotEmpty().as("feedRanges is not expected to be empty!"); + + Map> partitionKeyToItemIdentityList = new HashMap<>(); + List partitionKeys = new ArrayList<>(); + + for (FeedRange feedRange : feedRanges) { + String pkForFeedRange = UUID.randomUUID().toString(); + partitionKeys.add(pkForFeedRange); + partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>()); + for (int i = 0; i < 10; i++) { + TestObject testObject = TestObject.create(pkForFeedRange); + partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId())); + container.createItem(testObject, new PartitionKey(pkForFeedRange), new CosmosItemRequestOptions()).block(); + } + } + + PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(FaultInjectionOperationType.QUERY_ITEM) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(faultyPartitionKey)).build()) + .region(preferredRegions.get(0)) + .build(); + + FaultInjectionRule faultInjectionRule = null; + + if (faultInjectionServerErrorType == FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) { + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + faultInjectionRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(faultInjectionHitCount) + .build(); + } else if (faultInjectionServerErrorType == FaultInjectionServerErrorType.GONE) { + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.GONE) + .build(); + + faultInjectionRule = new FaultInjectionRuleBuilder("gone-exception" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(Duration.ofMinutes(7)) + .build(); + } + + if (faultInjectionRule != null) { + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(container, Arrays.asList(faultInjectionRule)) + .block(); + + for (int i = 1; i <= 15; i++) { + List itemIdentities = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); + + FeedResponse response = container + .readMany(itemIdentities, TestObject.class) + .onErrorResume(throwable -> { + if (throwable instanceof OperationCancelledException) { + logger.error("OperationCancelledException thrown!"); + } + + return Mono.empty(); + }) + .block(); + + logger.info("Hit count : {}", faultInjectionRule.getHitCount()); + + if (response != null) { + assertThat(response).isNotNull(); + assertThat(response.getCosmosDiagnostics()).isNotNull(); + + response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } + + logger.info("Sleep for 120 seconds"); + Thread.sleep(120_000); + + for (int i = 16; i <= 30; i++) { + + List itemIdentities = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); + + FeedResponse response = container + .readMany(itemIdentities, TestObject.class) + .onErrorResume(throwable -> { + if (throwable instanceof OperationCancelledException) { + logger.error("OperationCancelledException thrown!"); + } + + return Mono.empty(); + }) + .block(); + + logger.info("Hit count : {}", faultInjectionRule.getHitCount()); + + if (response != null) { + assertThat(response).isNotNull(); + assertThat(response.getCosmosDiagnostics()).isNotNull(); + + response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } + } + } + + logger.info("End test"); + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Query operations should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + safeDeleteCollection(container); + safeClose(client); + } + } + @Test(groups = {"multi-master"}) public void operationHitsTerminalExceptionInMultipleContainers() { logger.info("Checking circuit breaking behavior for {}", FaultInjectionOperationType.READ_ITEM); @@ -380,10 +583,10 @@ public void operationHitsTerminalExceptionInMultipleContainers() { for (int i = 1; i <= 15; i++) { paramsWrapper1.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper1.cosmosAsyncContainer = container1; + paramsWrapper1.asyncContainer = container1; paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper2.cosmosAsyncContainer = container2; + paramsWrapper2.asyncContainer = container2; OperationExecutionResult response1 = faultInjectedFunc.apply(paramsWrapper1); OperationExecutionResult response2 = faultInjectedFunc.apply(paramsWrapper2); @@ -442,10 +645,10 @@ public void operationHitsTerminalExceptionInMultipleContainers() { for (int i = 16; i <= 30; i++) { paramsWrapper1.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper1.cosmosAsyncContainer = container1; + paramsWrapper1.asyncContainer = container1; paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper2.cosmosAsyncContainer = container2; + paramsWrapper2.asyncContainer = container2; OperationExecutionResult response1 = faultInjectedFunc.apply(paramsWrapper1); OperationExecutionResult response2 = faultInjectedFunc.apply(paramsWrapper2); @@ -525,6 +728,7 @@ private static int getTestObjectCountToBootstrapFrom(FaultInjectionOperationType case DELETE_ITEM: return 2 * opCount; case CREATE_ITEM: + case BATCH_ITEM: return 0; default: throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); @@ -537,7 +741,7 @@ private static Function { - CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; TestObject createdTestObject = paramsWrapper.createdTestObject; CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; @@ -564,7 +768,7 @@ private static Function { - CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; TestObject createdTestObject = paramsWrapper.createdTestObject; CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; @@ -590,7 +794,7 @@ private static Function { - CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; TestObject createdTestObject = TestObject.create(); CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; @@ -616,7 +820,7 @@ private static Function { - CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; TestObject createdTestObject = paramsWrapper.createdTestObject; CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; @@ -642,7 +846,7 @@ private static Function { - CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; TestObject createdTestObject = paramsWrapper.createdTestObject; CosmosPatchItemRequestOptions patchItemRequestOptions = (CosmosPatchItemRequestOptions) paramsWrapper.patchItemRequestOptions; @@ -672,7 +876,7 @@ private static Function { - CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; try { @@ -698,7 +902,7 @@ private static Function { - CosmosAsyncContainer asyncContainer = paramsWrapper.cosmosAsyncContainer; + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; TestObject createdTestObject = paramsWrapper.createdTestObject; CosmosItemRequestOptions itemRequestOptions = paramsWrapper.itemRequestOptions; @@ -719,6 +923,28 @@ private static Function(cosmosException); } + throw ex; + } + }; + case BATCH_ITEM: + return (paramsWrapper) -> { + + TestObject testObject = TestObject.create(); + CosmosBatch batch = CosmosBatch.createCosmosBatch(new PartitionKey(testObject.getId())); + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + + batch.createItemOperation(testObject); + batch.readItemOperation(testObject.getId()); + + try { + CosmosBatchResponse batchResponse = asyncContainer.executeCosmosBatch(batch).block(); + return new OperationExecutionResult<>(batchResponse); + } catch (Exception ex) { + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } + throw ex; } }; @@ -737,6 +963,7 @@ private static int getProvisionedThroughputForContainer(FaultInjectionOperationT return 12_000; case DELETE_ITEM: case CREATE_ITEM: + case BATCH_ITEM: return 6_000; default: throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); @@ -1018,28 +1245,39 @@ static class OperationExecutionResult { private final CosmosItemResponse cosmosItemResponse; private final CosmosException cosmosException; private final FeedResponse feedResponse; + private final CosmosBatchResponse batchResponse; OperationExecutionResult(FeedResponse feedResponse) { this.feedResponse = feedResponse; this.cosmosException = null; this.cosmosItemResponse = null; + this.batchResponse = null; } OperationExecutionResult(CosmosItemResponse cosmosItemResponse) { this.cosmosItemResponse = cosmosItemResponse; this.cosmosException = null; this.feedResponse = null; + this.batchResponse = null; } OperationExecutionResult(CosmosException cosmosException) { this.cosmosException = cosmosException; this.cosmosItemResponse = null; this.feedResponse = null; + this.batchResponse = null; + } + + OperationExecutionResult(CosmosBatchResponse batchResponse) { + this.cosmosException = null; + this.cosmosItemResponse = null; + this.feedResponse = null; + this.batchResponse = batchResponse; } } - static class OperationInvocationParamsWrapper { - public CosmosAsyncContainer cosmosAsyncContainer; + private static class OperationInvocationParamsWrapper { + public CosmosAsyncContainer asyncContainer; public TestObject createdTestObject; public CosmosItemRequestOptions itemRequestOptions; public CosmosQueryRequestOptions queryRequestOptions; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java index 58cf734a970a..8fde4699ccf4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java @@ -5,6 +5,7 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.Document; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; +import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.Resource; import com.azure.cosmos.implementation.ResourceType; @@ -59,9 +60,13 @@ public void requestCreationOnSplitScenario() { PartitionKeyRange partitionKey = new PartitionKeyRange("0", "00", "FF"); Map rangeQueryMap = new HashMap<>(); rangeQueryMap.put(partitionKey, querySpec); + + DocumentCollection documentCollection = new DocumentCollection(); + documentCollection.setResourceId("testCollectionRid"); + parallelDocumentQueryExecutionContextBase.initializeReadMany( rangeQueryMap, - new CosmosQueryRequestOptions(), "testCollectionRid"); + new CosmosQueryRequestOptions(), documentCollection); //Parent document producer created DocumentProducer documentProducer = parallelDocumentQueryExecutionContextBase.documentProducers.get(0); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java index aa55670d81a8..62ef75a312b2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java @@ -3,17 +3,16 @@ package com.azure.cosmos.implementation; +import java.util.Map; import java.util.Set; public class FeedOperationContext { - private final Set partitionKeyRangesWithSuccess; - + private final Map partitionKeyRangesWithSuccess; private final boolean isThresholdBasedAvailabilityStrategyEnabled; - private boolean isRequestHedged; - public FeedOperationContext(Set partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { + public FeedOperationContext(Map partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { this.partitionKeyRangesWithSuccess = partitionKeyRangesWithSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; } @@ -26,12 +25,16 @@ public boolean getIsRequestHedged() { return this.isRequestHedged; } - public void addPartitionKeyRangeWithSuccess(PartitionKeyRange partitionKeyRange) { - this.partitionKeyRangesWithSuccess.add(partitionKeyRange); + public void addPartitionKeyRangeWithSuccess(PartitionKeyRange partitionKeyRange, String resourceId) { + GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper partitionKeyRangeWrapper + = new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + this.partitionKeyRangesWithSuccess.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); } - public boolean hasPartitionKeyRangeSeenSuccess(PartitionKeyRange partitionKeyRange) { - return this.partitionKeyRangesWithSuccess.contains(partitionKeyRange); + public boolean hasPartitionKeyRangeSeenSuccess(PartitionKeyRange partitionKeyRange, String resourceId) { + GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper partitionKeyRangeWrapper + = new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + return this.partitionKeyRangesWithSuccess.containsKey(partitionKeyRangeWrapper); } public boolean isThresholdBasedAvailabilityStrategyEnabled() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 8b879cd4147a..29ffec4c0d98 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -47,12 +47,12 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest checkNotNull(request.requestContext, "requestContext cannot be null!"); PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; - String resourceId = request.getResourceId(); if (partitionKeyRange == null) { return; } + String resourceId = request.getResourceId(); checkNotNull(resourceId, "resourceId cannot be null!"); logger.info("Handling exception : {}", resourceId); @@ -507,11 +507,11 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat } } - private static class PartitionKeyRangeWrapper { + public static class PartitionKeyRangeWrapper { final PartitionKeyRange partitionKeyRange; final String resourceId; - private PartitionKeyRangeWrapper(PartitionKeyRange partitionKeyRange, String resourceId) { + public PartitionKeyRangeWrapper(PartitionKeyRange partitionKeyRange, String resourceId) { this.partitionKeyRange = partitionKeyRange; this.resourceId = resourceId; } @@ -531,15 +531,7 @@ public int hashCode() { } private enum LocationUnavailabilityStatus { - Available(100), - FreshUnavailable(200), - StaleUnavailable(300); - - private int priority; - - LocationUnavailabilityStatus(int priority) { - this.priority = priority; - } + Available, FreshUnavailable, StaleUnavailable; } private static double getAllowedFailureRatioByStatus(LocationUnavailabilityStatus status) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 70be375cd66b..68235e15e08d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1816,11 +1816,8 @@ private Mono getBatchDocumentRequest(DocumentClientRet request.requestContext.setExcludeRegions(options.getExcludeRegions()); } - if (requestRetryPolicy != null) { - requestRetryPolicy.onBeforeSendRequest(request); - } - SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); + if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } @@ -1829,13 +1826,25 @@ private Mono getBatchDocumentRequest(DocumentClientRet request.requestContext.setExcludeRegions(options.getExcludeRegions()); } - Mono> collectionObs = - this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); + request.requestContext.setPointOperationContext(new PointOperationContext(new AtomicBoolean(false), false)); - return collectionObs.map((Utils.ValueHolder collectionValueHolder) -> { - addBatchHeaders(request, serverBatchRequest, collectionValueHolder.v); - return request; - }); + return this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request) + .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + + addBatchHeaders(request, serverBatchRequest, documentCollectionValueHolder.v); + + if (Configs.isPartitionLevelCircuitBreakerEnabled() && options != null) { + options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); + addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v); + } + + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } + + return Mono.just(request); + })); } private RxDocumentServiceRequest addBatchHeaders(RxDocumentServiceRequest request, @@ -3458,7 +3467,7 @@ private Flux> queryForReadMany( sqlQuery, rangeQueryMap, options, - collection.getResourceId(), + collection, parentResourceLink, activityId, klass, @@ -4126,7 +4135,10 @@ public Mono executeBatchRequest(String collectionLink, RequestOptions options, boolean disableAutomaticIdGeneration) { DocumentClientRetryPolicy documentClientRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(null); - return ObservableHelper.inlineIfPossibleAsObs(() -> executeBatchRequestInternal(collectionLink, serverBatchRequest, options, documentClientRetryPolicy, disableAutomaticIdGeneration), documentClientRetryPolicy); + AtomicReference requestReference = new AtomicReference<>(); + return handleRegionFeedbackForPointOperation(ObservableHelper + .inlineIfPossibleAsObs(() -> executeBatchRequestInternal( + collectionLink, serverBatchRequest, options, documentClientRetryPolicy, disableAutomaticIdGeneration, requestReference), documentClientRetryPolicy), requestReference); } private Mono executeStoredProcedureInternal(String storedProcedureLink, @@ -4170,14 +4182,19 @@ private Mono executeBatchRequestInternal(String collectionL ServerBatchRequest serverBatchRequest, RequestOptions options, DocumentClientRetryPolicy requestRetryPolicy, - boolean disableAutomaticIdGeneration) { + boolean disableAutomaticIdGeneration, + AtomicReference requestReference) { try { logger.debug("Executing a Batch request with number of operations {}", serverBatchRequest.getOperations().size()); Mono requestObs = getBatchDocumentRequest(requestRetryPolicy, collectionLink, serverBatchRequest, options, disableAutomaticIdGeneration); + Mono responseObservable = - requestObs.flatMap(request -> create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options))); + requestObs.flatMap(request -> { + requestReference.set(request); + return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); + }); return responseObservable .map(serviceResponse -> BatchResponseParser.fromDocumentServiceResponse(serviceResponse, serverBatchRequest, true)); @@ -6032,7 +6049,7 @@ private Mono executeFeedOperationWithAvailabilityStrategy( false, initialExcludedRegions); - Set partitionKeyRangesWithSuccess = ConcurrentHashMap.newKeySet(); + Map partitionKeyRangesWithSuccess = new ConcurrentHashMap<>(); if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java index dab4f2838194..9750771b6147 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/caches/RxPartitionKeyRangeCache.java @@ -249,6 +249,7 @@ private Mono> getPartitionKeyRange(MetadataDiagnosticsCo ); //this request doesn't actually go to server request.requestContext.resolvedCollectionRid = collectionRid; + request.setResourceId(collectionRid); Mono collectionObs = collectionCache.resolveCollectionAsync(metaDataDiagnosticsContext, request) .map(collectionValueHolder -> collectionValueHolder.v); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java index 86f3d328c1fe..88ae6aea5820 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java @@ -121,6 +121,7 @@ protected RxDocumentServiceRequest createDocumentServiceRequestWithFeedRange(Map ? this.createQueryDocumentServiceRequest(requestHeaders, querySpec) : this.createReadFeedDocumentServiceRequest(requestHeaders); request.requestContext.resolvedCollectionRid = collectionRid; + request.setResourceId(collectionRid); request.throughputControlGroupName = throughputControlGroupName; if (partitionKeyInternal != null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java index a13cfc65348d..e0aa3d61536f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java @@ -419,7 +419,7 @@ public static Flux> createSpecia public static Flux> createReadManyQueryAsync( DiagnosticsClientContext diagnosticsClientContext, IDocumentQueryClient queryClient, String collectionResourceId, SqlQuerySpec sqlQuery, Map rangeQueryMap, CosmosQueryRequestOptions cosmosQueryRequestOptions, - String resourceId, String collectionLink, UUID activityId, Class klass, + DocumentCollection collection, String collectionLink, UUID activityId, Class klass, ResourceType resourceTypeEnum, final AtomicBoolean isQueryCancelledOnTimeout) { @@ -429,7 +429,7 @@ public static Flux> createReadMa sqlQuery, rangeQueryMap, cosmosQueryRequestOptions, - resourceId, + collection, collectionLink, activityId, klass, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 4a2ed7c27aaa..c8452ffeb450 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -176,7 +176,7 @@ private Mono> nextPage(RxDocumentServiceRequest request) { if (request.getResourceType() == ResourceType.Document) { FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); - feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange); + feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); } }) .doOnError(throwable -> completed.set(true)) @@ -198,7 +198,7 @@ private Mono> nextPage(RxDocumentServiceRequest request) { FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); if (feedOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange)) { + if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { this.tryMarkPartitionKeyRangeAsUnavailable(request); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java index 020b8633d2be..f6c2a80dc80d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java @@ -127,7 +127,7 @@ public static Flux> createReadManyQueryA IDocumentQueryClient queryClient, SqlQuerySpec sqlQuery, Map rangeQueryMap, - CosmosQueryRequestOptions cosmosQueryRequestOptions, String collectionRid, String collectionLink, UUID activityId, Class klass, + CosmosQueryRequestOptions cosmosQueryRequestOptions, DocumentCollection collection, String collectionLink, UUID activityId, Class klass, ResourceType resourceTypeEnum, final AtomicBoolean isQueryCancelledOnTimeout) { @@ -144,7 +144,7 @@ public static Flux> createReadManyQueryA isQueryCancelledOnTimeout); context - .initializeReadMany(rangeQueryMap, cosmosQueryRequestOptions, collectionRid); + .initializeReadMany(rangeQueryMap, cosmosQueryRequestOptions, collection); return Flux.just(context); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java index 1fd3f1e62e79..ffee7efd24a0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java @@ -139,7 +139,7 @@ public void setTop(int newTop) { protected void initializeReadMany( Map rangeQueryMap, CosmosQueryRequestOptions cosmosQueryRequestOptions, - String collectionRid) { + DocumentCollection collection) { Map commonRequestHeaders = createCommonHeadersAsync(this.getFeedOptions(null, null)); for (Map.Entry entry : rangeQueryMap.entrySet()) { @@ -153,11 +153,13 @@ protected void initializeReadMany( headers.put(HttpConstants.HttpHeaders.CONTINUATION, continuationToken); headers.put(HttpConstants.HttpHeaders.PAGE_SIZE, Strings.toString(pageSize)); + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setPartitionKeyDefinition(cosmosQueryRequestOptions, collection.getPartitionKey()); + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().setCollectionRid(cosmosQueryRequestOptions, collection.getResourceId()); return this.createDocumentServiceRequestWithFeedRange(headers, querySpec, null, partitionKeyRange, - collectionRid, + collection.getResourceId(), cosmosQueryRequestOptions.getThroughputControlGroupName()); }; @@ -167,7 +169,7 @@ protected void initializeReadMany( DocumentProducer dp = createDocumentProducer( - collectionRid, + collection.getResourceId(), null, -1, cosmosQueryRequestOptions, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java index 651b0e6cda1f..81f273fc9891 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/PipelinedQueryExecutionContext.java @@ -100,7 +100,7 @@ static Flux> createAsyncCore( public static Flux> createReadManyAsync( DiagnosticsClientContext diagnosticsClientContext, IDocumentQueryClient queryClient, SqlQuerySpec sqlQuery, Map rangeQueryMap, CosmosQueryRequestOptions cosmosQueryRequestOptions, - String resourceId, String collectionLink, UUID activityId, Class klass, + DocumentCollection collection, String collectionLink, UUID activityId, Class klass, ResourceType resourceTypeEnum, final AtomicBoolean isQueryCancelledOnTimeout) { @@ -108,7 +108,7 @@ public static Flux> createReadManyAsyn ParallelDocumentQueryExecutionContext.createReadManyQueryAsync(diagnosticsClientContext, queryClient, sqlQuery, rangeQueryMap, - cosmosQueryRequestOptions, resourceId, + cosmosQueryRequestOptions, collection, collectionLink, activityId, klass, resourceTypeEnum, isQueryCancelledOnTimeout); From 169c1ba1e061c5caf14324fa4236cc98c2467570 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 20 May 2024 18:37:46 -0400 Subject: [PATCH 032/140] Fixing merge. --- .../implementation/ClientRetryPolicyTest.java | 1 + .../implementation/ConsistencyTestsBase.java | 17 ++- .../PartitionLevelCircuitBreakerTests.java | 4 +- .../CosmosQueryRequestOptionsImpl.java | 9 ++ .../ImplementationBridgeHelpers.java | 135 +++++++++--------- .../implementation/RxDocumentClientImpl.java | 132 +++++++++++------ .../directconnectivity/StoreClient.java | 9 +- .../DefaultDocumentQueryExecutionContext.java | 74 +++++----- .../query/DocumentProducer.java | 17 +-- .../implementation/query/Paginator.java | 4 +- .../models/CosmosQueryRequestOptions.java | 29 +--- .../models/CosmosReadManyRequestOptions.java | 2 + 12 files changed, 235 insertions(+), 198 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java index 64ec50222cae..544b2c990ec1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java @@ -6,6 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosException; import com.azure.cosmos.ThrottlingRetryOptions; +import com.azure.cosmos.implementation.directconnectivity.ChannelAcquisitionException; import io.netty.handler.timeout.ReadTimeoutException; import io.reactivex.subscribers.TestSubscriber; import org.mockito.Mockito; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java index c19993c74efa..034b0a9fa6b0 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java @@ -4,21 +4,20 @@ package com.azure.cosmos.implementation; +import com.azure.cosmos.BridgeInternal; +import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.DirectConnectionConfig; import com.azure.cosmos.GatewayConnectionConfig; import com.azure.cosmos.implementation.apachecommons.collections.map.UnmodifiableMap; -import com.azure.cosmos.BridgeInternal; -import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; +import com.azure.cosmos.implementation.directconnectivity.WFConstants; +import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; +import com.azure.cosmos.implementation.routing.Range; import com.azure.cosmos.models.CosmosClientTelemetryConfig; -import com.azure.cosmos.models.ModelBridgeInternal; import com.azure.cosmos.models.PartitionKey; import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.PartitionKind; -import com.azure.cosmos.implementation.directconnectivity.WFConstants; -import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; -import com.azure.cosmos.implementation.routing.Range; import org.apache.commons.lang3.StringUtils; import org.assertj.core.api.Assertions; import org.testng.SkipException; @@ -775,17 +774,17 @@ void validateSessionTokenMultiPartitionCollectionBase(boolean useGateway, boolea RequestOptions option = new RequestOptions(); option.setSessionToken(sessionToken); option.setPartitionKey(new PartitionKey(2)); - writeClient.readDocument(childResource2.getResource().getSelfLink(), option).block(); + writeClient.readDocument(childResource2.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); option = new RequestOptions(); option.setSessionToken(StringUtils.EMPTY); option.setPartitionKey(new PartitionKey(1)); - writeClient.readDocument(childResource1.getResource().getSelfLink(), option).block(); + writeClient.readDocument(childResource1.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); option = new RequestOptions(); option.setSessionToken(sessionToken); option.setPartitionKey(new PartitionKey(1)); - Mono> readObservable = writeClient.readDocument(childResource1.getResource().getSelfLink(), option); + Mono> readObservable = writeClient.readDocument(childResource1.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); FailureValidator failureValidator = new FailureValidator.Builder().statusCode(HttpConstants.StatusCodes.NOTFOUND).subStatusCode(HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE).build(); validateFailure(readObservable, failureValidator); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 396b0befb8cb..f4614b795433 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -123,8 +123,8 @@ public Object[][] readManyTestConfigs() { return new Object[][] { {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofMinutes(6), false, false}, {FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, true}, +// {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, +// {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, true}, }; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java index 92091144a4d1..f1d9b041efca 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java @@ -29,6 +29,7 @@ public final class CosmosQueryRequestOptionsImpl extends CosmosQueryRequestOptio private String queryName; private Integer maxItemCountForVectorSearch; private List cancelledRequestDiagnosticsTracker = new ArrayList<>(); + private String collectionRid; /** * Instantiates a new query request options. @@ -356,4 +357,12 @@ public void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinit public PartitionKeyDefinition getPartitionKeyDefinition() { return this.partitionKeyDefinition; } + + public String getCollectionRid() { + return collectionRid; + } + + public void setCollectionRid(String collectionRid) { + this.collectionRid = collectionRid; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 09536eaa38ac..b7098089b471 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -126,7 +126,6 @@ public static CosmosClientBuilderAccessor getCosmosClientBuilderAccessor() { CosmosClientBuilderAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosClientBuilderAccessor is not initialized yet!"); - System.exit(9700); // Using a unique status code here to help debug the issue. } return snapshot; @@ -151,6 +150,12 @@ void setCosmosClientMetadataCachesSnapshot(CosmosClientBuilder builder, ConsistencyLevel getConsistencyLevel(CosmosClientBuilder builder); String getEndpoint(CosmosClientBuilder builder); + + CosmosItemSerializer getDefaultCustomSerializer(CosmosClientBuilder builder); + + void setRegionScopedSessionCapturingEnabled(CosmosClientBuilder builder, boolean isRegionScopedSessionCapturingEnabled); + + boolean getRegionScopedSessionCapturingEnabled(CosmosClientBuilder builder); } } @@ -178,7 +183,6 @@ public static PartitionKeyAccessor getPartitionKeyAccessor() { PartitionKeyAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("PartitionKeyAccessor is not initialized yet!"); - System.exit(9701); // Using a unique status code here to help debug the issue. } return snapshot; @@ -186,6 +190,8 @@ public static PartitionKeyAccessor getPartitionKeyAccessor() { public interface PartitionKeyAccessor { PartitionKey toPartitionKey(PartitionKeyInternal partitionKeyInternal); + PartitionKey toPartitionKey(List values, boolean strict); + PartitionKeyInternal getPartitionKeyInternal(PartitionKey partitionKey); } } @@ -213,7 +219,6 @@ public static DirectConnectionConfigAccessor getDirectConnectionConfigAccessor() DirectConnectionConfigAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("DirectConnectionConfigAccessor is not initialized yet!"); - System.exit(9702); // Using a unique status code here to help debug the issue. } return snapshot; @@ -259,31 +264,20 @@ public static CosmosQueryRequestOptionsAccessor getCosmosQueryRequestOptionsAcce CosmosQueryRequestOptionsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosQueryRequestOptionsAccessor is not initialized yet!"); - System.exit(9703); // Using a unique status code here to help debug the issue. } return snapshot; } public interface CosmosQueryRequestOptionsAccessor { - CosmosQueryRequestOptions clone( - CosmosQueryRequestOptions toBeCloned); - void setOperationContext(CosmosQueryRequestOptions queryRequestOptions, OperationContextAndListenerTuple operationContext); - OperationContextAndListenerTuple getOperationContext(CosmosQueryRequestOptions queryRequestOptions); - CosmosQueryRequestOptions setHeader(CosmosQueryRequestOptions queryRequestOptions, String name, String value); - Map getHeader(CosmosQueryRequestOptions queryRequestOptions); + CosmosQueryRequestOptionsBase getImpl(CosmosQueryRequestOptions options); + CosmosQueryRequestOptions clone(CosmosQueryRequestOptions toBeCloned); + CosmosQueryRequestOptions clone(CosmosQueryRequestOptionsBase toBeCloned); boolean isQueryPlanRetrievalDisallowed(CosmosQueryRequestOptions queryRequestOptions); CosmosQueryRequestOptions disallowQueryPlanRetrieval(CosmosQueryRequestOptions queryRequestOptions); - UUID getCorrelationActivityId(CosmosQueryRequestOptions queryRequestOptions); - CosmosQueryRequestOptions setCorrelationActivityId(CosmosQueryRequestOptions queryRequestOptions, UUID correlationActivityId); boolean isEmptyPageDiagnosticsEnabled(CosmosQueryRequestOptions queryRequestOptions); - Function getItemFactoryMethod(CosmosQueryRequestOptions queryRequestOptions, Class classOfT); - CosmosQueryRequestOptions setItemFactoryMethod(CosmosQueryRequestOptions queryRequestOptions, Function factoryMethod); String getQueryNameOrDefault(CosmosQueryRequestOptions queryRequestOptions, String defaultQueryName); RequestOptions toRequestOptions(CosmosQueryRequestOptions queryRequestOptions); - CosmosDiagnosticsThresholds getDiagnosticsThresholds(CosmosQueryRequestOptions options); - CosmosEndToEndOperationLatencyPolicyConfig getEndToEndOperationLatencyPolicyConfig(CosmosQueryRequestOptions options); - List getExcludeRegions(CosmosQueryRequestOptions options); List getCancelledRequestDiagnosticsTracker(CosmosQueryRequestOptions options); void setCancelledRequestDiagnosticsTracker( CosmosQueryRequestOptions options, @@ -301,6 +295,10 @@ void setCancelledRequestDiagnosticsTracker( void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition); PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options); + + void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid); + + String getCollectionRid(CosmosQueryRequestOptions options); } } @@ -334,19 +332,7 @@ public static CosmosReadManyRequestOptionsAccessor getCosmosReadManyRequestOptio } public interface CosmosReadManyRequestOptionsAccessor { - public CosmosQueryRequestOptionsBase getImpl(CosmosReadManyRequestOptions options); - - void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition); - - PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options); - - void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid); - - String getCollectionRid(CosmosQueryRequestOptions options); - - void setPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options, Set pkRangesWithSuccessfulRequests); - - Set getPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options); + CosmosQueryRequestOptionsBase getImpl(CosmosReadManyRequestOptions options); } } @@ -374,7 +360,6 @@ public static CosmosChangeFeedRequestOptionsAccessor getCosmosChangeFeedRequestO CosmosChangeFeedRequestOptionsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosChangeFeedRequestOptionsAccessor is not initialized yet!"); - System.exit(9704); // Using a unique status code here to help debug the issue. } return snapshot; @@ -385,10 +370,11 @@ public interface CosmosChangeFeedRequestOptionsAccessor { Map getHeader(CosmosChangeFeedRequestOptions changeFeedRequestOptions); void setOperationContext(CosmosChangeFeedRequestOptions changeFeedRequestOptions, OperationContextAndListenerTuple operationContext); OperationContextAndListenerTuple getOperationContext(CosmosChangeFeedRequestOptions changeFeedRequestOptions); - Function getItemFactoryMethod(CosmosChangeFeedRequestOptions queryRequestOptions, Class classOfT); - CosmosChangeFeedRequestOptions setItemFactoryMethod(CosmosChangeFeedRequestOptions queryRequestOptions, Function factoryMethod); CosmosDiagnosticsThresholds getDiagnosticsThresholds(CosmosChangeFeedRequestOptions options); List getExcludeRegions(CosmosChangeFeedRequestOptions cosmosChangeFeedRequestOptions); + CosmosChangeFeedRequestOptions createForProcessingFromContinuation(String continuation, FeedRange targetRange, String continuationLsn); + + CosmosChangeFeedRequestOptions clone(CosmosChangeFeedRequestOptions toBeCloned); } } @@ -416,13 +402,13 @@ public static CosmosItemRequestOptionsAccessor getCosmosItemRequestOptionsAccess CosmosItemRequestOptionsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosItemRequestOptionsAccessor is not initialized yet!"); - System.exit(9705); // Using a unique status code here to help debug the issue. } return snapshot; } public interface CosmosItemRequestOptionsAccessor { + RequestOptions toRequestOptions(CosmosItemRequestOptions itemRequestOptions, CosmosItemSerializer effectiveItemSerializer); void setOperationContext(CosmosItemRequestOptions queryRequestOptions, OperationContextAndListenerTuple operationContext); OperationContextAndListenerTuple getOperationContext(CosmosItemRequestOptions queryRequestOptions); CosmosItemRequestOptions clone(CosmosItemRequestOptions options); @@ -440,6 +426,8 @@ WriteRetryPolicy calculateAndGetEffectiveNonIdempotentRetriesEnabled( CosmosEndToEndOperationLatencyPolicyConfig getEndToEndOperationLatencyPolicyConfig( CosmosItemRequestOptions options); + + CosmosPatchItemRequestOptions clonePatchItemRequestOptions(CosmosPatchItemRequestOptions options); } } @@ -467,7 +455,6 @@ public static CosmosBulkExecutionOptionsAccessor getCosmosBulkExecutionOptionsAc CosmosBulkExecutionOptionsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosBulkExecutionOptionsAccessor is not initialized yet!"); - System.exit(9706); // Using a unique status code here to help debug the issue. } return snapshot; @@ -512,11 +499,10 @@ CosmosBulkExecutionOptions setHeader(CosmosBulkExecutionOptions cosmosBulkExecut Map getCustomOptions(CosmosBulkExecutionOptions cosmosBulkExecutionOptions); List getExcludeRegions(CosmosBulkExecutionOptions cosmosBulkExecutionOptions); int getMaxMicroBatchSize(CosmosBulkExecutionOptions cosmosBulkExecutionOptions); - - void setMaxMicroBatchSize(CosmosBulkExecutionOptions cosmosBulkExecutionOptions, int maxMicroBatchSize); - void setDiagnosticsTracker(CosmosBulkExecutionOptions cosmosBulkExecutionOptions, BulkExecutorDiagnosticsTracker tracker); BulkExecutorDiagnosticsTracker getDiagnosticsTracker(CosmosBulkExecutionOptions cosmosBulkExecutionOptions); + + CosmosBulkExecutionOptions clone(CosmosBulkExecutionOptions toBeCloned); } } @@ -546,7 +532,6 @@ public static CosmosItemResponseBuilderAccessor getCosmosItemResponseBuilderAcce CosmosItemResponseBuilderAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosItemResponseBuilderAccessor is not initialized yet!"); - System.exit(9707); // Using a unique status code here to help debug the issue. } return snapshot; @@ -555,7 +540,11 @@ public static CosmosItemResponseBuilderAccessor getCosmosItemResponseBuilderAcce public interface CosmosItemResponseBuilderAccessor { CosmosItemResponse createCosmosItemResponse(CosmosItemResponse response, Class classType, - ItemDeserializer itemDeserializer); + CosmosItemSerializer serializer); + + CosmosItemResponse createCosmosItemResponse(ResourceResponse response, + Class classType, + CosmosItemSerializer serializer); CosmosItemResponse withRemappedStatusCode( @@ -599,7 +588,6 @@ public static CosmosClientAccessor getCosmosClientAccessor() { CosmosClientAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosClientAccessor is not initialized yet!"); - System.exit(9708); // Using a unique status code here to help debug the issue. } return snapshot; @@ -635,13 +623,13 @@ public static CosmosContainerPropertiesAccessor getCosmosContainerPropertiesAcce CosmosContainerPropertiesAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosContainerPropertiesAccessor is not initialized yet!"); - System.exit(9709); // Using a unique status code here to help debug the issue. } return snapshot; } public interface CosmosContainerPropertiesAccessor { + CosmosContainerProperties create(DocumentCollection documentCollection); String getSelfLink(CosmosContainerProperties cosmosContainerProperties); void setSelfLink(CosmosContainerProperties cosmosContainerProperties, String selfLink); } @@ -672,7 +660,6 @@ public static CosmosPageFluxAccessor getCosmosPageFluxAccessor() { CosmosPageFluxAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosPageFluxAccessor is not initialized yet!"); - System.exit(9710); // Using a unique status code here to help debug the issue. } return snapshot; @@ -708,7 +695,6 @@ public static CosmosAsyncDatabaseAccessor getCosmosAsyncDatabaseAccessor() { CosmosAsyncDatabaseAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosAsyncDatabaseAccessor is not initialized yet!"); - System.exit(9711); // Using a unique status code here to help debug the issue. } return snapshot; @@ -745,7 +731,6 @@ public static CosmosBulkExecutionThresholdsStateAccessor getBulkExecutionThresho CosmosBulkExecutionThresholdsStateAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosBulkExecutionThresholdsStateAccessor is not initialized yet!"); - System.exit(9712); // Using a unique status code here to help debug the issue. } return snapshot; @@ -784,7 +769,6 @@ public static CosmosDiagnosticsAccessor getCosmosDiagnosticsAccessor() { CosmosDiagnosticsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosDiagnosticsAccessor is not initialized yet!"); - System.exit(9713); // Using a unique status code here to help debug the issue. } return snapshot; @@ -847,7 +831,6 @@ public static CosmosDiagnosticsContextAccessor getCosmosDiagnosticsContextAccess CosmosDiagnosticsContextAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosDiagnosticsAccessor is not initialized yet!"); - System.exit(9713); // Using a unique status code here to help debug the issue. } return snapshot; @@ -917,7 +900,7 @@ boolean endOperation( String getSpanName(CosmosDiagnosticsContext ctx); - void setSamplingRateSnapshot(CosmosDiagnosticsContext ctx, double samplingRate); + void setSamplingRateSnapshot(CosmosDiagnosticsContext ctx, double samplingRate, boolean isSampledOut); Integer getSequenceNumber(CosmosDiagnosticsContext ctx); @@ -950,7 +933,6 @@ public static CosmosAsyncContainerAccessor getCosmosAsyncContainerAccessor() { CosmosAsyncContainerAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosAsyncContainerAccessor is not initialized yet!"); - System.exit(9714); // Using a unique status code here to help debug the issue. } return snapshot; @@ -980,7 +962,7 @@ IFaultInjectorProvider getOrConfigureFaultInjectorProvider( Mono> readMany( CosmosAsyncContainer cosmosAsyncContainer, List itemIdentityList, - CosmosQueryRequestOptions requestOptions, + CosmosReadManyRequestOptions requestOptions, Class classType); Function>> queryItemsInternalFunc( @@ -996,6 +978,16 @@ Function>> queryItemsInternalFu Class classType); Mono> getFeedRanges(CosmosAsyncContainer cosmosAsyncContainer, boolean forceRefresh); + + Mono> trySplitFeedRange( + CosmosAsyncContainer cosmosAsyncContainer, + FeedRange feedRange, + int targetedCountAfterSplit); + + String getLinkWithoutTrailingSlash(CosmosAsyncContainer cosmosAsyncContainer); + Mono checkFeedRangeOverlapping(CosmosAsyncContainer container, FeedRange feedRange1, FeedRange feedRange2); + Mono> getOverlappingFeedRanges(CosmosAsyncContainer container, FeedRange feedRange, boolean forceRefresh); + Mono getPartitionKeyDefinition(CosmosAsyncContainer container); } } @@ -1024,13 +1016,19 @@ public static FeedResponseAccessor getFeedResponseAccessor() { FeedResponseAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("FeedResponseAccessor is not initialized yet!"); - System.exit(9715); // Using a unique status code here to help debug the issue. } return snapshot; } public interface FeedResponseAccessor { + FeedResponse createFeedResponse(RxDocumentServiceResponse response, + CosmosItemSerializer itemSerializer, + Class cls); + + FeedResponse createChangeFeedResponse(RxDocumentServiceResponse response, + CosmosItemSerializer itemSerializer, + Class cls); boolean getNoChanges(FeedResponse feedResponse); FeedResponse convertGenericType(FeedResponse feedResponse, Function conversion); FeedResponse createFeedResponse( @@ -1054,7 +1052,6 @@ public static CosmosBatchRequestOptionsAccessor getCosmosBatchRequestOptionsAcce CosmosBatchRequestOptionsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosBatchRequestOptionsAccessor is not initialized yet!"); - System.exit(9716); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1076,6 +1073,8 @@ CosmosBatchRequestOptions setConsistencyLevel(CosmosBatchRequestOptions cosmosBa CosmosBatchRequestOptions setHeader(CosmosBatchRequestOptions cosmosItemRequestOptions, String name, String value); Map getHeader(CosmosBatchRequestOptions cosmosItemRequestOptions); List getExcludeRegions(CosmosBatchRequestOptions cosmosBatchRequestOptions); + + CosmosBatchRequestOptions clone(CosmosBatchRequestOptions toBeCloned); } } @@ -1095,7 +1094,6 @@ public static CosmosBatchOperationResultAccessor getCosmosBatchOperationResultAc CosmosBatchOperationResultAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosBatchOperationResultAccessor is not initialized yet!"); - System.exit(9717); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1113,6 +1111,8 @@ public static void setCosmosBatchOperationResultAccessor(final CosmosBatchOperat public interface CosmosBatchOperationResultAccessor { ObjectNode getResourceObject(CosmosBatchOperationResult cosmosBatchOperationResult); void setResourceObject(CosmosBatchOperationResult cosmosBatchOperationResult, ObjectNode objectNode); + void setEffectiveItemSerializer(CosmosBatchOperationResult cosmosBatchOperationResult, + CosmosItemSerializer effectiveItemSerializer); } } @@ -1132,7 +1132,6 @@ public static CosmosPatchOperationsAccessor getCosmosPatchOperationsAccessor() { CosmosPatchOperationsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosPatchOperationsAccessor is not initialized yet!"); - System.exit(9718); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1168,7 +1167,6 @@ public static CosmosBatchAccessor getCosmosBatchAccessor() { CosmosBatchAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosBatchAccessor is not initialized yet!"); - System.exit(9719); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1204,7 +1202,6 @@ public static CosmosBulkItemResponseAccessor getCosmosBulkItemResponseAccessor() CosmosBulkItemResponseAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosBulkItemResponseAccessor is not initialized yet!"); - System.exit(9720); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1224,6 +1221,9 @@ public interface CosmosBulkItemResponseAccessor { void setResourceObject(CosmosBulkItemResponse cosmosBulkItemResponse, ObjectNode objectNode); + + void setEffectiveItemSerializer(CosmosBulkItemResponse cosmosBulkItemResponse, + CosmosItemSerializer effectiveItemSerializer); } } @@ -1243,7 +1243,6 @@ public static CosmosBatchResponseAccessor getCosmosBatchResponseAccessor() { CosmosBatchResponseAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosBatchResponseAccessor is not initialized yet!"); - System.exit(9721); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1279,7 +1278,6 @@ public static CosmosAsyncClientEncryptionKeyAccessor getCosmosAsyncClientEncrypt CosmosAsyncClientEncryptionKeyAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosAsyncClientEncryptionKeyAccessor is not initialized yet!"); - System.exit(9722); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1324,7 +1322,6 @@ public static CosmosAsyncClientAccessor getCosmosAsyncClientAccessor() { CosmosAsyncClientAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosAsyncClientAccessor is not initialized yet!"); - System.exit(9723); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1356,6 +1353,10 @@ CosmosDiagnosticsThresholds getEffectiveDiagnosticsThresholds( CosmosDiagnosticsThresholds operationLevelThresholds); DiagnosticsProvider getDiagnosticsProvider(CosmosAsyncClient client); + + CosmosItemSerializer getEffectiveItemSerializer( + CosmosAsyncClient client, + CosmosItemSerializer requestOptionsItemSerializer); } } @@ -1383,7 +1384,6 @@ public static CosmosDiagnosticsThresholdsAccessor getCosmosAsyncClientAccessor() CosmosDiagnosticsThresholdsAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosDiagnosticsThresholdsAccessor is not initialized yet!"); - System.exit(9727); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1414,7 +1414,6 @@ public static CosmosExceptionAccessor getCosmosExceptionAccessor() { CosmosExceptionAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosExceptionAccessor is not initialized yet!"); - System.exit(9800); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1461,7 +1460,6 @@ public static CosmosClientTelemetryConfigAccessor getCosmosClientTelemetryConfig CosmosClientTelemetryConfigAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("CosmosClientTelemetryConfigAccessor is not initialized yet!"); - System.exit(9724); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1512,6 +1510,9 @@ CosmosClientTelemetryConfig createSnapshot( void setUseLegacyTracing(CosmosClientTelemetryConfig config, boolean useLegacyTracing); void setTracer(CosmosClientTelemetryConfig config, Tracer tracer); double getSamplingRate(CosmosClientTelemetryConfig config); + double[] getDefaultPercentiles(CosmosClientTelemetryConfig config); + boolean shouldPublishHistograms(CosmosClientTelemetryConfig config); + boolean shouldApplyDiagnosticThresholdsForTransportLevelMeters(CosmosClientTelemetryConfig config); } } @@ -1531,7 +1532,6 @@ public static PriorityLevelAccessor getPriorityLevelAccessor() { PriorityLevelAccessor snapshot = accessor.get(); if (snapshot == null) { logger.error("PriorityLevelAccessor is not initialized yet!"); - System.exit(9728); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1556,7 +1556,7 @@ public interface PriorityLevelAccessor { public static final class CosmosContainerIdentityHelper { - private static final AtomicReference cosmosContainerIdentityClassLoaded = new AtomicReference<>(false); + private static final AtomicBoolean cosmosContainerIdentityClassLoaded = new AtomicBoolean(false); private static final AtomicReference accessor = new AtomicReference<>(); private CosmosContainerIdentityHelper() {} @@ -1572,7 +1572,6 @@ public static CosmosContainerIdentityAccessor getCosmosContainerIdentityAccessor if (snapshot == null) { logger.error("CosmosContainerIdentityAccessor is not initialized yet!"); - System.exit(9725); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1599,7 +1598,7 @@ public interface CosmosContainerIdentityAccessor { public static final class CosmosContainerProactiveInitConfigHelper { - private static final AtomicReference cosmosContainerProactiveInitConfigClassLoaded = new AtomicReference<>(false); + private static final AtomicBoolean cosmosContainerProactiveInitConfigClassLoaded = new AtomicBoolean(false); private static final AtomicReference accessor = new AtomicReference<>(); private CosmosContainerProactiveInitConfigHelper() {} @@ -1615,7 +1614,6 @@ public static CosmosContainerProactiveInitConfigAccessor getCosmosContainerProac if (snapshot == null) { logger.error("CosmosContainerProactiveInitConfigAccessor is not initialized yet!"); - System.exit(9726); // Using a unique status code here to help debug the issue. } return snapshot; @@ -1639,7 +1637,7 @@ public interface CosmosContainerProactiveInitConfigAccessor { } public static final class CosmosSessionRetryOptionsHelper { - private static final AtomicReference cosmosSessionRetryOptionsClassLoaded = new AtomicReference<>(false); + private static final AtomicBoolean cosmosSessionRetryOptionsClassLoaded = new AtomicBoolean(false); private static final AtomicReference accessor = new AtomicReference<>(); private CosmosSessionRetryOptionsHelper() {} @@ -1655,7 +1653,6 @@ public static CosmosSessionRetryOptionsAccessor getCosmosSessionRetryOptionsAcce if (snapshot == null) { logger.error("cosmosSessionRetryOptionsAccessor is not initialized yet!"); - System.exit(9727); // Using a unique status code here to help debug the issue. } return snapshot; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index c4868eb459a8..6bc3a4eff8ef 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -109,7 +109,6 @@ import java.util.Locale; import java.util.Map; import java.util.NoSuchElementException; -import java.util.Set; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; @@ -540,7 +539,6 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.isRegionScopedSessionCapturingEnabledOnClientOrSystemConfig = isRegionScopedSessionCapturingEnabled; this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); - this.retryPolicy = new RetryPolicy(this, this.globalEndpointManager, this.connectionPolicy); this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManagerForCircuitBreaker).init(); @@ -576,7 +574,7 @@ public CosmosDiagnostics createDiagnostics() { return diagnostics; } - private void initializeGatewayConfigurationReader() { + private DatabaseAccount initializeGatewayConfigurationReader() { this.gatewayConfigurationReader = new GatewayServiceConfigurationReader(this.globalEndpointManager); DatabaseAccount databaseAccount = this.globalEndpointManager.getLatestDatabaseAccount(); //Database account should not be null here, @@ -603,16 +601,52 @@ private void initializeGatewayConfigurationReader() { } this.useMultipleWriteLocations = this.connectionPolicy.isMultipleWriteRegionsEnabled() && BridgeInternal.isEnableMultipleWriteLocations(databaseAccount); - + return databaseAccount; // TODO: add support for openAsync // https://msdata.visualstudio.com/CosmosDB/_workitems/edit/332589 } + private void resetSessionContainerIfNeeded(DatabaseAccount databaseAccount) { + boolean isRegionScopingOfSessionTokensPossible = this.isRegionScopingOfSessionTokensPossible(databaseAccount, this.useMultipleWriteLocations, this.isRegionScopedSessionCapturingEnabledOnClientOrSystemConfig); + + if (isRegionScopingOfSessionTokensPossible) { + this.sessionContainer = new RegionScopedSessionContainer(this.serviceEndpoint.getHost(), this.sessionCapturingDisabled, this.globalEndpointManager); + this.diagnosticsClientConfig.withRegionScopedSessionContainerOptions((RegionScopedSessionContainer) this.sessionContainer); + } + } + + private boolean isRegionScopingOfSessionTokensPossible(DatabaseAccount databaseAccount, boolean useMultipleWriteLocations, boolean isRegionScopedSessionCapturingEnabled) { + + if (!isRegionScopedSessionCapturingEnabled) { + return false; + } + + if (!useMultipleWriteLocations) { + return false; + } + + Iterable readableLocationsIterable = databaseAccount.getReadableLocations(); + Iterator readableLocationsIterator = readableLocationsIterable.iterator(); + + while (readableLocationsIterator.hasNext()) { + DatabaseAccountLocation readableLocation = readableLocationsIterator.next(); + + String normalizedReadableRegion = readableLocation.getName().toLowerCase(Locale.ROOT).trim().replace(" ", ""); + + if (RegionNameToRegionIdMap.getRegionId(normalizedReadableRegion) == -1) { + return false; + } + } + + return true; + } + private void updateGatewayProxy() { (this.gatewayProxy).setGatewayServiceConfigurationReader(this.gatewayConfigurationReader); (this.gatewayProxy).setCollectionCache(this.collectionCache); (this.gatewayProxy).setPartitionKeyRangeCache(this.partitionKeyRangeCache); (this.gatewayProxy).setUseMultipleWriteLocations(this.useMultipleWriteLocations); + (this.gatewayProxy).setSessionContainer(this.sessionContainer); } public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Function httpClientInterceptor) { @@ -631,11 +665,11 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.userAgentContainer, this.globalEndpointManager, this.reactorHttpClient, - this.apiType); - this.apiType, this.globalPartitionEndpointManagerForCircuitBreaker); + this.globalEndpointManager.init(); + DatabaseAccount databaseAccountSnapshot = this.initializeGatewayConfigurationReader(); this.resetSessionContainerIfNeeded(databaseAccountSnapshot); @@ -675,7 +709,7 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this, this.connectionPolicy.getPreferredRegions()); clientTelemetry.init().thenEmpty((publisher) -> { - logger.info( + logger.warn( "Initialized DocumentClient [{}] with machineId[{}]" + " serviceEndpoint [{}], connectionPolicy [{}], consistencyLevel [{}]", clientId, @@ -877,7 +911,7 @@ private Mono> createDatabaseInternal(Database databas Map requestHeaders = this.getRequestHeaders(options, ResourceType.Database, OperationType.Create); Instant serializationStartTimeUTC = Instant.now(); - ByteBuffer byteBuffer = ModelBridgeInternal.serializeJsonToByteBuffer(database); + ByteBuffer byteBuffer = database.serializeJsonToByteBuffer(CosmosItemSerializer.DEFAULT_SERIALIZER, null); Instant serializationEndTimeUTC = Instant.now(); SerializationDiagnosticsContext.SerializationDiagnostics serializationDiagnostics = new SerializationDiagnosticsContext.SerializationDiagnostics( serializationStartTimeUTC, @@ -1013,7 +1047,7 @@ private OperationContextAndListenerTuple getOperationContextAndListenerTuple(Cos if (options == null) { return null; } - return ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getOperationContext(options); + return qryOptAccessor.getImpl(options).getOperationContextAndListenerTuple(); } private OperationContextAndListenerTuple getOperationContextAndListenerTuple(RequestOptions options) { @@ -1046,7 +1080,8 @@ private Flux> createQuery( CosmosQueryRequestOptions nonNullQueryOptions = state.getQueryOptions(); UUID correlationActivityIdOfRequestOptions = qryOptAccessor - .getCorrelationActivityId(nonNullQueryOptions); + .getImpl(nonNullQueryOptions) + .getCorrelationActivityId(); UUID correlationActivityId = correlationActivityIdOfRequestOptions != null ? correlationActivityIdOfRequestOptions : randomUuid(); @@ -1283,7 +1318,7 @@ private Mono> createCollectionInternal(Stri Map requestHeaders = this.getRequestHeaders(options, ResourceType.DocumentCollection, OperationType.Create); Instant serializationStartTimeUTC = Instant.now(); - ByteBuffer byteBuffer = ModelBridgeInternal.serializeJsonToByteBuffer(collection); + ByteBuffer byteBuffer = collection.serializeJsonToByteBuffer(CosmosItemSerializer.DEFAULT_SERIALIZER, null); Instant serializationEndTimeUTC = Instant.now(); SerializationDiagnosticsContext.SerializationDiagnostics serializationDiagnostics = new SerializationDiagnosticsContext.SerializationDiagnostics( serializationStartTimeUTC, @@ -1304,7 +1339,9 @@ private Mono> createCollectionInternal(Stri return this.create(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)).map(response -> toResourceResponse(response, DocumentCollection.class)) .doOnNext(resourceResponse -> { // set the session token - this.sessionContainer.setSessionToken(resourceResponse.getResource().getResourceId(), + this.sessionContainer.setSessionToken( + request, + resourceResponse.getResource().getResourceId(), getAltLink(resourceResponse.getResource()), resourceResponse.getResponseHeaders()); }); @@ -1334,7 +1371,7 @@ private Mono> replaceCollectionInternal(Doc String path = Utils.joinPath(collection.getSelfLink(), null); Map requestHeaders = this.getRequestHeaders(options, ResourceType.DocumentCollection, OperationType.Replace); Instant serializationStartTimeUTC = Instant.now(); - ByteBuffer byteBuffer = ModelBridgeInternal.serializeJsonToByteBuffer(collection); + ByteBuffer byteBuffer = collection.serializeJsonToByteBuffer(CosmosItemSerializer.DEFAULT_SERIALIZER, null); Instant serializationEndTimeUTC = Instant.now(); SerializationDiagnosticsContext.SerializationDiagnostics serializationDiagnostics = new SerializationDiagnosticsContext.SerializationDiagnostics( serializationStartTimeUTC, @@ -1358,7 +1395,9 @@ private Mono> replaceCollectionInternal(Doc .doOnNext(resourceResponse -> { if (resourceResponse.getResource() != null) { // set the session token - this.sessionContainer.setSessionToken(resourceResponse.getResource().getResourceId(), + this.sessionContainer.setSessionToken( + request, + resourceResponse.getResource().getResourceId(), getAltLink(resourceResponse.getResource()), resourceResponse.getResponseHeaders()); } @@ -1521,7 +1560,7 @@ private static String serializeProcedureParams(List objectArray) { for (int i = 0; i < objectArray.size(); ++i) { Object object = objectArray.get(i); if (object instanceof JsonSerializable) { - stringArray[i] = ModelBridgeInternal.toJsonFromJsonSerializable((JsonSerializable) object); + stringArray[i] = ((JsonSerializable) object).toJson(); } else { // POJO, ObjectNode, number, STRING or Boolean @@ -1651,7 +1690,7 @@ private Map getRequestHeaders(RequestOptions options, ResourceTy headers.put(HttpConstants.HttpHeaders.OFFER_THROUGHPUT, String.valueOf(offer.getThroughput())); } else if (offer.getOfferAutoScaleSettings() != null) { headers.put(HttpConstants.HttpHeaders.OFFER_AUTOPILOT_SETTINGS, - ModelBridgeInternal.toJsonFromJsonSerializable(offer.getOfferAutoScaleSettings())); + offer.getOfferAutoScaleSettings().toJson()); } } } @@ -1755,6 +1794,7 @@ private void addPartitionKeyInformation(RxDocumentServiceRequest request, } request.setPartitionKeyInternal(partitionKeyInternal); + request.setPartitionKeyDefinition(partitionKeyDefinition); request.getHeaders().put(HttpConstants.HttpHeaders.PARTITION_KEY, Utils.escapeNonAscii(partitionKeyInternal.toJson())); } @@ -1778,7 +1818,7 @@ private Mono getCreateDocumentRequest(DocumentClientRe if (options != null) { trackingId = options.getTrackingId(); } - ByteBuffer content = InternalObjectNode.serializeJsonToByteBuffer(document, mapper, trackingId); + ByteBuffer content = InternalObjectNode.serializeJsonToByteBuffer(document, options.getEffectiveItemSerializer(), trackingId); Instant serializationEndTimeUTC = Instant.now(); SerializationDiagnosticsContext.SerializationDiagnostics serializationDiagnostics = new SerializationDiagnosticsContext.SerializationDiagnostics( @@ -1909,6 +1949,7 @@ private RxDocumentServiceRequest addBatchHeaders(RxDocumentServiceRequest reques request.getHeaders().put(HttpConstants.HttpHeaders.IS_BATCH_ATOMIC, String.valueOf(serverBatchRequest.isAtomicBatch())); request.getHeaders().put(HttpConstants.HttpHeaders.SHOULD_BATCH_CONTINUE_ON_ERROR, String.valueOf(serverBatchRequest.isShouldContinueOnError())); + request.setPartitionKeyDefinition(collection.getPartitionKey()); request.setNumberOfItemsInBatchRequest(serverBatchRequest.getOperations().size()); return request; @@ -3601,7 +3642,8 @@ private Flux> pointReadsForReadMany( BridgeInternal.getClientSideRequestStatics(cosmosException.getDiagnostics()))); } else { CosmosItemResponse cosmosItemResponse = - ModelBridgeInternal.createCosmosAsyncItemResponse(resourceResponse, klass, effectiveItemDeserializer); + itemResponseAccessor.createCosmosItemResponse(resourceResponse, klass, effectiveItemSerializer); + feedResponse = ModelBridgeInternal.createFeedResponse( Arrays.asList(cosmosItemResponse.getItem()), cosmosItemResponse.getResponseHeaders()); @@ -3623,22 +3665,33 @@ public Flux> queryDocuments( return queryDocuments(collectionLink, new SqlQuerySpec(query), state, classOfT); } - private ItemDeserializer getEffectiveItemDeserializer( - CosmosQueryRequestOptions queryRequestOptions, - Class klass) { - - Function factoryMethod = queryRequestOptions == null ? - null : - ImplementationBridgeHelpers - .CosmosQueryRequestOptionsHelper - .getCosmosQueryRequestOptionsAccessor() - .getItemFactoryMethod(queryRequestOptions, klass); + @Override + public CosmosItemSerializer getEffectiveItemSerializer(CosmosItemSerializer requestOptionsItemSerializer) { + if (requestOptionsItemSerializer != null) { + return requestOptionsItemSerializer; + } - if (factoryMethod == null) { - return this.itemDeserializer; // using default itemDeserializer + if (this.defaultCustomSerializer != null) { + return this.defaultCustomSerializer; } - return new ItemDeserializer.JsonDeserializer(factoryMethod); + return CosmosItemSerializer.DEFAULT_SERIALIZER; + } + + private CosmosItemSerializer getEffectiveItemSerializer(CosmosQueryRequestOptions queryRequestOptions) { + + CosmosItemSerializer requestOptionsItemSerializer = + queryRequestOptions != null ? queryRequestOptions.getCustomItemSerializer() : null; + + return this.getEffectiveItemSerializer(requestOptionsItemSerializer); + } + + private CosmosItemSerializer getEffectiveItemSerializer(CosmosItemRequestOptions itemRequestOptions) { + + CosmosItemSerializer requestOptionsItemSerializer = + itemRequestOptions != null ? itemRequestOptions.getCustomItemSerializer() : null; + + return this.getEffectiveItemSerializer(requestOptionsItemSerializer); } private IDocumentQueryClient documentQueryClientImpl(RxDocumentClientImpl rxDocumentClientImpl, OperationContextAndListenerTuple operationContextAndListenerTuple) { @@ -3713,6 +3766,11 @@ public Mono executeFeedOperationWithAvailabilityStrategy( ); } + @Override + public CosmosItemSerializer getEffectiveItemSerializer(CosmosQueryRequestOptions queryRequestOptions) { + return RxDocumentClientImpl.this.getEffectiveItemSerializer(queryRequestOptions); + } + @Override public Mono readFeedAsync(RxDocumentServiceRequest request) { // TODO Auto-generated method stub @@ -5299,12 +5357,12 @@ private Mono getDatabaseAccountInternal(DocumentClientRetryPoli } } - public Object getSession() { + public ISessionContainer getSession() { return this.sessionContainer; } - public void setSession(Object sessionContainer) { - this.sessionContainer = (SessionContainer) sessionContainer; + public void setSession(ISessionContainer sessionContainer) { + this.sessionContainer = sessionContainer; } @Override @@ -5444,12 +5502,6 @@ public void close() { logger.warn("Already shutdown!"); } } - - @Override - public ItemDeserializer getItemDeserializer() { - return this.itemDeserializer; - } - @Override public synchronized void enableThroughputControlGroup(ThroughputControlGroupInternal group, Mono throughputQueryMono) { checkNotNull(group, "Throughput control group can not be null"); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index baad4f6aba55..dff9c788e7b2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -52,7 +52,6 @@ public class StoreClient implements IStoreClient { private final DiagnosticsClientContext diagnosticsClientContext; private final Logger logger = LoggerFactory.getLogger(StoreClient.class); private final GatewayServiceConfigurationReader serviceConfigurationReader; - private final ISessionContainer sessionContainer; private final IAddressResolver addressResolver; private final ReplicatedResourceClient replicatedResourceClient; @@ -84,6 +83,7 @@ public StoreClient( sessionRetryOptions); addressResolver.setOpenConnectionsProcessor(this.transportClient.getProactiveOpenConnectionsProcessor()); + this.addressResolver = addressResolver; } public void enableThroughputControl(ThroughputControlStore throughputControlStore) { @@ -191,6 +191,13 @@ private RxDocumentServiceResponse completeResponse( RxDocumentServiceResponse rxDocumentServiceResponse = new RxDocumentServiceResponse(this.diagnosticsClientContext, storeResponse); rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = addressResolver.getGlobalPartitionEndpointManagerForCircuitBreaker(); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); + } + return rxDocumentServiceResponse; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index a93bc73a8ee8..224b2bcdbb8c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -202,43 +202,40 @@ private Mono> executeInternalFuncCore( return BackoffRetryUtility.executeRetry(() -> { this.retries.incrementAndGet(); - return executeRequestAsync( - this.itemSerializer, - req); - return Mono.just(req) - .flatMap(request -> client.populateFeedRangeHeader(request)) - .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions)) - .flatMap(request -> { - - finalRetryPolicyInstance.onBeforeSendRequest(request); - return executeRequestAsync( - this.factoryMethod, - request); - }); - }, finalRetryPolicyInstance) - .map(tFeedResponse -> { - this.fetchSchedulingMetrics.stop(); - this.fetchExecutionRangeAccumulator.endFetchRange(tFeedResponse.getActivityId(), - tFeedResponse.getResults().size(), - this.retries.get()); - ImmutablePair schedulingTimeSpanMap = - new ImmutablePair<>(DEFAULT_PARTITION_RANGE, this.fetchSchedulingMetrics.getElapsedTime()); - if (!StringUtils.isEmpty(tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.QUERY_METRICS))) { - QueryMetrics qm = - BridgeInternal.createQueryMetricsFromDelimitedStringAndClientSideMetrics(tFeedResponse.getResponseHeaders() - .get(HttpConstants.HttpHeaders.QUERY_METRICS), - new ClientSideMetrics(this.retries.get(), - tFeedResponse.getRequestCharge(), - this.fetchExecutionRangeAccumulator.getExecutionRanges(), - Collections.singletonList(schedulingTimeSpanMap)), - tFeedResponse.getActivityId(), - tFeedResponse.getResponseHeaders().getOrDefault(HttpConstants.HttpHeaders.INDEX_UTILIZATION, null)); - String pkrId = tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID); - String queryMetricKey = DEFAULT_PARTITION_RANGE + ",pkrId:" + pkrId; - BridgeInternal.putQueryMetricsIntoMap(tFeedResponse, queryMetricKey, qm); - } - return tFeedResponse; - }); + + return Mono.just(req) + .flatMap(request -> client.populateFeedRangeHeader(request)) + .flatMap(request -> client.addPartitionLevelUnavailableRegionsOnRequest(request, cosmosQueryRequestOptions)) + .flatMap(request -> { + finalRetryPolicyInstance.onBeforeSendRequest(request); + return executeRequestAsync( + this.itemSerializer, + req); + }); + }, finalRetryPolicyInstance) + .map(tFeedResponse -> { + this.fetchSchedulingMetrics.stop(); + this.fetchExecutionRangeAccumulator.endFetchRange(tFeedResponse.getActivityId(), + tFeedResponse.getResults().size(), + this.retries.get()); + ImmutablePair schedulingTimeSpanMap = + new ImmutablePair<>(DEFAULT_PARTITION_RANGE, this.fetchSchedulingMetrics.getElapsedTime()); + if (!StringUtils.isEmpty(tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.QUERY_METRICS))) { + QueryMetrics qm = + BridgeInternal.createQueryMetricsFromDelimitedStringAndClientSideMetrics(tFeedResponse.getResponseHeaders() + .get(HttpConstants.HttpHeaders.QUERY_METRICS), + new ClientSideMetrics(this.retries.get(), + tFeedResponse.getRequestCharge(), + this.fetchExecutionRangeAccumulator.getExecutionRanges(), + Collections.singletonList(schedulingTimeSpanMap)), + tFeedResponse.getActivityId(), + tFeedResponse.getResponseHeaders().getOrDefault(HttpConstants.HttpHeaders.INDEX_UTILIZATION, null)); + String pkrId = tFeedResponse.getResponseHeaders().get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID); + String queryMetricKey = DEFAULT_PARTITION_RANGE + ",pkrId:" + pkrId; + BridgeInternal.putQueryMetricsIntoMap(tFeedResponse, queryMetricKey, qm); + } + return tFeedResponse; + }); } public RxDocumentServiceRequest createRequestAsync(String continuationToken, Integer maxPageSize) { @@ -253,7 +250,8 @@ public RxDocumentServiceRequest createRequestAsync(String continuationToken, Int RxDocumentServiceRequest request = this.createDocumentServiceRequest( requestHeaders, this.query, - this.getPartitionKeyInternal()); + this.getPartitionKeyInternal(), + this.getPartitionKeyDefinition()); if (!StringUtils.isEmpty(getPartitionKeyRangeIdInternal(cosmosQueryRequestOptions))) { request.routeTo(new PartitionKeyRangeIdentity(getPartitionKeyRangeIdInternal(cosmosQueryRequestOptions))); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index 82a251ccd1fa..cc5ef9b7f5cf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -206,19 +206,10 @@ public Flux produceAsync() { pageSize, Paginator.getPreFetchCount(cosmosQueryRequestOptions, top, pageSize), qryOptionsAccessor.getImpl(cosmosQueryRequestOptions).getOperationContextAndListenerTuple(), - qryOptionsAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions) + qryOptionsAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), + client.getGlobalEndpointManager(), + client.getGlobalPartitionEndpointManagerForCircuitBreaker() ) - ImplementationBridgeHelpers - .CosmosQueryRequestOptionsHelper - .getCosmosQueryRequestOptionsAccessor() - .getOperationContext(cosmosQueryRequestOptions), - ImplementationBridgeHelpers - .CosmosQueryRequestOptionsHelper - .getCosmosQueryRequestOptionsAccessor() - .getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), - client.getGlobalEndpointManager(), - client.getGlobalPartitionEndpointManagerForCircuitBreaker() - ) .map(rsp -> { this.lastResponseContinuationToken = rsp.getContinuationToken(); this.fetchExecutionRangeAccumulator.endFetchRange(rsp.getActivityId(), @@ -286,7 +277,7 @@ private Flux feedRangeGoneProof(Flux Flux> getPaginatedQueryResultAsObservable( maxPageSize, getPreFetchCount(cosmosQueryRequestOptions, top, maxPageSize), qryOptAccessor.getImpl(cosmosQueryRequestOptions).getOperationContextAndListenerTuple(), - qryOptAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions)); + qryOptAccessor.getCancelledRequestDiagnosticsTracker(cosmosQueryRequestOptions), + globalEndpointManager, + globalPartitionEndpointManagerForCircuitBreaker); } public static Flux> getPaginatedQueryResultAsObservable( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index 8f48fc39ca53..6ad671c42243 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -632,49 +632,28 @@ public String getRequestContinuation(CosmosQueryRequestOptions options) { @Override public void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition) { - options.setPartitionKeyDefinition(partitionKeyDefinition); + options.actualRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); } @Override public PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options) { - return options.getPartitionKeyDefinition(); + return options.actualRequestOptions.getPartitionKeyDefinition(); } @Override public void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid) { - options.setCollectionRid(collectionRid); + options.actualRequestOptions.setCollectionRid(collectionRid); } @Override public String getCollectionRid(CosmosQueryRequestOptions options) { - return options.getCollectionRid(); - } - - @Override - public void setPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options, Set pkRangesWithSuccessfulRequests) { - options.setPkRangesWithSuccessfulRequests(pkRangesWithSuccessfulRequests); - } - - @Override - public Set getPkRangesWithSuccessfulRequests(CosmosQueryRequestOptions options) { - return options.getPkRangesWithSuccessfulRequests(); + return options.actualRequestOptions.getCollectionRid(); } @Override public Integer getMaxItemCountForVectorSearch(CosmosQueryRequestOptions options) { return options.getMaxItemCountForVectorSearch(); } - - @Override - public void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition) { - options.setPartitionKeyDefinition(partitionKeyDefinition); - } - - @Override - public PartitionKeyDefinition getPartitionKeyDefinition(CosmosQueryRequestOptions options) { - return options.getPartitionKeyDefinition(); - - } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java index 8f5997499d01..ffef1e61212b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java @@ -10,9 +10,11 @@ import com.azure.cosmos.implementation.CosmosQueryRequestOptionsBase; import com.azure.cosmos.implementation.CosmosReadManyRequestOptionsImpl; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.PartitionKeyRange; import java.time.Duration; import java.util.List; +import java.util.Set; /** * Specifies the options associated with read many operation From d2203cdd90f76056f50e084ad980990a7a4de7b1 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 20 May 2024 20:04:55 -0400 Subject: [PATCH 033/140] Fixing CI pipeline. --- .../implementation/RxDocumentClientImpl.java | 2 +- .../GoneAndRetryWithRetryPolicy.java | 15 --------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 6bc3a4eff8ef..3b7bac84d2fc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -541,7 +541,7 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); - ((GlobalPartitionEndpointManagerForCircuitBreaker) this.globalPartitionEndpointManagerForCircuitBreaker).init(); + this.globalPartitionEndpointManagerForCircuitBreaker.init(); this.retryPolicy = new RetryPolicy( this, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java index 1380b772eac3..ceb1ddb4357f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java @@ -194,8 +194,6 @@ public Mono shouldRetry(Exception exception) { Duration timeout; boolean forceRefreshAddressCache; - bookmarkException(this.request, exception); - if (isNonRetryableException(exception)) { logger.debug("Operation will NOT be retried. Current attempt {}, Exception: ", this.attemptCount, exception); @@ -313,19 +311,6 @@ private Pair, Boolean> handleInvalidPartitionException(I return Pair.of(null, false); } - - private static boolean bookmarkException(RxDocumentServiceRequest request, Exception exception) { - - if (exception instanceof CosmosException) { - CosmosException cosmosException = Utils.as(exception, CosmosException.class); - - if (request.requestContext == null) { - return false; - } - } - - return false; - } } class RetryWithRetryPolicy implements IRetryPolicy { From 0443b682ab33294f76febb1ef6cca2e273001468 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 20 May 2024 20:08:38 -0400 Subject: [PATCH 034/140] Fixing CI pipeline. --- .../java/com/azure/cosmos/CosmosItemTest.java | 2 +- ...njectionWithAvailabilityStrategyTests.java | 1598 ++++++++--------- 2 files changed, 800 insertions(+), 800 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java index 6381ddbc07f6..020b845fae9d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosItemTest.java @@ -176,7 +176,7 @@ public void readItemWithVeryLargePartitionKey() throws Exception { validateItemResponse(docDefinition, readResponse); } - @Test(groups = { "fast" }/*, timeOut = TIMEOUT*/) + @Test(groups = { "fast" }, timeOut = TIMEOUT) public void readItem() throws Exception { InternalObjectNode properties = getDocumentDefinition(UUID.randomUUID().toString()); CosmosItemResponse itemResponse = container.createItem(properties); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java index 32ae9923850e..4a1901a07cf1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/FaultInjectionWithAvailabilityStrategyTests.java @@ -2548,500 +2548,500 @@ public Object[][] testConfigs_queryAfterCreation() { // }, // Plain vanilla single partition query. No failure injection and all records will fit into a single page -// new Object[] { -// "DefaultPageSize_SinglePartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// null, -// validateExactlyOneRecordReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple cross partition query. No failure injection and all records returned for a partition will fit -// // into a single page. But there will be one page per partition -// new Object[] { -// "DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// PHYSICAL_PARTITION_COUNT, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// validateAllRecordsSameIdReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple single partition query. No failure injection but page size set to 1 - so, multiple pages will -// // be returned from the PagedFlux - for each document one page - and the expectation is that there -// // will be as many CosmosDiagnosticsContext instances as pages. -// new Object[] { -// "PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOne, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// validateAllRecordsSamePartitionReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE -// }, -// -// // Simple cross partition query. No failure injection but page size set to 1 - so, multiple pages will -// // be returned from the PagedFlux per physical partition - for each document one page - and the -// // expectation is that there will be as many CosmosDiagnosticsContext instances as pages. -// new Object[] { -// "PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOne, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// validateAllRecordsSameIdReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple single partition query intended to not return any results. No failure injection and only -// // one empty page expected - with exactly one CosmosDiagnostics instance -// new Object[] { -// "EmptyResults_SinglePartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// singlePartitionEmptyResultQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOne, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan -// ), -// null, -// validateEmptyResults, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple cross partition query intended to not return any results. No failures injected. -// // Empty pages should be skipped (except for the last one) - so, exactly one empty page expected - -// // with exactly one CosmosDiagnostics instance - even when this is a cross-partition query touching all -// // partitions -// new Object[] { -// "EmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionEmptyResultQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOne, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// // empty pages are skipped except for the last one -// 1, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// } -// ), -// null, -// validateEmptyResults, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple cross partition query intended to not return any results. No failures injected. -// // Empty pages should be returned - so, exactly one page per partition expected - -// // with exactly one CosmosDiagnostics instance (plus query plan on very first one) -// new Object[] { -// "EmptyResults_EnableEmptyPageRetrieval_CrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionEmptyResultQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOneAndEmptyPagesEnabled, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// // empty pages are bubbled up -// PHYSICAL_PARTITION_COUNT, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) -// .isEqualTo(1); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(1); -// } -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[0].getClientSideRequestStatistics().size()) -// .isEqualTo(1); -// assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(1); -// } -// ), -// validateEmptyResults, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple cross partition query intended to not return any results except on one partition. -// // No failures injected. Empty pages of all but one partition will be skipped, but -// // query metrics and client side request statistics are captured in the merged diagnostics. -// new Object[] { -// "AllButOnePartitionEmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// } -// ), -// null, -// validateExactlyOneRecordReturned, -// NO_OTHER_DOCS_WITH_SAME_ID, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Single partition query with DISTINCT and ORDER BY. No failures injected -// // Expect to get as many pages and diagnostics contexts as there are documents for this PK-value -// new Object[] { -// "AggregatesAndOrderBy_PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// singlePartitionWithAggregatesAndOrderByQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOne, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// validateAllRecordsSamePartitionReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE -// }, -// -// // Single partition query with DISTINCT and ORDER BY. No failures injected -// // Only a single document matches the where condition - but this is a cross partition query. Because -// // the single page returned in the CosmosPagedFlux had to peek into all physical partitions to be -// // able to achieve global ordering in the query pipeline a single CosmosDiagnosticsContext instance -// // is returned - but with query metrics and client request statistics for all partitions -// new Object[] { -// "AggregatesAndOrderBy_PageSizeOne_CrossPartitionSingleRecord_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionWithAggregatesAndOrderByQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOne, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// } -// ), -// null, -// validateExactlyOneRecordReturned, -// NO_OTHER_DOCS_WITH_SAME_PK, -// NO_OTHER_DOCS_WITH_SAME_ID -// }, -// -// // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where -// // condition but the distinct id value is identical - so, to the application only a single record is -// // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances -// // as there are documents with the same id-value. -// new Object[] { -// "AggregatesAndOrderBy_PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionWithAggregatesAndOrderByQueryGenerator, -// queryReturnsTotalRecordCountWithPageSizeOne, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxOnlyFeedResponsesExceptQueryPlan -// ), -// validateExactlyOneRecordReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where -// // condition but the distinct id value is identical - so, to the application only a single record is -// // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances -// // as there are documents with the same id-value. -// new Object[] { -// "AggregatesAndOrderBy_DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionWithAggregatesAndOrderByQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// PHYSICAL_PARTITION_COUNT, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) -// .isEqualTo(1); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(1); -// } -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[0].getClientSideRequestStatistics().size()) -// .isEqualTo(1); -// assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(1); -// } -// ), -// validateExactlyOneRecordReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Cross partition query with DISTINCT and ORDER BY. Single document meets the where -// // condition, but queries against all partitions need to be executed. Expect to see a single -// // page and CosmosDiagnosticsContext - but including three request statistics and query metrics. -// new Object[] { -// "AggregatesAndOrderBy_DefaultPageSize_SingleRecordCrossPartition_AllGood_NoAvailabilityStrategy", -// ONE_SECOND_DURATION, -// noAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// crossPartitionWithAggregatesAndOrderByQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// noFailureInjection, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[1].getClientSideRequestStatistics().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) -// .isEqualTo(PHYSICAL_PARTITION_COUNT); -// } -// ), -// null, -// validateExactlyOneRecordReturned, -// NO_OTHER_DOCS_WITH_SAME_ID, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple single partition query - 404/1002 injected into all partition of the first region -// // RegionSwitchHint is local - with eager availability strategy - so, the expectation is that the -// // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext -// // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for -// // the attempt in the first region and third one for hedging returning successful response. -// new Object[] { -// "DefaultPageSize_SinglePartition_404-1002_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", -// Duration.ofSeconds(10), -// eagerThresholdAvailabilityStrategy, -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectReadSessionNotAvailableIntoFirstRegionOnly, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxTwoRegions, -// validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isEqualTo(3); -// -// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region -// // (possibly also fail-over to secondary region) -// assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); -// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) -// .isEqualTo(true); -// -// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region -// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) -// .isEqualTo(true); -// } -// ), -// null, -// validateExactlyOneRecordReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple cross partition query - 404/1002 injected into all partition of the first region -// // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the -// // retry on the first region will provide a successful response and no hedging is happening. -// // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have -// // a single CosmosDiagnostics instance contacting both regions. -// new Object[] { -// "DefaultPageSize_CrossPartition_404-1002_OnlyFirstRegion_AllPartitions_RemotePreferred_ReluctantAvailabilityStrategy", -// THREE_SECOND_DURATION, -// reluctantThresholdAvailabilityStrategy, -// CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// crossPartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectReadSessionNotAvailableIntoFirstRegionOnly, -// validateStatusCodeIs200Ok, -// PHYSICAL_PARTITION_COUNT, -// ArrayUtils.toArray( -// validateCtxTwoRegions, -// validateCtxQueryPlan, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isEqualTo(2); -// -// // Ensure fail-over happened -// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); -// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) -// .isEqualTo(true); -// assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) -// .isEqualTo(true); -// } -// ), -// ArrayUtils.toArray( -// validateCtxTwoRegions, -// validateCtxOnlyFeedResponsesExceptQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isEqualTo(1); -// -// // Ensure fail-over happened -// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(2); -// assertThat(diagnostics[0].getContactedRegionNames().contains(FIRST_REGION_NAME)) -// .isEqualTo(true); -// assertThat(diagnostics[0].getContactedRegionNames().contains(SECOND_REGION_NAME)) -// .isEqualTo(true); -// } -// ), -// validateAllRecordsSameIdReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, + new Object[] { + "DefaultPageSize_SinglePartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + noFailureInjection, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + null, + validateExactlyOneRecordReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple cross partition query. No failure injection and all records returned for a partition will fit + // into a single page. But there will be one page per partition + new Object[] { + "DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + noFailureInjection, + validateStatusCodeIs200Ok, + PHYSICAL_PARTITION_COUNT, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + validateAllRecordsSameIdReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple single partition query. No failure injection but page size set to 1 - so, multiple pages will + // be returned from the PagedFlux - for each document one page - and the expectation is that there + // will be as many CosmosDiagnosticsContext instances as pages. + new Object[] { + "PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOne, + noFailureInjection, + validateStatusCodeIs200Ok, + 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + validateAllRecordsSamePartitionReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE + }, + + // Simple cross partition query. No failure injection but page size set to 1 - so, multiple pages will + // be returned from the PagedFlux per physical partition - for each document one page - and the + // expectation is that there will be as many CosmosDiagnosticsContext instances as pages. + new Object[] { + "PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOne, + noFailureInjection, + validateStatusCodeIs200Ok, + 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + validateAllRecordsSameIdReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple single partition query intended to not return any results. No failure injection and only + // one empty page expected - with exactly one CosmosDiagnostics instance + new Object[] { + "EmptyResults_SinglePartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + singlePartitionEmptyResultQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOne, + noFailureInjection, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan + ), + null, + validateEmptyResults, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple cross partition query intended to not return any results. No failures injected. + // Empty pages should be skipped (except for the last one) - so, exactly one empty page expected - + // with exactly one CosmosDiagnostics instance - even when this is a cross-partition query touching all + // partitions + new Object[] { + "EmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionEmptyResultQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOne, + noFailureInjection, + validateStatusCodeIs200Ok, + // empty pages are skipped except for the last one + 1, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[1].getClientSideRequestStatistics().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + } + ), + null, + validateEmptyResults, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple cross partition query intended to not return any results. No failures injected. + // Empty pages should be returned - so, exactly one page per partition expected - + // with exactly one CosmosDiagnostics instance (plus query plan on very first one) + new Object[] { + "EmptyResults_EnableEmptyPageRetrieval_CrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionEmptyResultQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOneAndEmptyPagesEnabled, + noFailureInjection, + validateStatusCodeIs200Ok, + // empty pages are bubbled up + PHYSICAL_PARTITION_COUNT, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[1].getClientSideRequestStatistics().size()) + .isEqualTo(1); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(1); + } + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[0].getClientSideRequestStatistics().size()) + .isEqualTo(1); + assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(1); + } + ), + validateEmptyResults, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple cross partition query intended to not return any results except on one partition. + // No failures injected. Empty pages of all but one partition will be skipped, but + // query metrics and client side request statistics are captured in the merged diagnostics. + new Object[] { + "AllButOnePartitionEmptyResults_CrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + noFailureInjection, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[1].getClientSideRequestStatistics().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + } + ), + null, + validateExactlyOneRecordReturned, + NO_OTHER_DOCS_WITH_SAME_ID, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Single partition query with DISTINCT and ORDER BY. No failures injected + // Expect to get as many pages and diagnostics contexts as there are documents for this PK-value + new Object[] { + "AggregatesAndOrderBy_PageSizeOne_SinglePartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + singlePartitionWithAggregatesAndOrderByQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOne, + noFailureInjection, + validateStatusCodeIs200Ok, + 1 + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + validateAllRecordsSamePartitionReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + ENOUGH_DOCS_SAME_PK_TO_EXCEED_PAGE_SIZE + }, + + // Single partition query with DISTINCT and ORDER BY. No failures injected + // Only a single document matches the where condition - but this is a cross partition query. Because + // the single page returned in the CosmosPagedFlux had to peek into all physical partitions to be + // able to achieve global ordering in the query pipeline a single CosmosDiagnosticsContext instance + // is returned - but with query metrics and client request statistics for all partitions + new Object[] { + "AggregatesAndOrderBy_PageSizeOne_CrossPartitionSingleRecord_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionWithAggregatesAndOrderByQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOne, + noFailureInjection, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[1].getClientSideRequestStatistics().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + } + ), + null, + validateExactlyOneRecordReturned, + NO_OTHER_DOCS_WITH_SAME_PK, + NO_OTHER_DOCS_WITH_SAME_ID + }, + + // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where + // condition but the distinct id value is identical - so, to the application only a single record is + // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances + // as there are documents with the same id-value. + new Object[] { + "AggregatesAndOrderBy_PageSizeOne_CrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionWithAggregatesAndOrderByQueryGenerator, + queryReturnsTotalRecordCountWithPageSizeOne, + noFailureInjection, + validateStatusCodeIs200Ok, + 1 + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxOnlyFeedResponsesExceptQueryPlan + ), + validateExactlyOneRecordReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Cross partition query with DISTINCT and ORDER BY. Documents from all partitions meet the where + // condition but the distinct id value is identical - so, to the application only a single record is + // returned. Because the page size is 1 we expect as many pages / CosmosDiagnosticsContext instances + // as there are documents with the same id-value. + new Object[] { + "AggregatesAndOrderBy_DefaultPageSize_CrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionWithAggregatesAndOrderByQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + noFailureInjection, + validateStatusCodeIs200Ok, + PHYSICAL_PARTITION_COUNT, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[1].getClientSideRequestStatistics().size()) + .isEqualTo(1); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(1); + } + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[0].getClientSideRequestStatistics().size()) + .isEqualTo(1); + assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(1); + } + ), + validateExactlyOneRecordReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Cross partition query with DISTINCT and ORDER BY. Single document meets the where + // condition, but queries against all partitions need to be executed. Expect to see a single + // page and CosmosDiagnosticsContext - but including three request statistics and query metrics. + new Object[] { + "AggregatesAndOrderBy_DefaultPageSize_SingleRecordCrossPartition_AllGood_NoAvailabilityStrategy", + ONE_SECOND_DURATION, + noAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + crossPartitionWithAggregatesAndOrderByQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + noFailureInjection, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxSingleRegion, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[1].getClientSideRequestStatistics().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap().size()) + .isEqualTo(PHYSICAL_PARTITION_COUNT); + } + ), + null, + validateExactlyOneRecordReturned, + NO_OTHER_DOCS_WITH_SAME_ID, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple single partition query - 404/1002 injected into all partition of the first region + // RegionSwitchHint is local - with eager availability strategy - so, the expectation is that the + // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext + // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for + // the attempt in the first region and third one for hedging returning successful response. + new Object[] { + "DefaultPageSize_SinglePartition_404-1002_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", + Duration.ofSeconds(10), + eagerThresholdAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectReadSessionNotAvailableIntoFirstRegionOnly, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxTwoRegions, + validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isEqualTo(3); + + // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region + // (possibly also fail-over to secondary region) + assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); + assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) + .isEqualTo(true); + + // Ensure second FeedResponse CosmoDiagnostics has only requests to second region + assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) + .isEqualTo(true); + } + ), + null, + validateExactlyOneRecordReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple cross partition query - 404/1002 injected into all partition of the first region + // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the + // retry on the first region will provide a successful response and no hedging is happening. + // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have + // a single CosmosDiagnostics instance contacting both regions. + new Object[] { + "DefaultPageSize_CrossPartition_404-1002_OnlyFirstRegion_AllPartitions_RemotePreferred_ReluctantAvailabilityStrategy", + THREE_SECOND_DURATION, + reluctantThresholdAvailabilityStrategy, + CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED, + ConnectionMode.DIRECT, + crossPartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectReadSessionNotAvailableIntoFirstRegionOnly, + validateStatusCodeIs200Ok, + PHYSICAL_PARTITION_COUNT, + ArrayUtils.toArray( + validateCtxTwoRegions, + validateCtxQueryPlan, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isEqualTo(2); + + // Ensure fail-over happened + assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); + assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) + .isEqualTo(true); + assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) + .isEqualTo(true); + } + ), + ArrayUtils.toArray( + validateCtxTwoRegions, + validateCtxOnlyFeedResponsesExceptQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isEqualTo(1); + + // Ensure fail-over happened + assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(2); + assertThat(diagnostics[0].getContactedRegionNames().contains(FIRST_REGION_NAME)) + .isEqualTo(true); + assertThat(diagnostics[0].getContactedRegionNames().contains(SECOND_REGION_NAME)) + .isEqualTo(true); + } + ), + validateAllRecordsSameIdReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, // Simple cross partition query - 404/1002 injected into only a single partition of the first region // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the @@ -3050,7 +3050,7 @@ public Object[][] testConfigs_queryAfterCreation() { // a single CosmosDiagnostics instance contacting both regions. new Object[] { "DefaultPageSize_CrossPartition_404-1002_OnlyFirstRegion_SinglePartition_RemotePreferred_ReluctantAvailabilityStrategy", - Duration.ofSeconds(50), + ONE_SECOND_DURATION, reluctantThresholdAvailabilityStrategy, CosmosRegionSwitchHint.REMOTE_REGION_PREFERRED, ConnectionMode.DIRECT, @@ -3098,313 +3098,313 @@ public Object[][] testConfigs_queryAfterCreation() { // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for // the attempt in the first region and third one for hedging returning successful response. -// new Object[] { -// "DefaultPageSize_SinglePartition_503_AllRegions_EagerAvailabilityStrategy", -// Duration.ofSeconds(10), -// eagerThresholdAvailabilityStrategy, -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectServiceUnavailableIntoAllRegions, -// validateStatusCodeIsServiceUnavailable, -// 1, -// ArrayUtils.toArray( -// validateCtxTwoRegions, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isEqualTo(3); -// -// // Ensure first FeedResponse reaches both regions since Clinet Retry -// // policy should kick in and retry in remote region -// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); -// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) -// .isEqualTo(true); -// assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) -// .isEqualTo(true); -// -// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region -// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) -// .isEqualTo(true); -// } -// ), -// null, -// null, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple single partition query. Gateway timeout for query plan retrieval in first region injected. -// // This test case validates that the availability strategy and hedging is also applied for the -// // query plan request. The expectation is that the query plan request in the first region won't finish, -// // the query plan will then be retrieved from the second region but the actual query is executed against the -// // first region. -// new Object[] { -// "DefaultPageSize_SinglePartition_QueryPLanHighLatency_EagerAvailabilityStrategy", -// THREE_SECOND_DURATION, -// reluctantThresholdAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectQueryPlanTransitTimeoutIntoFirstRegionOnly, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxTwoRegions, -// validateCtxQueryPlan, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isGreaterThanOrEqualTo(3); -// -// // Ensure that the query plan has been retrieved from the second region -// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); -// assertThat(diagnostics[0].getClientSideRequestStatistics()).isNotNull(); -// assertThat(diagnostics[0].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); -// ClientSideRequestStatistics requestStats = diagnostics[0].getClientSideRequestStatistics().iterator().next(); -// assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); -// assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); -// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); -// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(408); -// -// // Ensure that the query plan has been retrieved from the second region -// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); -// assertThat(diagnostics[1].getClientSideRequestStatistics()).isNotNull(); -// assertThat(diagnostics[1].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); -// requestStats = diagnostics[1].getClientSideRequestStatistics().iterator().next(); -// assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); -// assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); -// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); -// assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(200); -// -// -// // There possibly is an incomplete diagnostics for the failed query plan retrieval in the first region -// // Last Diagnostics should be for processed request against the first region with the -// // query plan retrieved from the second region -// boolean found = false; -// for (int i = 2; i < diagnostics.length; i++) { -// if (diagnostics[i].getFeedResponseDiagnostics() != null && -// diagnostics[i].getFeedResponseDiagnostics().getQueryMetricsMap() != null) { -// -// found = true; -// assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); -// assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); -// assertThat(diagnostics[i].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[i].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); -// } -// } -// -// assertThat(found).isEqualTo(true); -// } -// ), -// null, -// validateExactlyOneRecordReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// // Simple single partition query - 429/3200 injected into all partition of the first region -// // Eager availability strategy - so, the expectation is that the -// // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext -// // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for -// // the attempt in the first region and third one for hedging returning successful response. -// new Object[] { -// "DefaultPageSize_SinglePartition_429-3200_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", -// TWO_SECOND_DURATION, -// eagerThresholdAvailabilityStrategy, -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectRequestRateTooLargeIntoFirstRegionOnly, -// validateStatusCodeIs200Ok, -// 1, -// ArrayUtils.toArray( -// validateCtxTwoRegions, -// validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isEqualTo(3); -// -// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region -// // (possibly also fail-over to secondary region) -// assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); -// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) -// .isEqualTo(true); -// -// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region -// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) -// .isEqualTo(true); -// } -// ), -// null, -// validateExactlyOneRecordReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// // Simple single partition query - 429/3200 injected into all regions -// // Eager availability strategy - the expectation is that even with hedging, the request will time out -// new Object[] { -// "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_EagerAvailabilityStrategy", -// TWO_SECOND_DURATION, -// eagerThresholdAvailabilityStrategy, -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectRequestRateTooLargeIntoAllRegions, -// validateStatusCodeIsOperationCancelled, -// 1, -// ArrayUtils.toArray( -// validateCtxTwoRegions, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isEqualTo(3); -// -// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region -// // (possibly also fail-over to secondary region) -// assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); -// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) -// .isEqualTo(true); -// -// // Ensure second FeedResponse CosmoDiagnostics has only requests to second region -// assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) -// .isEqualTo(true); -// } -// ), -// null, -// null, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// -// // Simple single partition query - 429/3200 injected into first region only -// // no availability strategy - the expectation is that no hedging will happen, the request will time out -// new Object[] { -// "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_noAvailabilityStrategy", -// TWO_SECOND_DURATION, -// noAvailabilityStrategy, -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// ConnectionMode.DIRECT, -// singlePartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectRequestRateTooLargeIntoAllRegions, -// validateStatusCodeIsOperationCancelled, -// 1, -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// (ctx) -> { -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics.length).isEqualTo(2); -// -// // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region -// // (possibly also fail-over to secondary region) -// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) -// .isEqualTo(true); -// assertThat(diagnostics[1].clientSideRequestStatistics().getResponseStatisticsList().size()).isGreaterThan(1); -// } -// ), -// null, -// null, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// }, -// // GATEWAY MODE -// // ------------ -// -// // Simple cross partition query - 404/1002 injected into all partition of the first region -// // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the -// // retry on the first region will provide a successful response and no hedging is happening. -// // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have -// // a single CosmosDiagnostics instance contacting both regions. -// new Object[] { -// "GW_DefaultPageSize_CrossPartition_GW408_EagerAvailabilityStrategy", -// THREE_SECOND_DURATION, -// eagerThresholdAvailabilityStrategy, -// noRegionSwitchHint, -// ConnectionMode.GATEWAY, -// crossPartitionQueryGenerator, -// queryReturnsTotalRecordCountWithDefaultPageSize, -// injectGatewayTransitTimeoutIntoFirstRegionOnly, -// validateStatusCodeIs200Ok, -// PHYSICAL_PARTITION_COUNT, -// ArrayUtils.toArray( -// validateCtxTwoRegions, // query plan 1st region, all queries 2nd region -// validateCtxQueryPlan, -// (ctx) -> { -// assertThat(ctx.getDiagnostics()).isNotNull(); -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// -// // Diagnostics of query attempt in first region not even available yet -// assertThat(diagnostics.length).isEqualTo(2); -// -// // query plan on first region -// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); -// }, -// (ctx) -> { -// assertThat(ctx.getDiagnostics()).isNotNull(); -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); -// assertThat(diagnostics[1].getFeedResponseDiagnostics()).isNotNull(); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); -// assertThat(diagnostics[1].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); -// ClientSideRequestStatistics[] clientStats = -// diagnostics[1] -// .getFeedResponseDiagnostics() -// .getClientSideRequestStatistics() -// .toArray(new ClientSideRequestStatistics[0]); -// assertThat(clientStats.length).isEqualTo(1); -// for (int i = 0; i < clientStats.length; i++) { -// assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); -// assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); -// assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); -// assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); -// assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); -// } -// } -// ), -// ArrayUtils.toArray( -// validateCtxSingleRegion, -// (ctx) -> { -// assertThat(ctx.getDiagnostics()).isNotNull(); -// CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); -// assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); -// assertThat(diagnostics[0].getFeedResponseDiagnostics()).isNotNull(); -// assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); -// assertThat(diagnostics[0].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); -// ClientSideRequestStatistics[] clientStats = -// diagnostics[0] -// .getFeedResponseDiagnostics() -// .getClientSideRequestStatistics() -// .toArray(new ClientSideRequestStatistics[0]); -// assertThat(clientStats.length).isEqualTo(1); -// for (int i = 0; i < clientStats.length; i++) { -// assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); -// assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); -// assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); -// assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); -// assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); -// assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); -// } -// } -// ), -// validateAllRecordsSameIdReturned, -// ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, -// NO_OTHER_DOCS_WITH_SAME_PK -// } + new Object[] { + "DefaultPageSize_SinglePartition_503_AllRegions_EagerAvailabilityStrategy", + Duration.ofSeconds(10), + eagerThresholdAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectServiceUnavailableIntoAllRegions, + validateStatusCodeIsServiceUnavailable, + 1, + ArrayUtils.toArray( + validateCtxTwoRegions, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isEqualTo(3); + + // Ensure first FeedResponse reaches both regions since Clinet Retry + // policy should kick in and retry in remote region + assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(2); + assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) + .isEqualTo(true); + assertThat(diagnostics[1].getContactedRegionNames().contains(SECOND_REGION_NAME)) + .isEqualTo(true); + + // Ensure second FeedResponse CosmoDiagnostics has only requests to second region + assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) + .isEqualTo(true); + } + ), + null, + null, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple single partition query. Gateway timeout for query plan retrieval in first region injected. + // This test case validates that the availability strategy and hedging is also applied for the + // query plan request. The expectation is that the query plan request in the first region won't finish, + // the query plan will then be retrieved from the second region but the actual query is executed against the + // first region. + new Object[] { + "DefaultPageSize_SinglePartition_QueryPLanHighLatency_EagerAvailabilityStrategy", + THREE_SECOND_DURATION, + reluctantThresholdAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectQueryPlanTransitTimeoutIntoFirstRegionOnly, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxTwoRegions, + validateCtxQueryPlan, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isGreaterThanOrEqualTo(3); + + // Ensure that the query plan has been retrieved from the second region + assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); + assertThat(diagnostics[0].getClientSideRequestStatistics()).isNotNull(); + assertThat(diagnostics[0].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); + ClientSideRequestStatistics requestStats = diagnostics[0].getClientSideRequestStatistics().iterator().next(); + assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); + assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); + assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); + assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(408); + + // Ensure that the query plan has been retrieved from the second region + assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); + assertThat(diagnostics[1].getClientSideRequestStatistics()).isNotNull(); + assertThat(diagnostics[1].getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); + requestStats = diagnostics[1].getClientSideRequestStatistics().iterator().next(); + assertThat(requestStats.getGatewayStatisticsList()).isNotNull(); + assertThat(requestStats.getGatewayStatisticsList().size()).isGreaterThanOrEqualTo(1); + assertThat(requestStats.getGatewayStatisticsList().iterator().next().getOperationType()).isEqualTo(OperationType.QueryPlan); + assertThat(requestStats.getGatewayStatisticsList().iterator().next().getStatusCode()).isEqualTo(200); + + + // There possibly is an incomplete diagnostics for the failed query plan retrieval in the first region + // Last Diagnostics should be for processed request against the first region with the + // query plan retrieved from the second region + boolean found = false; + for (int i = 2; i < diagnostics.length; i++) { + if (diagnostics[i].getFeedResponseDiagnostics() != null && + diagnostics[i].getFeedResponseDiagnostics().getQueryMetricsMap() != null) { + + found = true; + assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); + assertThat(diagnostics[i].getFeedResponseDiagnostics().getClientSideRequestStatistics().size()).isGreaterThanOrEqualTo(1); + assertThat(diagnostics[i].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[i].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); + } + } + + assertThat(found).isEqualTo(true); + } + ), + null, + validateExactlyOneRecordReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + // Simple single partition query - 429/3200 injected into all partition of the first region + // Eager availability strategy - so, the expectation is that the + // hedging will provide a successful response. There should only be a single CosmosDiagnosticsContext + // (and page) - but it should have three CosmosDiagnostics instances - first for query plan, second for + // the attempt in the first region and third one for hedging returning successful response. + new Object[] { + "DefaultPageSize_SinglePartition_429-3200_OnlyFirstRegion_LocalPreferred_EagerAvailabilityStrategy", + TWO_SECOND_DURATION, + eagerThresholdAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectRequestRateTooLargeIntoFirstRegionOnly, + validateStatusCodeIs200Ok, + 1, + ArrayUtils.toArray( + validateCtxTwoRegions, + validateCtxFirstRegionFailureSecondRegionSuccessfulSingleFeedResponse, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isEqualTo(3); + + // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region + // (possibly also fail-over to secondary region) + assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); + assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) + .isEqualTo(true); + + // Ensure second FeedResponse CosmoDiagnostics has only requests to second region + assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) + .isEqualTo(true); + } + ), + null, + validateExactlyOneRecordReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + // Simple single partition query - 429/3200 injected into all regions + // Eager availability strategy - the expectation is that even with hedging, the request will time out + new Object[] { + "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_EagerAvailabilityStrategy", + TWO_SECOND_DURATION, + eagerThresholdAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectRequestRateTooLargeIntoAllRegions, + validateStatusCodeIsOperationCancelled, + 1, + ArrayUtils.toArray( + validateCtxTwoRegions, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isEqualTo(3); + + // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region + // (possibly also fail-over to secondary region) + assertThat(diagnostics[1].getContactedRegionNames().size()).isGreaterThanOrEqualTo(1); + assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) + .isEqualTo(true); + + // Ensure second FeedResponse CosmoDiagnostics has only requests to second region + assertThat(diagnostics[2].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[2].getContactedRegionNames().contains(SECOND_REGION_NAME)) + .isEqualTo(true); + } + ), + null, + null, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + + // Simple single partition query - 429/3200 injected into first region only + // no availability strategy - the expectation is that no hedging will happen, the request will time out + new Object[] { + "DefaultPageSize_SinglePartition_429-3200_AllRegions_LocalPreferred_noAvailabilityStrategy", + TWO_SECOND_DURATION, + noAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + ConnectionMode.DIRECT, + singlePartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectRequestRateTooLargeIntoAllRegions, + validateStatusCodeIsOperationCancelled, + 1, + ArrayUtils.toArray( + validateCtxSingleRegion, + (ctx) -> { + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics.length).isEqualTo(2); + + // Ensure first FeedResponse CosmoDiagnostics has at least requests to first region + // (possibly also fail-over to secondary region) + assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[1].getContactedRegionNames().contains(FIRST_REGION_NAME)) + .isEqualTo(true); + assertThat(diagnostics[1].clientSideRequestStatistics().getResponseStatisticsList().size()).isGreaterThan(1); + } + ), + null, + null, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + }, + // GATEWAY MODE + // ------------ + + // Simple cross partition query - 404/1002 injected into all partition of the first region + // RegionSwitchHint is remote - with reluctant availability strategy - so, the expectation is that the + // retry on the first region will provide a successful response and no hedging is happening. + // There should be one CosmosDiagnosticsContext (and page) per partition - each should only have + // a single CosmosDiagnostics instance contacting both regions. + new Object[] { + "GW_DefaultPageSize_CrossPartition_GW408_EagerAvailabilityStrategy", + THREE_SECOND_DURATION, + eagerThresholdAvailabilityStrategy, + noRegionSwitchHint, + ConnectionMode.GATEWAY, + crossPartitionQueryGenerator, + queryReturnsTotalRecordCountWithDefaultPageSize, + injectGatewayTransitTimeoutIntoFirstRegionOnly, + validateStatusCodeIs200Ok, + PHYSICAL_PARTITION_COUNT, + ArrayUtils.toArray( + validateCtxTwoRegions, // query plan 1st region, all queries 2nd region + validateCtxQueryPlan, + (ctx) -> { + assertThat(ctx.getDiagnostics()).isNotNull(); + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + + // Diagnostics of query attempt in first region not even available yet + assertThat(diagnostics.length).isEqualTo(2); + + // query plan on first region + assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(FIRST_REGION_NAME); + }, + (ctx) -> { + assertThat(ctx.getDiagnostics()).isNotNull(); + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[1].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[1].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); + assertThat(diagnostics[1].getFeedResponseDiagnostics()).isNotNull(); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); + assertThat(diagnostics[1].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); + ClientSideRequestStatistics[] clientStats = + diagnostics[1] + .getFeedResponseDiagnostics() + .getClientSideRequestStatistics() + .toArray(new ClientSideRequestStatistics[0]); + assertThat(clientStats.length).isEqualTo(1); + for (int i = 0; i < clientStats.length; i++) { + assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); + assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); + assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); + assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); + assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); + assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); + } + } + ), + ArrayUtils.toArray( + validateCtxSingleRegion, + (ctx) -> { + assertThat(ctx.getDiagnostics()).isNotNull(); + CosmosDiagnostics[] diagnostics = ctx.getDiagnostics().toArray(new CosmosDiagnostics[0]); + assertThat(diagnostics[0].getContactedRegionNames().size()).isEqualTo(1); + assertThat(diagnostics[0].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); + assertThat(diagnostics[0].getFeedResponseDiagnostics()).isNotNull(); + assertThat(diagnostics[0].getFeedResponseDiagnostics().getQueryMetricsMap()).isNotNull(); + assertThat(diagnostics[0].getFeedResponseDiagnostics().getClientSideRequestStatistics()).isNotNull(); + ClientSideRequestStatistics[] clientStats = + diagnostics[0] + .getFeedResponseDiagnostics() + .getClientSideRequestStatistics() + .toArray(new ClientSideRequestStatistics[0]); + assertThat(clientStats.length).isEqualTo(1); + for (int i = 0; i < clientStats.length; i++) { + assertThat(clientStats[i].getContactedRegionNames()).isNotNull(); + assertThat(clientStats[i].getContactedRegionNames().size()).isEqualTo(1); + assertThat(clientStats[i].getContactedRegionNames().iterator().next()).isEqualTo(SECOND_REGION_NAME); + assertThat(clientStats[i].getGatewayStatisticsList()).isNotNull(); + assertThat(clientStats[i].getResponseStatisticsList()).isNotNull(); + assertThat(clientStats[i].getResponseStatisticsList().size()).isEqualTo(0); + } + } + ), + validateAllRecordsSameIdReturned, + ENOUGH_DOCS_OTHER_PK_TO_HIT_EVERY_PARTITION, + NO_OTHER_DOCS_WITH_SAME_PK + } }; } - @Test(groups = {"multi-master"}, dataProvider = "testConfigs_queryAfterCreation", invocationCount = 5) + @Test(groups = {"multi-master"}, dataProvider = "testConfigs_queryAfterCreation") public void queryAfterCreation( String testCaseId, Duration endToEndTimeout, From 9a61204cf584ad3a7d6022476a051ae99d8961a8 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 20 May 2024 20:16:57 -0400 Subject: [PATCH 035/140] Fixing CI pipeline. --- .../azure/cosmos/implementation/ErrorKey.java | 11 --- ...tionLevelCircuitBreakerRequestContext.java | 80 ------------------- 2 files changed, 91 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java delete mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/LocationLevelCircuitBreakerRequestContext.java diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java deleted file mode 100644 index 3bdf9245a2c7..000000000000 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ErrorKey.java +++ /dev/null @@ -1,11 +0,0 @@ -package com.azure.cosmos.implementation; - -public class ErrorKey { - private final int statusCode; - private final int subStatusCode; - - public ErrorKey(int statusCode, int subStatusCode) { - this.statusCode = statusCode; - this.subStatusCode = subStatusCode; - } -} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/LocationLevelCircuitBreakerRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/LocationLevelCircuitBreakerRequestContext.java deleted file mode 100644 index 9eca8b2b9621..000000000000 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/LocationLevelCircuitBreakerRequestContext.java +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -package com.azure.cosmos.implementation; - -import java.net.URI; -import java.util.concurrent.ConcurrentHashMap; - -public class LocationLevelCircuitBreakerRequestContext { - - private final ConcurrentHashMap> failuresForAllLocations; - private final boolean isRegionLevelCircuitBreakerEnabled; - - public LocationLevelCircuitBreakerRequestContext(boolean isRegionLevelCircuitBreakerEnabled) { - this.failuresForAllLocations = new ConcurrentHashMap<>(); - this.isRegionLevelCircuitBreakerEnabled = isRegionLevelCircuitBreakerEnabled; - } - - public boolean tryRecordRegionScopedFailure(URI locationEndpointToRoute, int statusCode, int subStatusCode) { - if (isRegionScopedFailure(statusCode, subStatusCode)) { - failuresForAllLocations.compute(locationEndpointToRoute, ((uri, errorKeyToCount) -> { - - if (errorKeyToCount == null) { - errorKeyToCount = new ConcurrentHashMap<>(); - errorKeyToCount.put(new ErrorKey(statusCode, subStatusCode), 1); - return errorKeyToCount; - } - - errorKeyToCount.compute(new ErrorKey(statusCode, subStatusCode), (errorKey, count) -> { - - if (count == null) { - count = 1; - return count; - } - - return count + 1; - }); - - return errorKeyToCount; - })); - - return true; - } - return false; - } - - private static boolean isRegionScopedFailure(int statusCode, int subStatusCode) { - - if (statusCode == HttpConstants.StatusCodes.GONE - && subStatusCode == HttpConstants.SubStatusCodes.COMPLETING_PARTITION_MIGRATION) { - return true; - } - - if (statusCode == HttpConstants.StatusCodes.GONE - && subStatusCode == HttpConstants.SubStatusCodes.COMPLETING_PARTITION_MIGRATION_EXCEEDED_RETRY_LIMIT) { - return true; - } - - if (statusCode == HttpConstants.StatusCodes.GONE - && subStatusCode == HttpConstants.SubStatusCodes.SERVER_GENERATED_410) { - return true; - } - - if (statusCode == HttpConstants.StatusCodes.SERVICE_UNAVAILABLE - && subStatusCode == HttpConstants.SubStatusCodes.SERVER_GENERATED_503) { - return true; - } - - if (statusCode == HttpConstants.StatusCodes.SERVICE_UNAVAILABLE - && subStatusCode == HttpConstants.SubStatusCodes.SERVER_GENERATED_503) { - return true; - } - - return false; - } - - public ConcurrentHashMap> getFailuresForAllLocations() { - return this.failuresForAllLocations; - } -} From ebadbf395a274682b494ffc191bba77a14d60265 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 21 May 2024 09:17:26 -0400 Subject: [PATCH 036/140] Fixing CI pipeline. --- .../java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java index 9e6416bbe398..8689e14b1696 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java @@ -229,7 +229,7 @@ private Flux writeDocument(Integer i) { return false; }, - (conflictException) -> client.readDocument(getDocumentLink(document), null) + (conflictException) -> client.readDocument(getDocumentLink(document), null, getCollectionLink()) ) .doOnNext(r -> cache.put(key, r.getResource())) .map(ResourceResponse::getResource).flux(); @@ -245,7 +245,7 @@ private Flux readDocument(Document d) { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(d.getString(partitionKey))); - return client.readDocument(getDocumentLink(d), options) + return client.readDocument(getDocumentLink(d), options, getCollectionLink()) .map(ResourceResponse::getResource).flux(); } From 0eca2312b3085fa44fa9cbc15f211f687c2fe4e1 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 21 May 2024 17:54:30 -0400 Subject: [PATCH 037/140] Integrating circuit breaking behavior for change feed. --- .../PartitionLevelCircuitBreakerTests.java | 304 ++---------------- .../implementation/ChangeFeedQueryImpl.java | 64 +++- .../DocumentServiceRequestContext.java | 10 + .../ImplementationBridgeHelpers.java | 8 + .../implementation/RxDocumentClientImpl.java | 32 +- .../query/ChangeFeedFetcher.java | 2 + .../cosmos/implementation/query/Fetcher.java | 5 +- .../implementation/query/Paginator.java | 5 +- .../CosmosChangeFeedRequestOptions.java | 38 +++ 9 files changed, 187 insertions(+), 281 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index f4614b795433..b3705634524d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -20,6 +20,7 @@ import com.azure.cosmos.implementation.guava25.base.Function; import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchResponse; +import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; import com.azure.cosmos.models.CosmosContainerIdentity; import com.azure.cosmos.models.CosmosContainerProperties; import com.azure.cosmos.models.CosmosItemIdentity; @@ -108,6 +109,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { {FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, {FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, {FaultInjectionOperationType.BATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, + {FaultInjectionOperationType.READ_FEED_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, @@ -123,8 +125,8 @@ public Object[][] readManyTestConfigs() { return new Object[][] { {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofMinutes(6), false, false}, {FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, -// {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, -// {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, true}, + {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, + {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, true}, }; } @@ -219,6 +221,7 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( assertThat(faultInjectedFunc).isNotNull().as("faultInjectedFunc cannot be null!"); + if (shouldEndToEndTimeoutBeInjected) { CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = (shouldThresholdBasedAvailabilityStrategyBeEnabled) ? @@ -236,6 +239,7 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( } operationInvocationParamsWrapper.asyncContainer = container; + operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = faultyFeedRange; CosmosFaultInjectionHelper .configureFaultInjectionRules(operationInvocationParamsWrapper.asyncContainer, Arrays.asList(faultInjectionRule)) @@ -724,6 +728,7 @@ private static int getTestObjectCountToBootstrapFrom(FaultInjectionOperationType case REPLACE_ITEM: case QUERY_ITEM: case PATCH_ITEM: + case READ_FEED_ITEM: return 1; case DELETE_ITEM: return 2 * opCount; @@ -945,6 +950,30 @@ private static Function(cosmosException); } + throw ex; + } + }; + case READ_FEED_ITEM: + return (paramsWrapper) -> { + + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + + try { + + FeedResponse feedResponseFromChangeFeed = asyncContainer.queryChangeFeed( + CosmosChangeFeedRequestOptions.createForProcessingFromBeginning(paramsWrapper.feedRangeToDrainForChangeFeed), + TestObject.class) + .byPage() + .blockLast(); + + return new OperationExecutionResult<>(feedResponseFromChangeFeed); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new OperationExecutionResult<>(cosmosException); + } + throw ex; } }; @@ -964,6 +993,7 @@ private static int getProvisionedThroughputForContainer(FaultInjectionOperationT case DELETE_ITEM: case CREATE_ITEM: case BATCH_ITEM: + case READ_FEED_ITEM: return 6_000; default: throw new UnsupportedOperationException(String.format("Operation of type : %s is not supported", faultInjectionOperationType)); @@ -973,274 +1003,7 @@ private static int getProvisionedThroughputForContainer(FaultInjectionOperationT @Test(groups = {"multi-master"}) public void operationHitsServiceUnavailableInSecondPreferredRegion() {} - @Test(groups = {"multi-master"}) - public void queryWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion() { - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("queryWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); - } - - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build(); - - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - - String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; - - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(50_000); - - try { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); - - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); - - Thread.sleep(10_000); - - TestObject testObject = TestObject.create(); - - String itemIdMappingToUnhealthyPartition = testObject.getId(); - - container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); - - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.QUERY_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) - .region(preferredRegions.get(0)) - .build(); - - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.GONE) - .build(); - - FaultInjectionRule goneExceptionRule = new FaultInjectionRuleBuilder("gone-exception-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(Duration.ofSeconds(45)) - .build(); - - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(goneExceptionRule)) - .block(); - - String sqlQuery = "SELECT * FROM C"; - - for (int i = 1; i <= 15; i++) { - FeedResponse response = container - .queryItems( - sqlQuery, - new CosmosQueryRequestOptions() - .setPartitionKey(new PartitionKey(itemIdMappingToUnhealthyPartition)) - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), - TestObject.class) - .byPage() - .onErrorResume(throwable -> { - if (throwable instanceof OperationCancelledException) { - logger.error("OperationCancelledException thrown!"); - } - - return Flux.empty(); - }) - .blockLast(); - - logger.info("Hit count : {}", goneExceptionRule.getHitCount()); - - if (response != null) { - assertThat(response).isNotNull(); - assertThat(response.getCosmosDiagnostics()).isNotNull(); - - response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - - logger.info("Sleep for 120 seconds"); - Thread.sleep(120_000); - - for (int i = 1; i <= 30; i++) { - FeedResponse response = container - .queryItems( - sqlQuery, - new CosmosQueryRequestOptions() - .setPartitionKey(new PartitionKey(itemIdMappingToUnhealthyPartition)) - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), - TestObject.class) - .byPage() - .onErrorResume(throwable -> { - if (throwable instanceof OperationCancelledException) { - logger.error("OperationCancelledException thrown!"); - } - - return Flux.empty(); - }) - .blockLast(); - - logger.info("Hit count : {}", goneExceptionRule.getHitCount()); - - if (response != null) { - assertThat(response).isNotNull(); - assertThat(response.getCosmosDiagnostics()).isNotNull(); - - response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Query operations should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); - } - } - - @Test(groups = {"multi-master"}) - public void readWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion() { - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("readWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); - } - - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build(); - - CosmosAsyncClient client = clientBuilder.endToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg).buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - - String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; - - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(12_000); - - try { - - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); - - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); - - Thread.sleep(10_000); - - TestObject testObject = TestObject.create(); - - String itemIdMappingToUnhealthyPartition = testObject.getId(); - - container.createItem(testObject, new PartitionKey(itemIdMappingToUnhealthyPartition), new CosmosItemRequestOptions()).block(); - - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.READ_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(itemIdMappingToUnhealthyPartition))).build()) - .region(preferredRegions.get(0)) - .build(); - - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.GONE) - .build(); - - FaultInjectionRule goneExceptionRule = new FaultInjectionRuleBuilder("gone-exception-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(Duration.ofSeconds(45)) - .build(); - - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(goneExceptionRule)) - .block(); - - for (int i = 1; i <= 15; i++) { - CosmosItemResponse response = container.readItem( - itemIdMappingToUnhealthyPartition, - new PartitionKey(itemIdMappingToUnhealthyPartition), - new CosmosItemRequestOptions().setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), - TestObject.class - ) - .onErrorResume(throwable -> { - if (throwable instanceof OperationCancelledException) { - logger.error("OperationCancelledException thrown!"); - } - - return Mono.empty(); - }) - .block(); - - logger.info("Hit count : {}", goneExceptionRule.getHitCount()); - - if (response != null) { - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - - logger.info("Sleep for 120 seconds"); - - Thread.sleep(120_000); - - for (int i = 1; i <= 30; i++) { - CosmosItemResponse response = container.readItem( - itemIdMappingToUnhealthyPartition, - new PartitionKey(itemIdMappingToUnhealthyPartition), - new CosmosItemRequestOptions().setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg), - TestObject.class - ) - .onErrorResume(throwable -> { - if (throwable instanceof OperationCancelledException) { - logger.error("OperationCancelledException thrown!"); - } - - return Mono.empty(); - }) - .block(); - - logger.info("Hit count : {}", goneExceptionRule.getHitCount()); - - if (response != null) { - assertThat(response).isNotNull(); - assertThat(response.getDiagnostics()).isNotNull(); - - response.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Read operations should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); - } - } - - static class OperationExecutionResult { + private static class OperationExecutionResult { private final CosmosItemResponse cosmosItemResponse; private final CosmosException cosmosException; @@ -1282,6 +1045,7 @@ private static class OperationInvocationParamsWrapper { public CosmosItemRequestOptions itemRequestOptions; public CosmosQueryRequestOptions queryRequestOptions; public CosmosItemRequestOptions patchItemRequestOptions; + public FeedRange feedRangeToDrainForChangeFeed; } private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java index a6699f7c26db..3ca3ff747f77 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java @@ -18,6 +18,7 @@ import java.util.HashMap; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import java.util.function.Supplier; @@ -29,6 +30,9 @@ class ChangeFeedQueryImpl { ImplementationBridgeHelpers.FeedResponseHelper.FeedResponseAccessor feedResponseAccessor = ImplementationBridgeHelpers.FeedResponseHelper.getFeedResponseAccessor(); + private final static ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.CosmosChangeFeedRequestOptionsAccessor changeFeedRequestOptionsAccessor = + ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.getCosmosChangeFeedRequestOptionsAccessor(); + private static final int INITIAL_TOP_VALUE = -1; private final RxDocumentClientImpl client; @@ -110,7 +114,7 @@ public Flux> executeAsync() { .CosmosChangeFeedRequestOptionsHelper .getCosmosChangeFeedRequestOptionsAccessor() .getOperationContext(this.options) - ); + ); } private RxDocumentServiceRequest createDocumentServiceRequest() { @@ -139,6 +143,7 @@ private RxDocumentServiceRequest createDocumentServiceRequest() { if (request.requestContext != null) { request.requestContext.setExcludeRegions(options.getExcludedRegions()); + request.requestContext.setFeedOperationContext(new FeedOperationContext(new ConcurrentHashMap<>(), false)); } return request; @@ -146,7 +151,34 @@ private RxDocumentServiceRequest createDocumentServiceRequest() { private Mono> executeRequestAsync(RxDocumentServiceRequest request) { if (this.operationContextAndListener == null) { - return client.readFeed(request) + return Mono.just(request) + .flatMap(req -> client.populateHeadersAsync(req, RequestVerb.GET)) + .flatMap(req -> client.getCollectionCache().resolveCollectionAsync(null, req) + .flatMap(documentCollectionValueHolder -> { + + checkNotNull(documentCollectionValueHolder, "documentCollectionValueHolder cannot be null!"); + checkNotNull(documentCollectionValueHolder.v, "documentCollectionValueHolder.v cannot be null!"); + + return client.getPartitionKeyRangeCache().tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + + checkNotNull(collectionRoutingMapValueHolder, "collectionRoutingMapValueHolder cannot be null!"); + checkNotNull(collectionRoutingMapValueHolder.v, "collectionRoutingMapValueHolder.v cannot be null!"); + + changeFeedRequestOptionsAccessor.setPartitionKeyDefinition(options, documentCollectionValueHolder.v.getPartitionKey()); + changeFeedRequestOptionsAccessor.setCollectionRid(options, documentCollectionValueHolder.v.getResourceId()); + + client.addPartitionLevelUnavailableRegionsForChangeFeedRequest(req, options, collectionRoutingMapValueHolder.v); + + if (req.requestContext.getClientRetryPolicySupplier() != null) { + DocumentClientRetryPolicy documentClientRetryPolicy = req.requestContext.getClientRetryPolicySupplier().get(); + documentClientRetryPolicy.onBeforeSendRequest(req); + } + + return Mono.just(req); + }); + })) + .flatMap(client::readFeed) .map(rsp -> feedResponseAccessor.createChangeFeedResponse(rsp, this.itemSerializer, klass)); } else { final OperationListener listener = operationContextAndListener.getOperationListener(); @@ -156,7 +188,33 @@ private Mono> executeRequestAsync(RxDocumentServiceRequest reque .put(HttpConstants.HttpHeaders.CORRELATED_ACTIVITY_ID, operationContext.getCorrelationActivityId()); listener.requestListener(operationContext, request); - return client.readFeed(request) + return Mono.just(request) + .flatMap(req -> client.populateHeadersAsync(req, RequestVerb.GET)) + .flatMap(req -> client.getCollectionCache().resolveCollectionAsync(null, req) + .flatMap(documentCollectionValueHolder -> { + + checkNotNull(documentCollectionValueHolder, "documentCollectionValueHolder cannot be null!"); + checkNotNull(documentCollectionValueHolder.v, "documentCollectionValueHolder.v cannot be null!"); + + return client.getPartitionKeyRangeCache().tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) + .flatMap(collectionRoutingMapValueHolder -> { + + checkNotNull(collectionRoutingMapValueHolder, "collectionRoutingMapValueHolder cannot be null!"); + checkNotNull(collectionRoutingMapValueHolder.v, "collectionRoutingMapValueHolder.v cannot be null!"); + + changeFeedRequestOptionsAccessor.setPartitionKeyDefinition(options, documentCollectionValueHolder.v.getPartitionKey()); + changeFeedRequestOptionsAccessor.setCollectionRid(options, documentCollectionValueHolder.v.getResourceId()); + + client.addPartitionLevelUnavailableRegionsForChangeFeedRequest(req, options, collectionRoutingMapValueHolder.v); + + if (req.requestContext.getClientRetryPolicySupplier() != null) { + DocumentClientRetryPolicy documentClientRetryPolicy = req.requestContext.getClientRetryPolicySupplier().get(); + documentClientRetryPolicy.onBeforeSendRequest(req); + } + + return Mono.just(req); + }); + })).flatMap(client::readFeed) .map(rsp -> { listener.responseListener(operationContext, rsp); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index 1f05c6297663..456e0455be65 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -19,6 +19,7 @@ import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Supplier; public class DocumentServiceRequestContext implements Cloneable { public volatile boolean forceAddressRefresh; @@ -61,6 +62,7 @@ public class DocumentServiceRequestContext implements Cloneable { private PointOperationContext pointOperationContext; private FeedOperationContext feedOperationContext; + private volatile Supplier clientRetryPolicySupplier; public DocumentServiceRequestContext() {} @@ -214,5 +216,13 @@ public void setApproximateBloomFilterInsertionCount(long approximateBloomFilterI public Set getSessionTokenEvaluationResults() { return sessionTokenEvaluationResults; } + + public Supplier getClientRetryPolicySupplier() { + return clientRetryPolicySupplier; + } + + public void setClientRetryPolicySupplier(Supplier clientRetryPolicySupplier) { + this.clientRetryPolicySupplier = clientRetryPolicySupplier; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index b7098089b471..9db179467c04 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -375,6 +375,14 @@ public interface CosmosChangeFeedRequestOptionsAccessor { CosmosChangeFeedRequestOptions createForProcessingFromContinuation(String continuation, FeedRange targetRange, String continuationLsn); CosmosChangeFeedRequestOptions clone(CosmosChangeFeedRequestOptions toBeCloned); + + String getCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions); + + void setCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions, String collectionRid); + + PartitionKeyDefinition getPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions); + + void setPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions, PartitionKeyDefinition partitionKeyDefinition); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 3b7bac84d2fc..9a6780421ba4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1961,7 +1961,7 @@ private RxDocumentServiceRequest addBatchHeaders(RxDocumentServiceRequest reques * @param httpMethod http method * @return Mono, which on subscription will populate the headers in the request passed in the argument. */ - private Mono populateHeadersAsync(RxDocumentServiceRequest request, RequestVerb httpMethod) { + public Mono populateHeadersAsync(RxDocumentServiceRequest request, RequestVerb httpMethod) { request.getHeaders().put(HttpConstants.HttpHeaders.X_DATE, Utils.nowAsRFC1123()); if (this.masterKeyOrResourceToken != null || this.resourceTokensMap != null || this.cosmosAuthorizationTokenResolver != null || this.credential != null) { @@ -5689,7 +5689,7 @@ static UUID randomUuid(long msb, long lsb) { return new UUID(msb, lsb); } - private void addPartitionLevelUnavailableRegionsForRequest( + public void addPartitionLevelUnavailableRegionsForRequest( RxDocumentServiceRequest request, RequestOptions options, CollectionRoutingMap collectionRoutingMap) { @@ -5714,7 +5714,7 @@ private void addPartitionLevelUnavailableRegionsForRequest( } } - private void addPartitionLevelUnavailableRegionsForFeedRequest( + public void addPartitionLevelUnavailableRegionsForFeedRequest( RxDocumentServiceRequest request, CosmosQueryRequestOptions options, CollectionRoutingMap collectionRoutingMap) { @@ -5741,6 +5741,32 @@ private void addPartitionLevelUnavailableRegionsForFeedRequest( } } + public void addPartitionLevelUnavailableRegionsForChangeFeedRequest( + RxDocumentServiceRequest request, + CosmosChangeFeedRequestOptions options, + CollectionRoutingMap collectionRoutingMap) { + checkNotNull(collectionRoutingMap, "collectionRoutingMap cannot be null!"); + + PartitionKeyRange resolvedPartitionKeyRange = null; + + if (request.getPartitionKeyRangeIdentity() != null) { + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByPartitionKeyRangeId(request.getPartitionKeyRangeIdentity().getPartitionKeyRangeId()); + } else if (request.getPartitionKeyInternal() != null) { + String effectivePartitionKeyString = PartitionKeyInternalHelper.getEffectivePartitionKeyString(request.getPartitionKeyInternal(), ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.getCosmosChangeFeedRequestOptionsAccessor().getPartitionKeyDefinition(options)); + resolvedPartitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); + } + + checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); + + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), resolvedPartitionKeyRange); + List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); + + request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); + } + } + private Mono> wrapPointOperationWithAvailabilityStrategy( ResourceType resourceType, OperationType operationType, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java index 466454b8e3b5..b7775732a36a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java @@ -89,8 +89,10 @@ public ChangeFeedFetcher( requestOptionProperties, retryPolicyInstance.getRetryContext(), () -> this.getOperationContextText()); + this.createRequestFunc = () -> { RxDocumentServiceRequest request = createRequestFunc.get(); + request.requestContext.setClientRetryPolicySupplier(() -> this.feedRangeContinuationFeedRangeGoneRetryPolicy); this.feedRangeContinuationFeedRangeGoneRetryPolicy.onBeforeSendRequest(request); return request; }; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index c8452ffeb450..baee14719b45 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -5,6 +5,7 @@ import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.implementation.Configs; +import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.FeedOperationContext; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; @@ -174,7 +175,7 @@ private Mono> nextPage(RxDocumentServiceRequest request) { .doOnNext(response -> { completed.set(true); - if (request.getResourceType() == ResourceType.Document) { + if (request.getResourceType() == ResourceType.Document && Configs.isPartitionLevelCircuitBreakerEnabled()) { FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); } @@ -193,7 +194,7 @@ private Mono> nextPage(RxDocumentServiceRequest request) { return; } - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + if (request.getResourceType() == ResourceType.Document && Configs.isPartitionLevelCircuitBreakerEnabled()) { FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java index 0faaa5162f40..a8355b40e369 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java @@ -6,7 +6,6 @@ import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; -import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; @@ -22,7 +21,6 @@ import java.util.List; import java.util.Map; -import java.util.Set; import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.Supplier; @@ -111,7 +109,8 @@ public static Flux> getChangeFeedQueryResultAsObservable( top, maxPageSize, isSplitHandlingDisabled, - operationContext), + operationContext + ), preFetchCount); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java index 0973de7829c9..c51425bb9c98 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java @@ -52,6 +52,8 @@ public final class CosmosChangeFeedRequestOptions { private CosmosDiagnosticsThresholds thresholds; private List excludeRegions; private CosmosItemSerializer customSerializer; + private String collectionRid; + private PartitionKeyDefinition partitionKeyDefinition; CosmosChangeFeedRequestOptions(CosmosChangeFeedRequestOptions topBeCloned) { this.continuationState = topBeCloned.continuationState; @@ -625,6 +627,22 @@ private void addCustomOptionsForFullFidelityMode() { HttpConstants.ChangeFeedWireFormatVersions.SEPARATE_METADATA_WITH_CRTS); } + public String getCollectionRid() { + return collectionRid; + } + + public void setCollectionRid(String collectionRid) { + this.collectionRid = collectionRid; + } + + public PartitionKeyDefinition getPartitionKeyDefinition() { + return partitionKeyDefinition; + } + + public void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + this.partitionKeyDefinition = partitionKeyDefinition; + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to access this class outside of this package.// /////////////////////////////////////////////////////////////////////////////////////////// @@ -684,6 +702,26 @@ public CosmosChangeFeedRequestOptions createForProcessingFromContinuation( public CosmosChangeFeedRequestOptions clone(CosmosChangeFeedRequestOptions toBeCloned) { return new CosmosChangeFeedRequestOptions(toBeCloned); } + + @Override + public String getCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions) { + return changeFeedRequestOptions.getCollectionRid(); + } + + @Override + public void setCollectionRid(CosmosChangeFeedRequestOptions changeFeedRequestOptions, String collectionRid) { + changeFeedRequestOptions.setCollectionRid(collectionRid); + } + + @Override + public PartitionKeyDefinition getPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions) { + return changeFeedRequestOptions.getPartitionKeyDefinition(); + } + + @Override + public void setPartitionKeyDefinition(CosmosChangeFeedRequestOptions changeFeedRequestOptions, PartitionKeyDefinition partitionKeyDefinition) { + changeFeedRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); + } }); } From cf8bcb71ffd0f7acd703732829be563144a9c343 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 21 May 2024 19:19:25 -0400 Subject: [PATCH 038/140] Integrating circuit breaking behavior for change feed. --- .../implementation/RxDocumentClientImpl.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 9a6780421ba4..6cfb3e60792e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2337,10 +2337,10 @@ private Mono handleRegionFeedbackForPointOperation( if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { - this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); + this.handleLocationExceptionForPartitionKeyRange(failedRequest); } } else { - this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); + this.handleLocationExceptionForPartitionKeyRange(failedRequest); } } } @@ -2354,10 +2354,10 @@ private Mono handleRegionFeedbackForPointOperation( if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { - this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); + this.handleLocationExceptionForPartitionKeyRange(failedRequest); } } else { - this.tryMarkPartitionKeyRangeAsUnavailableForRegion(failedRequest); + this.handleLocationExceptionForPartitionKeyRange(failedRequest); } } }); @@ -6308,12 +6308,14 @@ private Mono executeFeedOperationWithAvailabilityStrategy( }); } - private void tryMarkPartitionKeyRangeAsUnavailableForRegion(RxDocumentServiceRequest failedRequest) { + private void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { - URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); + URI firstContactedLocationEndpoint = diagnosticsAccessor + .getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); if (firstContactedLocationEndpoint != null) { - this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); + this.globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(failedRequest, firstContactedLocationEndpoint); } } From e26c963ece34149984eab86bca8756ee53b1e98a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 22 May 2024 17:56:43 -0400 Subject: [PATCH 039/140] Added separate exception / success counters for write / non-write operations. --- ...itionEndpointManagerForCircuitBreaker.java | 312 +++++++++++++----- 1 file changed, 232 insertions(+), 80 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 29ffec4c0d98..b1c8c1fbbab1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -71,31 +71,34 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); } - isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.handleException(partitionKeyRangeWrapperAsKey, failedLocation)); + isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.handleException(partitionKeyRangeWrapperAsKey, failedLocation, request.isReadOnlyRequest())); if (isFailureThresholdBreached.get()) { - UnmodifiableList applicableEndpoints = request.isReadOnly() ? + UnmodifiableList applicableEndpoints = request.isReadOnlyRequest() ? this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); isFailoverPossible.set( - partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapperAsKey, applicableEndpoints)); + partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapperAsKey, applicableEndpoints, request.isReadOnlyRequest())); } return partitionKeyRangeFailoverInfoAsVal; }); } else { - isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoSnapshot.handleException(partitionKeyRangeWrapper, failedLocation)); + isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoSnapshot.handleException(partitionKeyRangeWrapper, failedLocation, request.isReadOnlyRequest())); if (isFailureThresholdBreached.get()) { - UnmodifiableList applicableEndpoints = request.isReadOnly() ? + UnmodifiableList applicableEndpoints = request.isReadOnlyRequest() ? this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); isFailoverPossible.set( - partitionLevelLocationUnavailabilityInfoSnapshot.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapper, applicableEndpoints)); + partitionLevelLocationUnavailabilityInfoSnapshot.areLocationsAvailableForPartitionKeyRange( + partitionKeyRangeWrapper, + applicableEndpoints, + request.isReadOnlyRequest())); } } @@ -132,7 +135,10 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { - partitionLevelLocationUnavailabilityInfoSnapshot.handleSuccess(partitionKeyRangeWrapper, succeededLocation); + partitionLevelLocationUnavailabilityInfoSnapshot.handleSuccess( + partitionKeyRangeWrapper, + succeededLocation, + request.isReadOnlyRequest()); } } @@ -164,7 +170,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso doesPartitionHaveUnavailableLocations = true; } else if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.StaleUnavailable) { doesPartitionHaveUnavailableLocations = true; - } else if (locationSpecificContext.exceptionCount >= 1) { + } else if (locationSpecificContext.exceptionCountForWrite >= 1) { doesPartitionHaveUnavailableLocations = true; } } @@ -199,7 +205,13 @@ private Flux updateStaleLocationInfo() { partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { if (locationSpecificContextAsVal != null) { - locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationContextTransitionHandler.handleSuccess(locationSpecificContextAsVal, partitionKeyRangeWrapper, locationWithStaleUnavailabilityInfoAsKey, false); + locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationContextTransitionHandler.handleSuccess( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + locationWithStaleUnavailabilityInfoAsKey, + false, + true); } return locationSpecificContextAsVal; @@ -221,18 +233,22 @@ private class PartitionLevelLocationUnavailabilityInfo { this.locationEndpointToLocationSpecificContextForPartition = new ConcurrentHashMap<>(); } - public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException) { + public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException, boolean isReadOnlyRequest) { AtomicBoolean isExceptionThresholdBreached = new AtomicBoolean(false); this.locationEndpointToLocationSpecificContextForPartition.compute(locationWithException, (locationAsKey, locationSpecificContextAsVal) -> { if (locationSpecificContextAsVal == null) { - locationSpecificContextAsVal = new LocationSpecificContext(0, 0, Instant.MAX, LocationUnavailabilityStatus.Available, false); + locationSpecificContextAsVal = new LocationSpecificContext(0, 0, 0, 0, Instant.MAX, LocationUnavailabilityStatus.Available, false); } LocationSpecificContext locationSpecificContextAfterTransition = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationContextTransitionHandler.handleException(locationSpecificContextAsVal, partitionKeyRangeWrapper, locationWithException); + .this.locationContextTransitionHandler.handleException( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + locationWithException, + isReadOnlyRequest); isExceptionThresholdBreached.set(locationSpecificContextAfterTransition.isExceptionThresholdBreached()); return locationSpecificContextAfterTransition; @@ -241,19 +257,24 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper return isExceptionThresholdBreached.get(); } - public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation) { + public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation, boolean isReadOnlyRequest) { this.locationEndpointToLocationSpecificContextForPartition.compute(succeededLocation, (locationAsKey, locationSpecificContextAsVal) -> { if (locationSpecificContextAsVal != null) { locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationContextTransitionHandler.handleSuccess(locationSpecificContextAsVal, partitionKeyRangeWrapper, succeededLocation, false); + .this.locationContextTransitionHandler.handleSuccess( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + succeededLocation, + false, + isReadOnlyRequest); } return locationSpecificContextAsVal; }); } - public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper, List availableLocationsAtAccountLevel) { + public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper, List availableLocationsAtAccountLevel, boolean isReadOnlyRequest) { for (URI availableLocation : availableLocationsAtAccountLevel) { if (!this.locationEndpointToLocationSpecificContextForPartition.containsKey(availableLocation)) { @@ -292,7 +313,13 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe this.locationEndpointToLocationSpecificContextForPartition.compute(mostStaleUnavailableLocation, (mostStaleUnavailableLocationAsKey, locationSpecificStatusAsVal) -> { if (locationSpecificStatusAsVal != null) { - locationSpecificStatusAsVal = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationContextTransitionHandler.handleSuccess(locationSpecificStatusAsVal, partitionKeyRangeWrapper, mostStaleUnavailableLocationAsKey, true); + locationSpecificStatusAsVal = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationContextTransitionHandler.handleSuccess( + locationSpecificStatusAsVal, + partitionKeyRangeWrapper, + mostStaleUnavailableLocationAsKey, + true, + isReadOnlyRequest); } return locationSpecificStatusAsVal; @@ -304,21 +331,27 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe } private static class LocationSpecificContext { - private final int exceptionCount; - private final int successCount; + private final int exceptionCountForWrite; + private final int successCountForWrite; + private final int exceptionCountForRead; + private final int successCountForRead; private final Instant unavailableSince; private final LocationUnavailabilityStatus locationUnavailabilityStatus; private final boolean isExceptionThresholdBreached; public LocationSpecificContext( - int successCount, - int exceptionCount, + int successCountForWrite, + int exceptionCountForWrite, + int successCountForRead, + int exceptionCountForRead, Instant unavailableSince, LocationUnavailabilityStatus locationUnavailabilityStatus, boolean isExceptionThresholdBreached) { - this.successCount = successCount; - this.exceptionCount = exceptionCount; + this.successCountForWrite = successCountForWrite; + this.exceptionCountForWrite = exceptionCountForWrite; + this.exceptionCountForRead = exceptionCountForRead; + this.successCountForRead = successCountForRead; this.unavailableSince = unavailableSince; this.locationUnavailabilityStatus = locationUnavailabilityStatus; this.isExceptionThresholdBreached = isExceptionThresholdBreached; @@ -340,50 +373,85 @@ public LocationSpecificContext handleSuccess( LocationSpecificContext locationSpecificContext, PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithSuccess, - boolean forceStatusChange) { + boolean forceStatusChange, + boolean isReadOnlyRequest) { logger.info("Handling success"); LocationUnavailabilityStatus currentStatusSnapshot = locationSpecificContext.locationUnavailabilityStatus; - double allowedFailureRatio = getAllowedFailureRatioByStatus(currentStatusSnapshot); + double allowedFailureRatio = getAllowedExceptionToSuccessRatio(currentStatusSnapshot, isReadOnlyRequest); - int exceptionCountActual = locationSpecificContext.exceptionCount; - int successCountActual = locationSpecificContext.successCount; + int minSuccessCountForStatusUpgrade = getMinimumSuccessCountForStatusUpgrade(currentStatusSnapshot, isReadOnlyRequest); + + int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.exceptionCountForWrite; + int successCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.successCountForWrite; switch (currentStatusSnapshot) { case Available: if (!forceStatusChange) { if (exceptionCountActual > 0) { + exceptionCountActual -= 1; - return new LocationSpecificContext( - locationSpecificContext.successCount, - exceptionCountActual, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached - ); + + if (isReadOnlyRequest) { + return new LocationSpecificContext( + locationSpecificContext.successCountForWrite, + locationSpecificContext.exceptionCountForWrite, + locationSpecificContext.successCountForRead, + exceptionCountActual, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } else { + return new LocationSpecificContext( + locationSpecificContext.successCountForWrite, + exceptionCountActual, + locationSpecificContext.successCountForRead, + locationSpecificContext.exceptionCountForRead, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } } } + break; case StaleUnavailable: if (!forceStatusChange) { + successCountActual += 1; + logger.info("Try to switch to Available but actual success count : {}", successCountActual); - if (successCountActual > 10 && (double) exceptionCountActual / (double) successCountActual < allowedFailureRatio) { + + if (successCountActual > minSuccessCountForStatusUpgrade && (double) exceptionCountActual / (double) successCountActual < allowedFailureRatio) { logger.info("Partition {}-{} of collection : {} marked as Available from StaleUnavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, OperationType.Read)); + .getRegionName(locationWithSuccess, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.Available); } else { - return new LocationSpecificContext( - successCountActual, - exceptionCountActual, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); + + if (isReadOnlyRequest) { + return new LocationSpecificContext( + locationSpecificContext.successCountForWrite, + locationSpecificContext.exceptionCountForWrite, + successCountActual, + locationSpecificContext.exceptionCountForRead, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } else { + return new LocationSpecificContext( + successCountActual, + locationSpecificContext.exceptionCountForWrite, + locationSpecificContext.successCountForRead, + locationSpecificContext.exceptionCountForRead, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } } } break; @@ -396,7 +464,7 @@ public LocationSpecificContext handleSuccess( partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, OperationType.Read)); + .getRegionName(locationWithSuccess, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); } @@ -406,7 +474,7 @@ public LocationSpecificContext handleSuccess( partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, OperationType.Read)); + .getRegionName(locationWithSuccess, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); } break; @@ -420,26 +488,43 @@ public LocationSpecificContext handleSuccess( public LocationSpecificContext handleException( LocationSpecificContext locationSpecificContext, PartitionKeyRangeWrapper partitionKeyRangeWrapper, - URI locationWithException) { + URI locationWithException, + boolean isReadOnlyRequest) { logger.warn("Handling exception"); LocationUnavailabilityStatus currentStatusSnapshot = locationSpecificContext.locationUnavailabilityStatus; - int allowedExceptionCount = getAllowedFailureCountByStatus(currentStatusSnapshot); + int allowedExceptionCount = getAllowedExceptionCount(currentStatusSnapshot, isReadOnlyRequest); - int exceptionCountActual = locationSpecificContext.exceptionCount; + int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.exceptionCountForWrite; switch (currentStatusSnapshot) { case Available: if (exceptionCountActual < allowedExceptionCount) { - exceptionCountActual++; + + exceptionCountActual += 1; + logger.info("Exception count : {}", exceptionCountActual); - return new LocationSpecificContext( - locationSpecificContext.successCount, - exceptionCountActual, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); + + if (isReadOnlyRequest) { + return new LocationSpecificContext( + locationSpecificContext.successCountForWrite, + locationSpecificContext.exceptionCountForWrite, + locationSpecificContext.successCountForRead, + exceptionCountActual, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } else { + return new LocationSpecificContext( + locationSpecificContext.successCountForWrite, + exceptionCountActual, + locationSpecificContext.successCountForRead, + locationSpecificContext.exceptionCountForRead, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } } else { GlobalPartitionEndpointManagerForCircuitBreaker .this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); @@ -453,13 +538,28 @@ public LocationSpecificContext handleException( } case StaleUnavailable: if (exceptionCountActual < allowedExceptionCount) { - exceptionCountActual++; - return new LocationSpecificContext( - locationSpecificContext.successCount, - exceptionCountActual, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); + + exceptionCountActual += 1; + + if (isReadOnlyRequest) { + return new LocationSpecificContext( + locationSpecificContext.successCountForWrite, + locationSpecificContext.exceptionCountForWrite, + locationSpecificContext.successCountForRead, + exceptionCountActual, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } else { + return new LocationSpecificContext( + locationSpecificContext.successCountForWrite, + exceptionCountActual, + locationSpecificContext.successCountForRead, + locationSpecificContext.exceptionCountForRead, + locationSpecificContext.unavailableSince, + locationSpecificContext.locationUnavailabilityStatus, + locationSpecificContext.isExceptionThresholdBreached); + } } else { logger.info("Partition {}-{} of collection : {} marked as FreshUnavailable from StaleUnavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), @@ -479,22 +579,26 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat switch (newStatus) { case Available: return new LocationSpecificContext( + 0, + 0, 0, 0, Instant.MAX, LocationUnavailabilityStatus.Available, - false - ); + false); case FreshUnavailable: return new LocationSpecificContext( + 0, + 0, 0, 0, Instant.now(), LocationUnavailabilityStatus.FreshUnavailable, - true - ); + true); case StaleUnavailable: return new LocationSpecificContext( + 0, + 0, 0, 0, Instant.MAX, @@ -534,25 +638,73 @@ private enum LocationUnavailabilityStatus { Available, FreshUnavailable, StaleUnavailable; } - private static double getAllowedFailureRatioByStatus(LocationUnavailabilityStatus status) { - switch (status) { - case Available: - return 0.3d; - case StaleUnavailable: - return 0.1d; - default: - return 0d; + private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { + + if (isReadOnlyRequest) { + switch (status) { + case Available: + return 0.3d; + case StaleUnavailable: + return 0.1d; + default: + return 0d; + } + } else { + switch (status) { + case Available: + return 0.2d; + case StaleUnavailable: + return 0.05d; + default: + return 0d; + } } } - private static int getAllowedFailureCountByStatus(LocationUnavailabilityStatus status) { - switch (status) { - case Available: - return 10; - case StaleUnavailable: - return 5; - default: - throw new IllegalStateException("Unsupported health status: " + status); + private static int getAllowedExceptionCount(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { + + if (isReadOnlyRequest) { + switch (status) { + case Available: + return 10; + case StaleUnavailable: + return 5; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } else { + switch (status) { + case Available: + return 5; + case StaleUnavailable: + return 2; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } + } + + private static int getMinimumSuccessCountForStatusUpgrade(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { + if (isReadOnlyRequest) { + switch (status) { + case StaleUnavailable: + return 5; + case FreshUnavailable: + case Available: + return 0; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } else { + switch (status) { + case StaleUnavailable: + return 10; + case FreshUnavailable: + case Available: + return 0; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } } } } From 2de5bda6967ec4108b9da6898d60e56322efb154 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 22 May 2024 20:46:50 -0400 Subject: [PATCH 040/140] Added separate exception / success counters for write / non-write operations. --- ...itionEndpointManagerForCircuitBreaker.java | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index b1c8c1fbbab1..a05eed57e6e6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -155,7 +155,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); List unavailableLocations = new ArrayList<>(); - boolean doesPartitionHaveUnavailableLocations = false; + boolean doesPartitionHaveUnhealthyLocations = false; if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { Map locationEndpointToFailureMetricsForPartition = @@ -167,16 +167,16 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.FreshUnavailable) { unavailableLocations.add(location); - doesPartitionHaveUnavailableLocations = true; + doesPartitionHaveUnhealthyLocations = true; } else if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.StaleUnavailable) { - doesPartitionHaveUnavailableLocations = true; - } else if (locationSpecificContext.exceptionCountForWrite >= 1) { - doesPartitionHaveUnavailableLocations = true; + doesPartitionHaveUnhealthyLocations = true; + } else if (locationSpecificContext.exceptionCountForWrite >= 1 || locationSpecificContext.exceptionCountForRead >= 1) { + doesPartitionHaveUnhealthyLocations = true; } } } - if (!doesPartitionHaveUnavailableLocations) { + if (!doesPartitionHaveUnhealthyLocations) { this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRangeWrapper); } @@ -240,7 +240,14 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper this.locationEndpointToLocationSpecificContextForPartition.compute(locationWithException, (locationAsKey, locationSpecificContextAsVal) -> { if (locationSpecificContextAsVal == null) { - locationSpecificContextAsVal = new LocationSpecificContext(0, 0, 0, 0, Instant.MAX, LocationUnavailabilityStatus.Available, false); + locationSpecificContextAsVal = new LocationSpecificContext( + 0, + 0, + 0, + 0, + Instant.MAX, + LocationUnavailabilityStatus.Available, + false); } LocationSpecificContext locationSpecificContextAfterTransition = GlobalPartitionEndpointManagerForCircuitBreaker From 649d934a69a882d26ff95ef71d7d2cb5b2dfcb9c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 23 May 2024 14:48:38 -0400 Subject: [PATCH 041/140] Added separate exception / success counters for write / non-write operations. --- .../com/azure/cosmos/implementation/RxGatewayStoreModel.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 84fe6ea120db..f777304315ea 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -252,9 +252,7 @@ public Mono performRequestInternal(RxDocumentServiceR HttpHeaders httpHeaders = this.getHttpRequestHeaders(request.getHeaders()); - Flux contentAsByteArray = request.getContentAsByteArrayFlux().doOnSubscribe(ignore -> { - request.requestContext.isRequestSendingStarted = true; - }); + Flux contentAsByteArray = request.getContentAsByteArrayFlux(); HttpRequest httpRequest = new HttpRequest(method, requestUri, @@ -409,6 +407,7 @@ private Mono toDocumentServiceResponse(Mono Date: Tue, 28 May 2024 09:59:02 -0400 Subject: [PATCH 042/140] Adding `Healthy` status. --- .../implementation/ClientRetryPolicy.java | 30 +++++ ...itionEndpointManagerForCircuitBreaker.java | 126 +++++++++++------- .../rntbd/RntbdRequestRecord.java | 2 - 3 files changed, 105 insertions(+), 53 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 69619b84b242..b0f4b1336694 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -164,6 +164,20 @@ public Mono shouldRetry(Exception e) { clientException); } + if (clientException != null && Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.REQUEST_TIMEOUT)) { + logger.info( + "Request timeout - IsReadRequest {}, IsWebExceptionRetriable {}, NonIdempotentWriteRetriesEnabled {}", + this.isReadRequest, + false, + this.request.getNonIdempotentWriteRetriesEnabled(), + e); + + return this.shouldRetryOnRequestTimeout( + this.isReadRequest, + this.request.getNonIdempotentWriteRetriesEnabled() + ); + } + return this.throttlingRetry.shouldRetry(e); } @@ -375,6 +389,22 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( return Mono.just(ShouldRetryResult.retryAfter(Duration.ZERO)); } + private Mono shouldRetryOnRequestTimeout( + boolean isReadRequest, + boolean nonIdempotentWriteRetriesEnabled) { + + if (Configs.isPartitionLevelCircuitBreakerEnabled() && + !isReadRequest && + !nonIdempotentWriteRetriesEnabled) { + + this.globalPartitionEndpointManager.handleLocationExceptionForPartitionKeyRange( + request, + request.requestContext.locationEndpointToRoute); + } + + return Mono.just(ShouldRetryResult.NO_RETRY); + } + @Override public void onBeforeSendRequest(RxDocumentServiceRequest request) { this.request = request; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index a05eed57e6e6..161be3c7237d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -131,15 +131,19 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r URI succeededLocation = request.requestContext.locationEndpointToRoute; - PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot - = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); + this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionKeyRangeToFailoverInfoAsVal) -> { - if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { - partitionLevelLocationUnavailabilityInfoSnapshot.handleSuccess( + if (partitionKeyRangeToFailoverInfoAsVal == null) { + partitionKeyRangeToFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); + } + + partitionKeyRangeToFailoverInfoAsVal.handleSuccess( partitionKeyRangeWrapper, succeededLocation, request.isReadOnlyRequest()); - } + + return partitionKeyRangeToFailoverInfoAsVal; + }); } public List getUnavailableLocationEndpointsForPartitionKeyRange(String resourceId, PartitionKeyRange partitionKeyRange) { @@ -155,7 +159,6 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); List unavailableLocations = new ArrayList<>(); - boolean doesPartitionHaveUnhealthyLocations = false; if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { Map locationEndpointToFailureMetricsForPartition = @@ -165,21 +168,12 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso URI location = pair.getKey(); LocationSpecificContext locationSpecificContext = pair.getValue(); - if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.FreshUnavailable) { + if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.Unavailable) { unavailableLocations.add(location); - doesPartitionHaveUnhealthyLocations = true; - } else if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.StaleUnavailable) { - doesPartitionHaveUnhealthyLocations = true; - } else if (locationSpecificContext.exceptionCountForWrite >= 1 || locationSpecificContext.exceptionCountForRead >= 1) { - doesPartitionHaveUnhealthyLocations = true; } } } - if (!doesPartitionHaveUnhealthyLocations) { - this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRangeWrapper); - } - return UnmodifiableList.unmodifiableList(unavailableLocations); } @@ -246,7 +240,7 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper 0, 0, Instant.MAX, - LocationUnavailabilityStatus.Available, + LocationUnavailabilityStatus.Healthy, false); } @@ -267,17 +261,28 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation, boolean isReadOnlyRequest) { this.locationEndpointToLocationSpecificContextForPartition.compute(succeededLocation, (locationAsKey, locationSpecificContextAsVal) -> { - if (locationSpecificContextAsVal != null) { - locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationContextTransitionHandler.handleSuccess( - locationSpecificContextAsVal, - partitionKeyRangeWrapper, - succeededLocation, - false, - isReadOnlyRequest); + LocationSpecificContext locationSpecificContextAfterTransition; + + if (locationSpecificContextAsVal == null) { + locationSpecificContextAsVal = new LocationSpecificContext( + 0, + 0, + 0, + 0, + Instant.MAX, + LocationUnavailabilityStatus.Healthy, + false); } - return locationSpecificContextAsVal; + locationSpecificContextAfterTransition = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationContextTransitionHandler.handleSuccess( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + succeededLocation, + false, + isReadOnlyRequest); + + return locationSpecificContextAfterTransition; }); } @@ -369,7 +374,8 @@ public boolean isExceptionThresholdBreached() { } public boolean isRegionAvailableToProcessRequests() { - return this.locationUnavailabilityStatus == LocationUnavailabilityStatus.Available || + return this.locationUnavailabilityStatus == LocationUnavailabilityStatus.Healthy || + this.locationUnavailabilityStatus == LocationUnavailabilityStatus.HealthyWithFailures || this.locationUnavailabilityStatus == LocationUnavailabilityStatus.StaleUnavailable; } } @@ -394,7 +400,9 @@ public LocationSpecificContext handleSuccess( int successCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.successCountForWrite; switch (currentStatusSnapshot) { - case Available: + case Healthy: + break; + case HealthyWithFailures: if (!forceStatusChange) { if (exceptionCountActual > 0) { @@ -421,8 +429,8 @@ public LocationSpecificContext handleSuccess( } } } - break; + case StaleUnavailable: if (!forceStatusChange) { @@ -437,7 +445,7 @@ public LocationSpecificContext handleSuccess( partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithSuccess, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.Available); + return this.transitionHealthStatus(LocationUnavailabilityStatus.Healthy); } else { if (isReadOnlyRequest) { @@ -462,7 +470,7 @@ public LocationSpecificContext handleSuccess( } } break; - case FreshUnavailable: + case Unavailable: Instant unavailableSinceActual = locationSpecificContext.unavailableSince; if (!forceStatusChange) { if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { @@ -501,12 +509,14 @@ public LocationSpecificContext handleException( logger.warn("Handling exception"); LocationUnavailabilityStatus currentStatusSnapshot = locationSpecificContext.locationUnavailabilityStatus; - int allowedExceptionCount = getAllowedExceptionCount(currentStatusSnapshot, isReadOnlyRequest); + int allowedExceptionCount = getAllowedExceptionCountToMaintainStatus(currentStatusSnapshot, isReadOnlyRequest); int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.exceptionCountForWrite; switch (currentStatusSnapshot) { - case Available: + case Healthy: + return this.transitionHealthStatus(LocationUnavailabilityStatus.HealthyWithFailures); + case HealthyWithFailures: if (exceptionCountActual < allowedExceptionCount) { exceptionCountActual += 1; @@ -541,7 +551,7 @@ public LocationSpecificContext handleException( partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithException, OperationType.Read)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.FreshUnavailable); + return this.transitionHealthStatus(LocationUnavailabilityStatus.Unavailable); } case StaleUnavailable: if (exceptionCountActual < allowedExceptionCount) { @@ -574,7 +584,7 @@ public LocationSpecificContext handleException( partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithException, OperationType.Read)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.FreshUnavailable); + return this.transitionHealthStatus(LocationUnavailabilityStatus.Unavailable); } default: throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); @@ -584,23 +594,32 @@ public LocationSpecificContext handleException( public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStatus newStatus) { switch (newStatus) { - case Available: + case Healthy: return new LocationSpecificContext( 0, 0, 0, 0, Instant.MAX, - LocationUnavailabilityStatus.Available, + LocationUnavailabilityStatus.Healthy, false); - case FreshUnavailable: + case HealthyWithFailures: + return new LocationSpecificContext( + 0, + 0, + 0, + 0, + Instant.MAX, + LocationUnavailabilityStatus.HealthyWithFailures, + false); + case Unavailable: return new LocationSpecificContext( 0, 0, 0, 0, Instant.now(), - LocationUnavailabilityStatus.FreshUnavailable, + LocationUnavailabilityStatus.Unavailable, true); case StaleUnavailable: return new LocationSpecificContext( @@ -610,8 +629,7 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat 0, Instant.MAX, LocationUnavailabilityStatus.StaleUnavailable, - false - ); + false); default: throw new IllegalStateException("Unsupported health status: " + newStatus); } @@ -642,14 +660,14 @@ public int hashCode() { } private enum LocationUnavailabilityStatus { - Available, FreshUnavailable, StaleUnavailable; + Healthy, HealthyWithFailures, Unavailable, StaleUnavailable } private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { if (isReadOnlyRequest) { switch (status) { - case Available: + case HealthyWithFailures: return 0.3d; case StaleUnavailable: return 0.1d; @@ -658,7 +676,7 @@ private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilitySt } } else { switch (status) { - case Available: + case HealthyWithFailures: return 0.2d; case StaleUnavailable: return 0.05d; @@ -668,23 +686,27 @@ private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilitySt } } - private static int getAllowedExceptionCount(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { + private static int getAllowedExceptionCountToMaintainStatus(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { if (isReadOnlyRequest) { switch (status) { - case Available: + case HealthyWithFailures: return 10; case StaleUnavailable: return 5; + case Healthy: + return 0; default: throw new IllegalStateException("Unsupported health status: " + status); } } else { switch (status) { - case Available: + case HealthyWithFailures: return 5; case StaleUnavailable: return 2; + case Healthy: + return 0; default: throw new IllegalStateException("Unsupported health status: " + status); } @@ -696,8 +718,9 @@ private static int getMinimumSuccessCountForStatusUpgrade(LocationUnavailability switch (status) { case StaleUnavailable: return 5; - case FreshUnavailable: - case Available: + case Unavailable: + case HealthyWithFailures: + case Healthy: return 0; default: throw new IllegalStateException("Unsupported health status: " + status); @@ -706,8 +729,9 @@ private static int getMinimumSuccessCountForStatusUpgrade(LocationUnavailability switch (status) { case StaleUnavailable: return 10; - case FreshUnavailable: - case Available: + case Unavailable: + case HealthyWithFailures: + case Healthy: return 0; default: throw new IllegalStateException("Unsupported health status: " + status); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java index f22974442c5d..2239596047da 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java @@ -266,8 +266,6 @@ public long getRequestId() { public boolean expire() { final CosmosException error; - RxDocumentServiceRequest serviceRequest = this.args().serviceRequest(); - if ((this.args.serviceRequest().isReadOnly() || !this.hasSendingRequestStarted()) || this.args.serviceRequest().getNonIdempotentWriteRetriesEnabled()){ // Convert from requestTimeoutException to GoneException for the following two scenarios so they can be safely retried: From 152c116eee948d77509da9aff9d8cf90d0c15809 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 29 May 2024 13:16:54 -0400 Subject: [PATCH 043/140] Adding `CosmosDiagnostics` for change feed `FeedResponse`. --- .../PartitionLevelCircuitBreakerTests.java | 383 +++++++++++++++--- .../implementation/ChangeFeedQueryImpl.java | 4 +- ...itionEndpointManagerForCircuitBreaker.java | 65 ++- .../ImplementationBridgeHelpers.java | 6 + .../implementation/RxDocumentClientImpl.java | 4 + .../com/azure/cosmos/models/FeedResponse.java | 28 ++ 6 files changed, 385 insertions(+), 105 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index b3705634524d..f0389e48d790 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -15,13 +15,11 @@ import com.azure.cosmos.TestObject; import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; import com.azure.cosmos.faultinjection.FaultInjectionTestBase; -import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.guava25.base.Function; import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchResponse; import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; -import com.azure.cosmos.models.CosmosContainerIdentity; import com.azure.cosmos.models.CosmosContainerProperties; import com.azure.cosmos.models.CosmosItemIdentity; import com.azure.cosmos.models.CosmosItemRequestOptions; @@ -49,7 +47,6 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Factory; import org.testng.annotations.Test; -import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import java.time.Duration; @@ -61,6 +58,7 @@ import java.util.Map; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; import static org.assertj.core.api.Assertions.assertThat; @@ -100,23 +98,145 @@ public void beforeClass() { @DataProvider(name = "partitionLevelCircuitBreakerTestConfigs") public Object[][] partitionLevelCircuitBreakerTestConfigs() { + + Function> serviceUnavailableRulesGenerator + = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; + + Function> serverGeneratedGoneRuleGenerator + = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneRules; + + Function> transitTimeoutRuleGenerator + = PartitionLevelCircuitBreakerTests::buildTransitTimeoutRules; + return new Object[][] { - {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.BATCH_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.READ_FEED_ITEM, FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionOperationType.READ_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionOperationType.UPSERT_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionOperationType.REPLACE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionOperationType.DELETE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionOperationType.PATCH_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionOperationType.CREATE_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionOperationType.QUERY_ITEM, FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false} +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) +// .withHitLimit(12), +// serviceUnavailableRulesGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) +// .withHitLimit(7), +// serviceUnavailableRulesGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) +// .withHitLimit(7), +// serviceUnavailableRulesGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) +// .withHitLimit(7), +// serviceUnavailableRulesGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) +// .withHitLimit(7), +// serviceUnavailableRulesGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withHitLimit(7), +// serviceUnavailableRulesGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withHitLimit(7), +// serviceUnavailableRulesGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) +// .withHitLimit(7), +// serviceUnavailableRulesGenerator, +// null +// }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withHitLimit(12), + serviceUnavailableRulesGenerator, + null + }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// serverGeneratedGoneRuleGenerator, +// TWO_SECOND_TIMEOUT +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// serverGeneratedGoneRuleGenerator, +// TWO_SECOND_TIMEOUT +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// serverGeneratedGoneRuleGenerator, +// TWO_SECOND_TIMEOUT +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// serverGeneratedGoneRuleGenerator, +// TWO_SECOND_TIMEOUT +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// serverGeneratedGoneRuleGenerator, +// TWO_SECOND_TIMEOUT +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// serverGeneratedGoneRuleGenerator, +// TWO_SECOND_TIMEOUT +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// serverGeneratedGoneRuleGenerator, +// TWO_SECOND_TIMEOUT +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)) +// .withResponseDelay(Duration.ofSeconds(6)), +// transitTimeoutRuleGenerator, +// null +// }, +// new Object[] { +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)) +// .withResponseDelay(Duration.ofSeconds(6)), +// transitTimeoutRuleGenerator, +// null +// } }; } @@ -132,14 +252,11 @@ public Object[][] readManyTestConfigs() { @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs") public void operationHitsTerminalExceptionInFirstPreferredRegion( - FaultInjectionOperationType faultInjectionOperationType, - FaultInjectionServerErrorType faultInjectionServerErrorType, - int faultInjectionHitCount, - Duration faultInjectionDuration, - boolean shouldEndToEndTimeoutBeInjected, - boolean shouldThresholdBasedAvailabilityStrategyBeEnabled) { + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> faultInjectionRuleGenerator, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg) { - logger.info("Checking circuit breaking behavior for {}", faultInjectionOperationType); + logger.info("Checking circuit breaking behavior for {}", faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()); List preferredRegions = this.writeRegions; CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); @@ -157,7 +274,7 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( CosmosAsyncContainer container = null; CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(getProvisionedThroughputForContainer(faultInjectionOperationType)); + ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(getProvisionedThroughputForContainer(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType())); OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); @@ -170,7 +287,7 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( Thread.sleep(10_000); - int testObjCountToBootstrapFrom = getTestObjectCountToBootstrapFrom(faultInjectionOperationType, 15); + int testObjCountToBootstrapFrom = getTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); List testObjects = new ArrayList<>(); for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { @@ -187,47 +304,21 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( faultyFeedRange = FeedRange.forLogicalPartition(new PartitionKey(testObjects.get(0).getId())); } - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(faultInjectionOperationType) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(faultyFeedRange).build()) - .region(preferredRegions.get(0)) - .build(); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableRegions(Arrays.asList(preferredRegions.get(0))); - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(faultInjectionServerErrorType) - .build(); + List faultInjectionRules = faultInjectionRuleGenerator.apply(faultInjectionRuleParamsWrapper); - FaultInjectionRule faultInjectionRule = null; - - if (faultInjectionServerErrorType == FaultInjectionServerErrorType.GONE) { - faultInjectionRule = new FaultInjectionRuleBuilder("gone-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(faultInjectionDuration) - .build(); - } else if (faultInjectionServerErrorType == FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) { - faultInjectionRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(faultInjectionHitCount) - .build(); - } - - if (faultInjectionRule != null) { + if (faultInjectionRules != null && !faultInjectionRules.isEmpty()) { Function> faultInjectedFunc = - generateOperation(faultInjectionOperationType); + generateOperation(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()); assertThat(faultInjectedFunc).isNotNull().as("faultInjectedFunc cannot be null!"); - if (shouldEndToEndTimeoutBeInjected) { - - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg = (shouldThresholdBasedAvailabilityStrategyBeEnabled) ? - TWO_SECOND_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY : - TWO_SECOND_TIMEOUT; - + if (e2eLatencyPolicyCfg != null) { operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions() .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); @@ -242,13 +333,14 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = faultyFeedRange; CosmosFaultInjectionHelper - .configureFaultInjectionRules(operationInvocationParamsWrapper.asyncContainer, Arrays.asList(faultInjectionRule)) + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) .block(); for (int i = 1; i <= 15; i++) { operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); - logger.info("Hit count : {}", faultInjectionRule.getHitCount()); + + logger.info("Hit count : {}", faultInjectionRules.stream().mapToLong(FaultInjectionRule::getHitCount).sum()); if (response.cosmosItemResponse != null) { assertThat(response.cosmosItemResponse).isNotNull(); @@ -288,7 +380,7 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); - logger.info("Hit count : {}", faultInjectionRule.getHitCount()); + logger.info("Hit count : {}", faultInjectionRules.stream().mapToLong(FaultInjectionRule::getHitCount).sum()); if (response.cosmosItemResponse != null) { assertThat(response.cosmosItemResponse).isNotNull(); @@ -1048,6 +1140,80 @@ private static class OperationInvocationParamsWrapper { public FeedRange feedRangeToDrainForChangeFeed; } + private static class FaultInjectionRuleParamsWrapper { + + private CosmosAsyncContainer faultInjectionApplicableAsyncContainer; + private Integer hitLimit; + private Duration responseDelay; + private Duration faultInjectionDuration; + private List faultInjectionApplicableRegions; + private FeedRange faultInjectionApplicableFeedRange; + private FaultInjectionOperationType faultInjectionOperationType; + + public CosmosAsyncContainer getFaultInjectionApplicableAsyncContainer() { + return faultInjectionApplicableAsyncContainer; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionApplicableAsyncContainer(CosmosAsyncContainer faultInjectionApplicableAsyncContainer) { + this.faultInjectionApplicableAsyncContainer = faultInjectionApplicableAsyncContainer; + return this; + } + + public Integer getHitLimit() { + return hitLimit; + } + + public FaultInjectionRuleParamsWrapper withHitLimit(Integer hitLimit) { + this.hitLimit = hitLimit; + return this; + } + + public Duration getResponseDelay() { + return responseDelay; + } + + public FaultInjectionRuleParamsWrapper withResponseDelay(Duration responseDelay) { + this.responseDelay = responseDelay; + return this; + } + + public Duration getFaultInjectionDuration() { + return faultInjectionDuration; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionDuration(Duration faultInjectionDuration) { + this.faultInjectionDuration = faultInjectionDuration; + return this; + } + + public List getFaultInjectionApplicableRegions() { + return faultInjectionApplicableRegions; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionApplicableRegions(List faultInjectionApplicableRegions) { + this.faultInjectionApplicableRegions = faultInjectionApplicableRegions; + return this; + } + + public FeedRange getFaultInjectionApplicableFeedRange() { + return faultInjectionApplicableFeedRange; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionApplicableFeedRange(FeedRange faultInjectionApplicableFeedRange) { + this.faultInjectionApplicableFeedRange = faultInjectionApplicableFeedRange; + return this; + } + + public FaultInjectionOperationType getFaultInjectionOperationType() { + return faultInjectionOperationType; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionOperationType(FaultInjectionOperationType faultInjectionOperationType) { + this.faultInjectionOperationType = faultInjectionOperationType; + return this; + } + } + private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { Iterator locationIterator = writeOnly ? databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); @@ -1060,4 +1226,93 @@ private static Map getRegionMap(DatabaseAccount databaseAccount, return regionMap; } + + private static List buildServiceUnavailableRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(paramsWrapper.getHitLimit()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildServerGeneratedGoneRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.GONE) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("gone-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildTransitTimeoutRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) + .delay(paramsWrapper.getResponseDelay()) + .suppressServiceRequests(false) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("response-delay-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java index 3ca3ff747f77..881e4f11676e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java @@ -179,7 +179,7 @@ private Mono> executeRequestAsync(RxDocumentServiceRequest reque }); })) .flatMap(client::readFeed) - .map(rsp -> feedResponseAccessor.createChangeFeedResponse(rsp, this.itemSerializer, klass)); + .map(rsp -> feedResponseAccessor.createChangeFeedResponse(rsp, this.itemSerializer, klass, rsp.getCosmosDiagnostics())); } else { final OperationListener listener = operationContextAndListener.getOperationListener(); final OperationContext operationContext = operationContextAndListener.getOperationContext(); @@ -219,7 +219,7 @@ private Mono> executeRequestAsync(RxDocumentServiceRequest reque listener.responseListener(operationContext, rsp); final FeedResponse feedResponse = feedResponseAccessor.createChangeFeedResponse( - rsp, this.itemSerializer, klass); + rsp, this.itemSerializer, klass, rsp.getCosmosDiagnostics()); Map rspHeaders = feedResponse.getResponseHeaders(); String requestPkRangeId = null; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 161be3c7237d..dd44a09e7c44 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -62,31 +62,13 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest AtomicBoolean isFailoverPossible = new AtomicBoolean(true); AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); + this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionLevelLocationUnavailabilityInfoAsVal) -> { - if (partitionLevelLocationUnavailabilityInfoSnapshot == null) { - this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionKeyRangeFailoverInfoAsVal) -> { - - if (partitionKeyRangeFailoverInfoAsVal == null) { - partitionKeyRangeFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); - } - - isFailureThresholdBreached.set(partitionKeyRangeFailoverInfoAsVal.handleException(partitionKeyRangeWrapperAsKey, failedLocation, request.isReadOnlyRequest())); - - if (isFailureThresholdBreached.get()) { - - UnmodifiableList applicableEndpoints = request.isReadOnlyRequest() ? - this.globalEndpointManager.getApplicableReadEndpoints(request.requestContext.getExcludeRegions()) : - this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); + if (partitionLevelLocationUnavailabilityInfoAsVal == null) { + partitionLevelLocationUnavailabilityInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); + } - isFailoverPossible.set( - partitionKeyRangeFailoverInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapperAsKey, applicableEndpoints, request.isReadOnlyRequest())); - } - - return partitionKeyRangeFailoverInfoAsVal; - }); - } else { - isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoSnapshot.handleException(partitionKeyRangeWrapper, failedLocation, request.isReadOnlyRequest())); + isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoAsVal.handleException(partitionKeyRangeWrapperAsKey, failedLocation, request.isReadOnlyRequest())); if (isFailureThresholdBreached.get()) { @@ -95,12 +77,11 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); isFailoverPossible.set( - partitionLevelLocationUnavailabilityInfoSnapshot.areLocationsAvailableForPartitionKeyRange( - partitionKeyRangeWrapper, - applicableEndpoints, - request.isReadOnlyRequest())); + partitionLevelLocationUnavailabilityInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapperAsKey, applicableEndpoints, request.isReadOnlyRequest())); } - } + + return partitionLevelLocationUnavailabilityInfoAsVal; + }); // set to true if and only if failure threshold exceeded for the region // and if failover is possible @@ -240,7 +221,7 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper 0, 0, Instant.MAX, - LocationUnavailabilityStatus.Healthy, + LocationUnavailabilityStatus.HealthyWithFailures, false); } @@ -397,7 +378,7 @@ public LocationSpecificContext handleSuccess( int minSuccessCountForStatusUpgrade = getMinimumSuccessCountForStatusUpgrade(currentStatusSnapshot, isReadOnlyRequest); int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.exceptionCountForWrite; - int successCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.successCountForWrite; + int successCountActual = isReadOnlyRequest ? locationSpecificContext.successCountForRead : locationSpecificContext.successCountForWrite; switch (currentStatusSnapshot) { case Healthy: @@ -438,13 +419,13 @@ public LocationSpecificContext handleSuccess( logger.info("Try to switch to Available but actual success count : {}", successCountActual); - if (successCountActual > minSuccessCountForStatusUpgrade && (double) exceptionCountActual / (double) successCountActual < allowedFailureRatio) { + if (successCountActual >= minSuccessCountForStatusUpgrade && (double) exceptionCountActual / (double) successCountActual < allowedFailureRatio) { logger.info("Partition {}-{} of collection : {} marked as Available from StaleUnavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.Healthy); } else { @@ -474,12 +455,12 @@ public LocationSpecificContext handleSuccess( Instant unavailableSinceActual = locationSpecificContext.unavailableSince; if (!forceStatusChange) { if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { - logger.info("Partition {}-{} of collection : {} marked as StaleUnavailable from FreshUnavailable for region : {}", + logger.info("Partition {}-{} of collection : {} marked as StaleUnavailable from Unavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); } @@ -489,7 +470,7 @@ public LocationSpecificContext handleSuccess( partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); } break; @@ -515,6 +496,12 @@ public LocationSpecificContext handleException( switch (currentStatusSnapshot) { case Healthy: + logger.info("Partition {}-{} of collection : {} marked as HealthyWithFailures from Healthy for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.HealthyWithFailures); case HealthyWithFailures: if (exceptionCountActual < allowedExceptionCount) { @@ -545,12 +532,12 @@ public LocationSpecificContext handleException( } else { GlobalPartitionEndpointManagerForCircuitBreaker .this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); - logger.info("Partition {}-{} of collection : {} marked as FreshUnavailable from Available for region : {}", + logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyWithFailures for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, OperationType.Read)); + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.Unavailable); } case StaleUnavailable: @@ -578,12 +565,12 @@ public LocationSpecificContext handleException( locationSpecificContext.isExceptionThresholdBreached); } } else { - logger.info("Partition {}-{} of collection : {} marked as FreshUnavailable from StaleUnavailable for region : {}", + logger.info("Partition {}-{} of collection : {} marked as Unavailable from StaleUnavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, OperationType.Read)); + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.Unavailable); } default: diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 9db179467c04..48f67f632a43 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -1037,6 +1037,12 @@ FeedResponse createFeedResponse(RxDocumentServiceResponse response, FeedResponse createChangeFeedResponse(RxDocumentServiceResponse response, CosmosItemSerializer itemSerializer, Class cls); + + FeedResponse createChangeFeedResponse(RxDocumentServiceResponse response, + CosmosItemSerializer itemSerializer, + Class cls, + CosmosDiagnostics diagnostics); + boolean getNoChanges(FeedResponse feedResponse); FeedResponse convertGenericType(FeedResponse feedResponse, Function conversion); FeedResponse createFeedResponse( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 6cfb3e60792e..a627e52b8379 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -165,6 +165,10 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization ImplementationBridgeHelpers.CosmosItemResponseHelper.CosmosItemResponseBuilderAccessor itemResponseAccessor = ImplementationBridgeHelpers.CosmosItemResponseHelper.getCosmosItemResponseBuilderAccessor(); + private final static + ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.CosmosChangeFeedRequestOptionsAccessor changeFeedRequestOptAccessor = + ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.getCosmosChangeFeedRequestOptionsAccessor(); + private static final String tempMachineId = "uuid:" + UUID.randomUUID(); private static final AtomicInteger activeClientsCnt = new AtomicInteger(0); private static final Map clientMap = new ConcurrentHashMap<>(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java index 92ba73f78271..0978d5fb2f20 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/FeedResponse.java @@ -81,6 +81,10 @@ public class FeedResponse implements ContinuablePage { this(results, header, true, nochanges, new ConcurrentHashMap<>()); } + FeedResponse(List results, Map header, boolean nochanges, CosmosDiagnostics diagnostics) { + this(results, header, true, nochanges, new ConcurrentHashMap<>(), diagnostics); + } + FeedResponse(List results, Map headers, CosmosDiagnostics diagnostics) { this(results, headers); @@ -116,6 +120,23 @@ private FeedResponse( this.cosmosDiagnostics = BridgeInternal.createCosmosDiagnostics(queryMetricsMap); } + private FeedResponse( + List results, + Map header, + boolean useEtagAsContinuation, + boolean nochanges, + ConcurrentMap queryMetricsMap, + CosmosDiagnostics diagnostics) { + this.results = results; + this.header = header; + this.usageHeaders = new HashMap<>(); + this.quotaHeaders = new HashMap<>(); + this.useEtagAsContinuation = useEtagAsContinuation; + this.nochanges = nochanges; + this.queryMetricsMap = new ConcurrentHashMap<>(queryMetricsMap); + this.cosmosDiagnostics = diagnostics; + } + private FeedResponse( List transformedResults, FeedResponse toBeCloned) { @@ -594,6 +615,13 @@ public FeedResponse createChangeFeedResponse(RxDocumentServiceResponse re response.getResponseHeaders(), noChanges(response)); } + @Override + public FeedResponse createChangeFeedResponse(RxDocumentServiceResponse response, CosmosItemSerializer itemSerializer, Class cls, CosmosDiagnostics diagnostics) { + return new FeedResponse<>( + noChanges(response) ? Collections.emptyList() : response.getQueryResponse(itemSerializer, cls), + response.getResponseHeaders(), noChanges(response), diagnostics); + } + @Override public boolean getNoChanges(FeedResponse feedResponse) { return feedResponse.getNoChanges(); From 0f92454b05f994400814afd7c39a199237069b9c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 29 May 2024 18:13:06 -0400 Subject: [PATCH 044/140] Refactoring. --- .../PartitionLevelCircuitBreakerTests.java | 242 +++++++++--------- ...itionEndpointManagerForCircuitBreaker.java | 67 +++-- 2 files changed, 154 insertions(+), 155 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index f0389e48d790..18fb0d7f57aa 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -109,62 +109,62 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { = PartitionLevelCircuitBreakerTests::buildTransitTimeoutRules; return new Object[][] { -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) -// .withHitLimit(12), -// serviceUnavailableRulesGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) -// .withHitLimit(7), -// serviceUnavailableRulesGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) -// .withHitLimit(7), -// serviceUnavailableRulesGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) -// .withHitLimit(7), -// serviceUnavailableRulesGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) -// .withHitLimit(7), -// serviceUnavailableRulesGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withHitLimit(7), -// serviceUnavailableRulesGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withHitLimit(7), -// serviceUnavailableRulesGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) -// .withHitLimit(7), -// serviceUnavailableRulesGenerator, -// null -// }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withHitLimit(12), + serviceUnavailableRulesGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withHitLimit(7), + serviceUnavailableRulesGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withHitLimit(7), + serviceUnavailableRulesGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withHitLimit(7), + serviceUnavailableRulesGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withHitLimit(7), + serviceUnavailableRulesGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withHitLimit(7), + serviceUnavailableRulesGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(7), + serviceUnavailableRulesGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withHitLimit(7), + serviceUnavailableRulesGenerator, + null + }, new Object[] { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) @@ -172,71 +172,71 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { serviceUnavailableRulesGenerator, null }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// serverGeneratedGoneRuleGenerator, -// TWO_SECOND_TIMEOUT -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// serverGeneratedGoneRuleGenerator, -// TWO_SECOND_TIMEOUT -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// serverGeneratedGoneRuleGenerator, -// TWO_SECOND_TIMEOUT -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// serverGeneratedGoneRuleGenerator, -// TWO_SECOND_TIMEOUT -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// serverGeneratedGoneRuleGenerator, -// TWO_SECOND_TIMEOUT -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// serverGeneratedGoneRuleGenerator, -// TWO_SECOND_TIMEOUT -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// serverGeneratedGoneRuleGenerator, -// TWO_SECOND_TIMEOUT -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)) -// .withResponseDelay(Duration.ofSeconds(6)), -// transitTimeoutRuleGenerator, -// null -// }, -// new Object[] { -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)) -// .withResponseDelay(Duration.ofSeconds(6)), -// transitTimeoutRuleGenerator, -// null -// } + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + serverGeneratedGoneRuleGenerator, + TWO_SECOND_TIMEOUT + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + serverGeneratedGoneRuleGenerator, + TWO_SECOND_TIMEOUT + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + serverGeneratedGoneRuleGenerator, + TWO_SECOND_TIMEOUT + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + serverGeneratedGoneRuleGenerator, + TWO_SECOND_TIMEOUT + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + serverGeneratedGoneRuleGenerator, + TWO_SECOND_TIMEOUT + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + serverGeneratedGoneRuleGenerator, + TWO_SECOND_TIMEOUT + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + serverGeneratedGoneRuleGenerator, + TWO_SECOND_TIMEOUT + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + transitTimeoutRuleGenerator, + null + }, + new Object[] { + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + transitTimeoutRuleGenerator, + null + } }; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index dd44a09e7c44..258ad74345dd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -26,12 +26,12 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); private final GlobalEndpointManager globalEndpointManager; - private final ConcurrentHashMap partitionKeyRangeToFailoverInfo; + private final ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo; private final ConcurrentHashMap partitionsWithPossibleUnavailableRegions; private final LocationContextTransitionHandler locationContextTransitionHandler; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { - this.partitionKeyRangeToFailoverInfo = new ConcurrentHashMap<>(); + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); this.partitionsWithPossibleUnavailableRegions = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; this.locationContextTransitionHandler = new LocationContextTransitionHandler(); @@ -62,7 +62,7 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest AtomicBoolean isFailoverPossible = new AtomicBoolean(true); AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); - this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionLevelLocationUnavailabilityInfoAsVal) -> { + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionLevelLocationUnavailabilityInfoAsVal) -> { if (partitionLevelLocationUnavailabilityInfoAsVal == null) { partitionLevelLocationUnavailabilityInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); @@ -91,7 +91,7 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest } // no regions to fail over to - this.partitionKeyRangeToFailoverInfo.remove(partitionKeyRangeWrapper); + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.remove(partitionKeyRangeWrapper); } public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest request) { @@ -109,10 +109,9 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r logger.info("Handling success : {}", resourceId); PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); - URI succeededLocation = request.requestContext.locationEndpointToRoute; - this.partitionKeyRangeToFailoverInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionKeyRangeToFailoverInfoAsVal) -> { + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionKeyRangeToFailoverInfoAsVal) -> { if (partitionKeyRangeToFailoverInfoAsVal == null) { partitionKeyRangeToFailoverInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); @@ -137,7 +136,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = - this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); List unavailableLocations = new ArrayList<>(); @@ -170,7 +169,7 @@ private Flux updateStaleLocationInfo() { PartitionKeyRangeWrapper partitionKeyRangeWrapper = partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair.getKey(); - PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToFailoverInfo.get(partitionKeyRangeWrapper); + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); if (partitionLevelLocationUnavailabilityInfo != null) { for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.entrySet()) { @@ -281,9 +280,9 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe } } - Instant mostStaleUnavailableTimeAcrossRegions = Instant.MAX; + Instant mostHealthyTentativeTimeAcrossRegions = Instant.MAX; LocationSpecificContext locationLevelFailureMetadataForMostStaleLocation = null; - URI mostStaleUnavailableLocation = null; + URI mostHealthyTentativeLocation = null; // find region with most 'stale' unavailability for (Map.Entry uriToLocationLevelFailureMetadata : this.locationEndpointToLocationSpecificContextForPartition.entrySet()) { @@ -295,22 +294,22 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe Instant unavailableSinceSnapshot = locationSpecificContext.unavailableSince; - if (mostStaleUnavailableTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { - mostStaleUnavailableTimeAcrossRegions = unavailableSinceSnapshot; - mostStaleUnavailableLocation = uriToLocationLevelFailureMetadata.getKey(); + if (mostHealthyTentativeTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { + mostHealthyTentativeTimeAcrossRegions = unavailableSinceSnapshot; + mostHealthyTentativeLocation = uriToLocationLevelFailureMetadata.getKey(); locationLevelFailureMetadataForMostStaleLocation = locationSpecificContext; } } if (locationLevelFailureMetadataForMostStaleLocation != null) { - this.locationEndpointToLocationSpecificContextForPartition.compute(mostStaleUnavailableLocation, (mostStaleUnavailableLocationAsKey, locationSpecificStatusAsVal) -> { + this.locationEndpointToLocationSpecificContextForPartition.compute(mostHealthyTentativeLocation, (mostHealthyTentativeLocationAsKey, locationSpecificStatusAsVal) -> { if (locationSpecificStatusAsVal != null) { locationSpecificStatusAsVal = GlobalPartitionEndpointManagerForCircuitBreaker .this.locationContextTransitionHandler.handleSuccess( locationSpecificStatusAsVal, partitionKeyRangeWrapper, - mostStaleUnavailableLocationAsKey, + mostHealthyTentativeLocationAsKey, true, isReadOnlyRequest); } @@ -357,7 +356,7 @@ public boolean isExceptionThresholdBreached() { public boolean isRegionAvailableToProcessRequests() { return this.locationUnavailabilityStatus == LocationUnavailabilityStatus.Healthy || this.locationUnavailabilityStatus == LocationUnavailabilityStatus.HealthyWithFailures || - this.locationUnavailabilityStatus == LocationUnavailabilityStatus.StaleUnavailable; + this.locationUnavailabilityStatus == LocationUnavailabilityStatus.HealthyTentative; } } @@ -412,15 +411,15 @@ public LocationSpecificContext handleSuccess( } break; - case StaleUnavailable: + case HealthyTentative: if (!forceStatusChange) { successCountActual += 1; - logger.info("Try to switch to Available but actual success count : {}", successCountActual); + logger.info("Try to switch to Healthy but actual success count : {}", successCountActual); if (successCountActual >= minSuccessCountForStatusUpgrade && (double) exceptionCountActual / (double) successCountActual < allowedFailureRatio) { - logger.info("Partition {}-{} of collection : {} marked as Available from StaleUnavailable for region : {}", + logger.info("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, @@ -455,23 +454,23 @@ public LocationSpecificContext handleSuccess( Instant unavailableSinceActual = locationSpecificContext.unavailableSince; if (!forceStatusChange) { if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { - logger.info("Partition {}-{} of collection : {} marked as StaleUnavailable from Unavailable for region : {}", + logger.info("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); + return this.transitionHealthStatus(LocationUnavailabilityStatus.HealthyTentative); } } else { - logger.info("Partition {}-{} of collection : {} marked as StaleUnavailable from FreshAvailable for region : {}", + logger.info("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.StaleUnavailable); + return this.transitionHealthStatus(LocationUnavailabilityStatus.HealthyTentative); } break; default: @@ -540,7 +539,7 @@ public LocationSpecificContext handleException( .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); return this.transitionHealthStatus(LocationUnavailabilityStatus.Unavailable); } - case StaleUnavailable: + case HealthyTentative: if (exceptionCountActual < allowedExceptionCount) { exceptionCountActual += 1; @@ -565,7 +564,7 @@ public LocationSpecificContext handleException( locationSpecificContext.isExceptionThresholdBreached); } } else { - logger.info("Partition {}-{} of collection : {} marked as Unavailable from StaleUnavailable for region : {}", + logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, @@ -608,14 +607,14 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat Instant.now(), LocationUnavailabilityStatus.Unavailable, true); - case StaleUnavailable: + case HealthyTentative: return new LocationSpecificContext( 0, 0, 0, 0, Instant.MAX, - LocationUnavailabilityStatus.StaleUnavailable, + LocationUnavailabilityStatus.HealthyTentative, false); default: throw new IllegalStateException("Unsupported health status: " + newStatus); @@ -647,7 +646,7 @@ public int hashCode() { } private enum LocationUnavailabilityStatus { - Healthy, HealthyWithFailures, Unavailable, StaleUnavailable + Healthy, HealthyWithFailures, Unavailable, HealthyTentative } private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { @@ -656,7 +655,7 @@ private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilitySt switch (status) { case HealthyWithFailures: return 0.3d; - case StaleUnavailable: + case HealthyTentative: return 0.1d; default: return 0d; @@ -665,7 +664,7 @@ private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilitySt switch (status) { case HealthyWithFailures: return 0.2d; - case StaleUnavailable: + case HealthyTentative: return 0.05d; default: return 0d; @@ -679,7 +678,7 @@ private static int getAllowedExceptionCountToMaintainStatus(LocationUnavailabili switch (status) { case HealthyWithFailures: return 10; - case StaleUnavailable: + case HealthyTentative: return 5; case Healthy: return 0; @@ -690,7 +689,7 @@ private static int getAllowedExceptionCountToMaintainStatus(LocationUnavailabili switch (status) { case HealthyWithFailures: return 5; - case StaleUnavailable: + case HealthyTentative: return 2; case Healthy: return 0; @@ -703,7 +702,7 @@ private static int getAllowedExceptionCountToMaintainStatus(LocationUnavailabili private static int getMinimumSuccessCountForStatusUpgrade(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { if (isReadOnlyRequest) { switch (status) { - case StaleUnavailable: + case HealthyTentative: return 5; case Unavailable: case HealthyWithFailures: @@ -714,7 +713,7 @@ private static int getMinimumSuccessCountForStatusUpgrade(LocationUnavailability } } else { switch (status) { - case StaleUnavailable: + case HealthyTentative: return 10; case Unavailable: case HealthyWithFailures: From 925170c1ad06a7d378d9d873991756359a15ca0e Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 29 May 2024 18:15:26 -0400 Subject: [PATCH 045/140] Refactoring. --- .../com/azure/cosmos/implementation/RxDocumentClientImpl.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index a627e52b8379..6cfb3e60792e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -165,10 +165,6 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization ImplementationBridgeHelpers.CosmosItemResponseHelper.CosmosItemResponseBuilderAccessor itemResponseAccessor = ImplementationBridgeHelpers.CosmosItemResponseHelper.getCosmosItemResponseBuilderAccessor(); - private final static - ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.CosmosChangeFeedRequestOptionsAccessor changeFeedRequestOptAccessor = - ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.getCosmosChangeFeedRequestOptionsAccessor(); - private static final String tempMachineId = "uuid:" + UUID.randomUUID(); private static final AtomicInteger activeClientsCnt = new AtomicInteger(0); private static final Map clientMap = new ConcurrentHashMap<>(); From e2dad7bc43314e4bdf670c9a272a21bacc053286 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 29 May 2024 18:24:53 -0400 Subject: [PATCH 046/140] Refactoring. --- .../directconnectivity/AddressResolverTest.java | 3 --- .../directconnectivity/GlobalAddressResolverTest.java | 3 --- .../cosmos/implementation/query/DocumentProducerTest.java | 1 - .../implementation/IGlobalPartitionEndpointManager.java | 8 -------- .../azure/cosmos/models/CosmosQueryRequestOptions.java | 8 -------- .../azure/cosmos/models/CosmosReadManyRequestOptions.java | 2 -- 6 files changed, 25 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java index 022baf93e923..982a2be96006 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java @@ -6,11 +6,9 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentCollection; -import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; -import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.InvalidPartitionException; import com.azure.cosmos.implementation.NotFoundException; import com.azure.cosmos.implementation.OperationType; @@ -28,7 +26,6 @@ import com.azure.cosmos.implementation.routing.InMemoryCollectionRoutingMap; import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; import com.azure.cosmos.implementation.routing.PartitionKeyRangeIdentity; -import com.azure.cosmos.models.ModelBridgeInternal; import com.azure.cosmos.models.PartitionKey; import com.azure.cosmos.models.PartitionKeyDefinition; import org.apache.commons.lang3.NotImplementedException; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java index 705e5025d46a..bc76d0e51613 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java @@ -7,14 +7,12 @@ import com.azure.cosmos.DirectConnectionConfig; import com.azure.cosmos.CosmosContainerProactiveInitConfig; import com.azure.cosmos.CosmosContainerProactiveInitConfigBuilder; -import com.azure.cosmos.implementation.AsyncDocumentClient; import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; -import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; @@ -30,7 +28,6 @@ import com.azure.cosmos.implementation.routing.PartitionKeyInternalHelper; import com.azure.cosmos.implementation.routing.PartitionKeyRangeIdentity; import com.azure.cosmos.models.CosmosContainerIdentity; -import com.azure.cosmos.models.ModelBridgeInternal; import org.mockito.ArgumentMatchers; import org.mockito.Mockito; import org.testng.annotations.BeforeClass; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 7a660b955ac6..1db9722e4e96 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -13,7 +13,6 @@ import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; -import com.azure.cosmos.implementation.IGlobalPartitionEndpointManager; import com.azure.cosmos.implementation.IRetryPolicyFactory; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RetryPolicy; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java deleted file mode 100644 index 994e67bcbd0b..000000000000 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/IGlobalPartitionEndpointManager.java +++ /dev/null @@ -1,8 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -package com.azure.cosmos.implementation; - -public interface IGlobalPartitionEndpointManager { - boolean tryBookmarkRegionSuccessForPartitionKeyRange(RxDocumentServiceRequest request); -} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index 6ad671c42243..505a46d590a1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -11,18 +11,10 @@ import com.azure.cosmos.implementation.CosmosQueryRequestOptionsBase; import com.azure.cosmos.implementation.CosmosQueryRequestOptionsImpl; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; -import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.RequestOptions; import java.time.Duration; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.function.Function; /** * Specifies the options associated with query methods (enumeration operations) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java index ffef1e61212b..8f5997499d01 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosReadManyRequestOptions.java @@ -10,11 +10,9 @@ import com.azure.cosmos.implementation.CosmosQueryRequestOptionsBase; import com.azure.cosmos.implementation.CosmosReadManyRequestOptionsImpl; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; -import com.azure.cosmos.implementation.PartitionKeyRange; import java.time.Duration; import java.util.List; -import java.util.Set; /** * Specifies the options associated with read many operation From de3991c8895842bd4d836969f7064bfde60f6646 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 30 May 2024 08:38:34 -0400 Subject: [PATCH 047/140] Fixing CI pipeline. --- .../cosmos/implementation/query/Fetcher.java | 7 +++-- .../implementation/routing/LocationCache.java | 27 ++++++++++++------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index baee14719b45..95cac94feea0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -5,7 +5,6 @@ import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.implementation.Configs; -import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.FeedOperationContext; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; @@ -202,11 +201,11 @@ private Mono> nextPage(RxDocumentServiceRequest request) { if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { - this.tryMarkPartitionKeyRangeAsUnavailable(request); + this.handleLocationExceptionForPartitionKeyRange(request); } } } else { - this.tryMarkPartitionKeyRangeAsUnavailable(request); + this.handleLocationExceptionForPartitionKeyRange(request); } } @@ -216,7 +215,7 @@ private Mono> nextPage(RxDocumentServiceRequest request) { }); } - private void tryMarkPartitionKeyRangeAsUnavailable(RxDocumentServiceRequest failedRequest) { + private void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); if (firstContactedLocationEndpoint != null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index be717b0849d0..5844fbc75ea7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -238,13 +238,17 @@ public UnmodifiableList getApplicableWriteEndpoints(List excludedRe List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); - logger.info("Printing unavailable location for partition"); + if (unavailableRegionsForPartition != null) { - for (String unavailableRegionForPartition : unavailableRegionsForPartition) { - logger.info("Unavailable region : {}", unavailableRegionForPartition); - } + // todo: remove logging statements eventually + logger.info("Printing unavailable location for partition"); + + for (String unavailableRegionForPartition : unavailableRegionsForPartition) { + logger.info("Unavailable region : {}", unavailableRegionForPartition); + } - effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); + effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); + } // filter regions based on the exclude region config return this.getApplicableEndpoints( @@ -275,12 +279,17 @@ public UnmodifiableList getApplicableReadEndpoints(List excludedReg } List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); - effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); - logger.info("Printing unavailable region for partition"); + if (unavailableRegionsForPartition != null) { + + // todo: remove logging statements eventually + logger.info("Printing unavailable region for partition"); + + for (String unavailableRegionForPartition : unavailableRegionsForPartition) { + logger.info("Unavailable region : {}", unavailableRegionForPartition); + } - for (String unavailableRegionForPartition : unavailableRegionsForPartition) { - logger.info("Unavailable region : {}", unavailableRegionForPartition); + effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); } // filter regions based on the exclude region config From d62d188a156ac49612ec30bb36e5b601c5d87ce5 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 30 May 2024 09:27:33 -0400 Subject: [PATCH 048/140] Fixing CI pipeline. --- .../query/DocumentProducerTest.java | 41 +++++++++++++++++++ .../com/azure/cosmos/CosmosDiagnostics.java | 11 ----- .../ImplementationBridgeHelpers.java | 2 - 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 1db9722e4e96..064bf25bf01b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -556,6 +556,17 @@ public void simple() { return feedOperation.apply(retryPolicyFactory, req); }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).populateFeedRangeHeader(any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer<>( @@ -634,6 +645,7 @@ public void retries() { behaviourAfterException); IDocumentQueryClient queryClient = Mockito.mock(IDocumentQueryClient.class); + doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); RxDocumentServiceRequest req = invocation.getArgument(3); @@ -643,6 +655,16 @@ public void retries() { return feedOperation.apply(retryPolicyFactory, req); }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).populateFeedRangeHeader(any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer<>( @@ -733,6 +755,17 @@ public void retriesExhausted() { return feedOperation.apply(retryPolicyFactory, req); }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).populateFeedRangeHeader(any()); + + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer( @@ -858,6 +891,14 @@ private IDocumentQueryClient mockQueryClient(List replacement doReturn(cache).when(client).getPartitionKeyRangeCache(); doReturn(Mono.just(new Utils.ValueHolder<>(replacementRanges))) .when(cache).tryGetOverlappingRangesAsync(any(), any(), any(), anyBoolean(), ArgumentMatchers.any()); + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(client).populateFeedRangeHeader(any()); + doAnswer(invocation -> { + RxDocumentServiceRequest req = invocation.getArgument(0); + return Mono.just(req); + }).when(client).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); return client; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java index 6b67640c7d19..0e9157d411bf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java @@ -7,7 +7,6 @@ import com.azure.cosmos.implementation.FeedResponseDiagnostics; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.RxDocumentServiceRequest; -import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.util.Beta; import com.fasterxml.jackson.core.JsonProcessingException; @@ -474,16 +473,6 @@ public void setDiagnosticsContext(CosmosDiagnostics cosmosDiagnostics, CosmosDia cosmosDiagnostics.setDiagnosticsContext(ctx); } - @Override - public String getFirstContactedRegion(CosmosDiagnostics cosmosDiagnostics) { - - if (cosmosDiagnostics == null) { - return StringUtils.EMPTY; - } - - return cosmosDiagnostics.getFirstContactedRegion(); - } - @Override public URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 48f67f632a43..de5318ce2ba5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -808,8 +808,6 @@ void recordAddressResolutionEnd( void setDiagnosticsContext(CosmosDiagnostics cosmosDiagnostics, CosmosDiagnosticsContext ctx); - String getFirstContactedRegion(CosmosDiagnostics cosmosDiagnostics); - URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics); } } From b75f6d09f95c6a16ed652afbcd7042c6deff2915 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 30 May 2024 09:36:12 -0400 Subject: [PATCH 049/140] Fixing CI pipeline. --- .../cosmos/models/CosmosChangeFeedRequestOptions.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java index c51425bb9c98..caa8a03f4a26 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java @@ -627,19 +627,19 @@ private void addCustomOptionsForFullFidelityMode() { HttpConstants.ChangeFeedWireFormatVersions.SEPARATE_METADATA_WITH_CRTS); } - public String getCollectionRid() { + String getCollectionRid() { return collectionRid; } - public void setCollectionRid(String collectionRid) { + void setCollectionRid(String collectionRid) { this.collectionRid = collectionRid; } - public PartitionKeyDefinition getPartitionKeyDefinition() { + PartitionKeyDefinition getPartitionKeyDefinition() { return partitionKeyDefinition; } - public void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { this.partitionKeyDefinition = partitionKeyDefinition; } From a6c960b42f796d20f6b1c6d59abb282d360738c5 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 30 May 2024 11:51:57 -0400 Subject: [PATCH 050/140] Fixing CI pipeline. --- .../cosmos/implementation/CosmosQueryRequestOptionsImpl.java | 1 + .../com/azure/cosmos/implementation/RxDocumentClientImpl.java | 2 ++ .../query/DocumentQueryExecutionContextFactory.java | 1 + 3 files changed, 4 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java index f1d9b041efca..acaea9a5dcb5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java @@ -68,6 +68,7 @@ public CosmosQueryRequestOptionsImpl(CosmosQueryRequestOptionsImpl options) { this.feedRange = options.feedRange; this.cancelledRequestDiagnosticsTracker = options.cancelledRequestDiagnosticsTracker; this.maxItemCountForVectorSearch = options.maxItemCountForVectorSearch; + this.collectionRid = options.collectionRid; } /** diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 6cfb3e60792e..c0b06889fa04 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -3795,6 +3795,8 @@ public Mono addPartitionLevelUnavailableRegionsOnReque String collectionRid = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getCollectionRid(queryRequestOptions); + checkNotNull(collectionRid, "collectionRid cannot be null!"); + if (RxDocumentClientImpl.this.requiresFeedRangeFiltering(request)) { return RxDocumentClientImpl.this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), collectionRid, null, null) .flatMap(collectionRoutingMapValueHolder -> { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java index 472b5ba42625..a1b506391718 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextFactory.java @@ -313,6 +313,7 @@ public static Flux> createDocume return collectionObs.single().flatMap(collectionValueHolder -> { queryRequestOptionsAccessor.setPartitionKeyDefinition(cosmosQueryRequestOptions, collectionValueHolder.v.getPartitionKey()); + queryRequestOptionsAccessor.setCollectionRid(cosmosQueryRequestOptions, collectionValueHolder.v.getResourceId()); Mono>, QueryInfo>> queryPlanTask = getPartitionKeyRangesAndQueryInfo(diagnosticsClientContext, From 576d97b087405c9c603edbb95965ea2a8e36aa2d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 30 May 2024 13:52:39 -0400 Subject: [PATCH 051/140] Fixing CI pipeline. --- .../cosmos/implementation/RxDocumentClientImplTest.java | 6 +++++- .../query/DocumentQueryExecutionContextBase.java | 3 +++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java index 27a7fd407092..139a0f45d79b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java @@ -203,13 +203,17 @@ public void readMany() { Mockito .when(this.collectionCacheMock.resolveCollectionAsync(Mockito.isNull(), Mockito.any(RxDocumentServiceRequest.class))) .thenReturn(Mono.just(dummyCollectionObs())); + + Mockito + .when(this.collectionCacheMock.resolveByNameAsync(Mockito.isNull(), Mockito.anyString(), Mockito.isNull())) + .thenReturn(Mono.just(dummyCollectionObs().v)); + Mockito .when(this.partitionKeyRangeCacheMock.tryLookupAsync(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any())) .thenReturn(Mono.just(dummyCollectionRoutingMap(epksPartitionKeyRangeMap))); Mockito.when(this.resetSessionTokenRetryPolicyMock.getRequestPolicy(null)).thenReturn(dummyDocumentClientRetryPolicy()); - // initialize object to be tested RxDocumentClientImpl rxDocumentClient = new RxDocumentClientImpl( this.serviceEndpointMock, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java index bf5067195d69..4fa2cdbad490 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentQueryExecutionContextBase.java @@ -122,6 +122,9 @@ protected RxDocumentServiceRequest createDocumentServiceRequestWithFeedRange(Map ? this.createQueryDocumentServiceRequest(requestHeaders, querySpec) : this.createReadFeedDocumentServiceRequest(requestHeaders); request.requestContext.resolvedCollectionRid = collectionRid; + + qryOptAccessor.setCollectionRid(cosmosQueryRequestOptions, collectionRid); + request.setResourceId(collectionRid); request.throughputControlGroupName = throughputControlGroupName; From 9e064aec1d72c6c54848434bc14821551f3ba819 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 30 May 2024 15:21:47 -0400 Subject: [PATCH 052/140] Fixing CI pipeline. --- .../azure/cosmos/implementation/RxDocumentClientImpl.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index c0b06889fa04..87cff702115d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -3793,11 +3793,12 @@ public Mono populateFeedRangeHeader(RxDocumentServiceR @Override public Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions) { - String collectionRid = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getCollectionRid(queryRequestOptions); + if (RxDocumentClientImpl.this.requiresFeedRangeFiltering(request)) { - checkNotNull(collectionRid, "collectionRid cannot be null!"); + String collectionRid = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getCollectionRid(queryRequestOptions); + + checkNotNull(collectionRid, "collectionRid cannot be null!"); - if (RxDocumentClientImpl.this.requiresFeedRangeFiltering(request)) { return RxDocumentClientImpl.this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), collectionRid, null, null) .flatMap(collectionRoutingMapValueHolder -> { From ffaf8269343ffaa0b73f049d5d7300be98bb15df Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 30 May 2024 16:37:38 -0400 Subject: [PATCH 053/140] Fixing CI pipeline. --- .../cosmos/implementation/RxDocumentClientImpl.java | 9 ++++++--- .../query/DefaultDocumentQueryExecutionContext.java | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 87cff702115d..31c6581a9552 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2322,10 +2322,13 @@ private Mono handleRegionFeedbackForPointOperation( return response .doOnSuccess(ignore -> { - RxDocumentServiceRequest succeededRequest = requestReference.get(); - PointOperationContext pointOperationContext = succeededRequest.requestContext.getPointOperationContext(); - pointOperationContext.setHasOperationSeenSuccess(); + if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + RxDocumentServiceRequest succeededRequest = requestReference.get(); + + PointOperationContext pointOperationContext = succeededRequest.requestContext.getPointOperationContext(); + pointOperationContext.setHasOperationSeenSuccess(); + } }) .doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index 224b2bcdbb8c..8c18e3d523fb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -132,7 +132,7 @@ public Flux> executeAsync() { return Paginator .getPaginatedQueryResultAsObservable( - newCosmosQueryRequestOptions, createRequestFunc, executeFunc, maxPageSize, null, null); + newCosmosQueryRequestOptions, createRequestFunc, executeFunc, maxPageSize, this.client.getGlobalEndpointManager(), this.client.getGlobalPartitionEndpointManagerForCircuitBreaker()); } public Mono> getTargetPartitionKeyRanges(String resourceId, List> queryRanges) { From 3c412343d643c1db063714dcded430fff809a26e Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 31 May 2024 12:52:53 -0400 Subject: [PATCH 054/140] Refactoring. --- .../implementation/AsyncDocumentClient.java | 2 + .../implementation/ChangeFeedQueryImpl.java | 96 +++++++++---------- ...itionEndpointManagerForCircuitBreaker.java | 18 ++++ .../implementation/RxDocumentClientImpl.java | 47 +++++++-- .../cosmos/implementation/query/Fetcher.java | 16 +++- 5 files changed, 115 insertions(+), 64 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index a8ac6b1b3993..5557544cfb3a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -1581,6 +1581,8 @@ Flux> readAllDocuments( */ GlobalEndpointManager getGlobalEndpointManager(); + GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker(); + /*** * Get the address selector. * @return the address selector. diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java index 881e4f11676e..da77530b69eb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java @@ -3,6 +3,8 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.CosmosItemSerializer; +import com.azure.cosmos.implementation.apachecommons.collections.CollectionUtils; +import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStateV1; import com.azure.cosmos.implementation.feedranges.FeedRangeInternal; @@ -16,6 +18,8 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import java.net.URI; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -151,33 +155,7 @@ private RxDocumentServiceRequest createDocumentServiceRequest() { private Mono> executeRequestAsync(RxDocumentServiceRequest request) { if (this.operationContextAndListener == null) { - return Mono.just(request) - .flatMap(req -> client.populateHeadersAsync(req, RequestVerb.GET)) - .flatMap(req -> client.getCollectionCache().resolveCollectionAsync(null, req) - .flatMap(documentCollectionValueHolder -> { - - checkNotNull(documentCollectionValueHolder, "documentCollectionValueHolder cannot be null!"); - checkNotNull(documentCollectionValueHolder.v, "documentCollectionValueHolder.v cannot be null!"); - - return client.getPartitionKeyRangeCache().tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) - .flatMap(collectionRoutingMapValueHolder -> { - - checkNotNull(collectionRoutingMapValueHolder, "collectionRoutingMapValueHolder cannot be null!"); - checkNotNull(collectionRoutingMapValueHolder.v, "collectionRoutingMapValueHolder.v cannot be null!"); - - changeFeedRequestOptionsAccessor.setPartitionKeyDefinition(options, documentCollectionValueHolder.v.getPartitionKey()); - changeFeedRequestOptionsAccessor.setCollectionRid(options, documentCollectionValueHolder.v.getResourceId()); - - client.addPartitionLevelUnavailableRegionsForChangeFeedRequest(req, options, collectionRoutingMapValueHolder.v); - - if (req.requestContext.getClientRetryPolicySupplier() != null) { - DocumentClientRetryPolicy documentClientRetryPolicy = req.requestContext.getClientRetryPolicySupplier().get(); - documentClientRetryPolicy.onBeforeSendRequest(req); - } - - return Mono.just(req); - }); - })) + return handlePartitionLevelCircuitBreakingPrerequisites(request) .flatMap(client::readFeed) .map(rsp -> feedResponseAccessor.createChangeFeedResponse(rsp, this.itemSerializer, klass, rsp.getCosmosDiagnostics())); } else { @@ -188,6 +166,42 @@ private Mono> executeRequestAsync(RxDocumentServiceRequest reque .put(HttpConstants.HttpHeaders.CORRELATED_ACTIVITY_ID, operationContext.getCorrelationActivityId()); listener.requestListener(operationContext, request); + return handlePartitionLevelCircuitBreakingPrerequisites(request) + .flatMap(client::readFeed) + .map(rsp -> { + listener.responseListener(operationContext, rsp); + + final FeedResponse feedResponse = feedResponseAccessor.createChangeFeedResponse( + rsp, this.itemSerializer, klass, rsp.getCosmosDiagnostics()); + + Map rspHeaders = feedResponse.getResponseHeaders(); + String requestPkRangeId = null; + if (!rspHeaders.containsKey(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID) && + (requestPkRangeId = request + .getHeaders() + .get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID)) != null) { + + rspHeaders.put( + HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID, + requestPkRangeId + ); + } + listener.feedResponseReceivedListener(operationContext, feedResponse); + + return feedResponse; + }) + .doOnError(ex -> listener.exceptionListener(operationContext, ex)); + } + } + + private Mono handlePartitionLevelCircuitBreakingPrerequisites(RxDocumentServiceRequest request) { + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = client.getGlobalPartitionEndpointManagerForCircuitBreaker(); + + checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "Argument 'globalPartitionEndpointManagerForCircuitBreaker' must not be null!"); + + if (globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { return Mono.just(request) .flatMap(req -> client.populateHeadersAsync(req, RequestVerb.GET)) .flatMap(req -> client.getCollectionCache().resolveCollectionAsync(null, req) @@ -214,31 +228,9 @@ private Mono> executeRequestAsync(RxDocumentServiceRequest reque return Mono.just(req); }); - })).flatMap(client::readFeed) - .map(rsp -> { - listener.responseListener(operationContext, rsp); - - final FeedResponse feedResponse = feedResponseAccessor.createChangeFeedResponse( - rsp, this.itemSerializer, klass, rsp.getCosmosDiagnostics()); - - Map rspHeaders = feedResponse.getResponseHeaders(); - String requestPkRangeId = null; - if (!rspHeaders.containsKey(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID) && - (requestPkRangeId = request - .getHeaders() - .get(HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID)) != null) { - - rspHeaders.put( - HttpConstants.HttpHeaders.PARTITION_KEY_RANGE_ID, - requestPkRangeId - ); - } - listener.feedResponseReceivedListener(operationContext, feedResponse); - - return feedResponse; - }) - .doOnError(ex -> listener.exceptionListener(operationContext, ex) - ); + })); + } else { + return Mono.just(request); } } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 258ad74345dd..549ed89f6fb0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -13,6 +13,7 @@ import java.time.Duration; import java.time.Instant; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Objects; @@ -199,6 +200,23 @@ private Flux updateStaleLocationInfo() { }); } + public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceRequest request) { + + if (!Configs.isPartitionLevelCircuitBreakerEnabled()) { + return false; + } + + GlobalEndpointManager globalEndpointManager = this.globalEndpointManager; + + if (!globalEndpointManager.canUseMultipleWriteLocations(request)) { + return false; + } + + UnmodifiableList applicableWriteEndpoints = globalEndpointManager.getApplicableWriteEndpoints(Collections.emptyList()); + + return applicableWriteEndpoints != null && applicableWriteEndpoints.size() > 1; + } + private class PartitionLevelLocationUnavailabilityInfo { private final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 31c6581a9552..26d7f6161e72 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2323,20 +2323,32 @@ private Mono handleRegionFeedbackForPointOperation( return response .doOnSuccess(ignore -> { - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - RxDocumentServiceRequest succeededRequest = requestReference.get(); + RxDocumentServiceRequest succeededRequest = requestReference.get(); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(succeededRequest)) { + + checkNotNull(succeededRequest.requestContext, "Argument 'succeededRequest.requestContext' must not be null!"); PointOperationContext pointOperationContext = succeededRequest.requestContext.getPointOperationContext(); + + checkNotNull(pointOperationContext, "Argument 'pointOperationContext' must not be null!"); + pointOperationContext.setHasOperationSeenSuccess(); } }) .doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - RxDocumentServiceRequest failedRequest = requestReference.get(); + RxDocumentServiceRequest failedRequest = requestReference.get(); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(failedRequest)) { + + checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); + PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); + checkNotNull(pointOperationContext, "Argument 'pointOperationContext' must not be null!"); + if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { @@ -2350,17 +2362,27 @@ private Mono handleRegionFeedbackForPointOperation( }) .doFinally(signalType -> { - if (signalType == SignalType.CANCEL && Configs.isPartitionLevelCircuitBreakerEnabled()) { - RxDocumentServiceRequest failedRequest = requestReference.get(); - PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); + if (signalType != SignalType.CANCEL) { + return; + } + + RxDocumentServiceRequest potentiallyFailedRequest = requestReference.get(); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(potentiallyFailedRequest)) { + + checkNotNull(potentiallyFailedRequest.requestContext, "Argument 'potentiallyFailedRequest.requestContext' must not be null!"); + + PointOperationContext pointOperationContext = potentiallyFailedRequest.requestContext.getPointOperationContext(); + + checkNotNull(pointOperationContext, "Argument 'pointOperationContext' must not be null!"); if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { - this.handleLocationExceptionForPartitionKeyRange(failedRequest); + this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); } } else { - this.handleLocationExceptionForPartitionKeyRange(failedRequest); + this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); } } }); @@ -5386,6 +5408,11 @@ public GlobalEndpointManager getGlobalEndpointManager() { return this.globalEndpointManager; } + @Override + public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { + return this.globalPartitionEndpointManagerForCircuitBreaker; + } + @Override public AddressSelector getAddressSelector() { return new AddressSelector(this.addressResolver, this.configs.getProtocol()); @@ -5818,7 +5845,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); - // todo: validate if the below is possible + // todo: investigate retry policy in stale cache scenarios if (collectionRoutingMapValueHolder.v == null) { return Mono.error(new NotFoundException("collectionRoutingMapValueHolder.v cannot be null!")); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 95cac94feea0..7f8ac5c3c7ce 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -11,6 +11,7 @@ import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.ModelBridgeInternal; @@ -20,6 +21,7 @@ import reactor.core.publisher.SignalType; import java.net.URI; +import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -174,8 +176,14 @@ private Mono> nextPage(RxDocumentServiceRequest request) { .doOnNext(response -> { completed.set(true); - if (request.getResourceType() == ResourceType.Document && Configs.isPartitionLevelCircuitBreakerEnabled()) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + + checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); + FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); + + checkNotNull(feedOperationContext, "Argument 'feedOperationContext' must not be null!"); + feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); } }) @@ -193,10 +201,14 @@ private Mono> nextPage(RxDocumentServiceRequest request) { return; } - if (request.getResourceType() == ResourceType.Document && Configs.isPartitionLevelCircuitBreakerEnabled()) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + + checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); + checkNotNull(feedOperationContext, "Argument 'feedOperationContext' must not be null!"); + if (feedOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { From 7d0d88268b1ee3ea4f852f12a9ef696d2615f93d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 31 May 2024 13:07:41 -0400 Subject: [PATCH 055/140] Refactoring. --- .../implementation/ChangeFeedQueryImpl.java | 6 +- .../DocumentServiceRequestContext.java | 20 +-- ...eedOperationContextForCircuitBreaker.java} | 5 +- ...intOperationContextForCircuitBreaker.java} | 6 +- .../implementation/RxDocumentClientImpl.java | 152 +++++++++--------- .../cosmos/implementation/query/Fetcher.java | 20 +-- 6 files changed, 99 insertions(+), 110 deletions(-) rename sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/{FeedOperationContext.java => FeedOperationContextForCircuitBreaker.java} (84%) rename sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/{PointOperationContext.java => PointOperationContextForCircuitBreaker.java} (82%) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java index da77530b69eb..768ed0d4595e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java @@ -3,8 +3,6 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.CosmosItemSerializer; -import com.azure.cosmos.implementation.apachecommons.collections.CollectionUtils; -import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStateV1; import com.azure.cosmos.implementation.feedranges.FeedRangeInternal; @@ -18,8 +16,6 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; -import java.net.URI; -import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -147,7 +143,7 @@ private RxDocumentServiceRequest createDocumentServiceRequest() { if (request.requestContext != null) { request.requestContext.setExcludeRegions(options.getExcludedRegions()); - request.requestContext.setFeedOperationContext(new FeedOperationContext(new ConcurrentHashMap<>(), false)); + request.requestContext.setFeedOperationContext(new FeedOperationContextForCircuitBreaker(new ConcurrentHashMap<>(), false)); } return request; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index 456e0455be65..e1c97e7f1191 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -59,9 +59,9 @@ public class DocumentServiceRequestContext implements Cloneable { // For cancelled rntbd requests, track the response as OperationCancelledException which later will be used to populate the cosmosDiagnostics public final Map rntbdCancelledRequestMap = new ConcurrentHashMap<>(); - private PointOperationContext pointOperationContext; + private PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker; - private FeedOperationContext feedOperationContext; + private FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker; private volatile Supplier clientRetryPolicySupplier; public DocumentServiceRequestContext() {} @@ -189,20 +189,20 @@ public void setUnavailableRegionsForPartition(List unavailableRegionsFor this.unavailableRegionsForPartition = unavailableRegionsForPartition; } - public PointOperationContext getPointOperationContext() { - return pointOperationContext; + public PointOperationContextForCircuitBreaker getPointOperationContext() { + return pointOperationContextForCircuitBreaker; } - public void setPointOperationContext(PointOperationContext pointOperationContext) { - this.pointOperationContext = pointOperationContext; + public void setPointOperationContext(PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { + this.pointOperationContextForCircuitBreaker = pointOperationContextForCircuitBreaker; } - public FeedOperationContext getFeedOperationContext() { - return feedOperationContext; + public FeedOperationContextForCircuitBreaker getFeedOperationContext() { + return feedOperationContextForCircuitBreaker; } - public void setFeedOperationContext(FeedOperationContext feedOperationContext) { - this.feedOperationContext = feedOperationContext; + public void setFeedOperationContext(FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker) { + this.feedOperationContextForCircuitBreaker = feedOperationContextForCircuitBreaker; } public long getApproximateBloomFilterInsertionCount() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java similarity index 84% rename from sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java rename to sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java index 62ef75a312b2..e9923887f881 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java @@ -4,15 +4,14 @@ package com.azure.cosmos.implementation; import java.util.Map; -import java.util.Set; -public class FeedOperationContext { +public class FeedOperationContextForCircuitBreaker { private final Map partitionKeyRangesWithSuccess; private final boolean isThresholdBasedAvailabilityStrategyEnabled; private boolean isRequestHedged; - public FeedOperationContext(Map partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { + public FeedOperationContextForCircuitBreaker(Map partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { this.partitionKeyRangesWithSuccess = partitionKeyRangesWithSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java similarity index 82% rename from sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java rename to sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java index 1857666c033c..e2df15b1f9bd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java @@ -3,11 +3,9 @@ package com.azure.cosmos.implementation; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; -public class PointOperationContext { +public class PointOperationContextForCircuitBreaker { private final AtomicBoolean hasOperationSeenSuccess; @@ -15,7 +13,7 @@ public class PointOperationContext { private boolean isRequestHedged; - public PointOperationContext(AtomicBoolean hasOperationSeenSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { + public PointOperationContextForCircuitBreaker(AtomicBoolean hasOperationSeenSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { this.hasOperationSeenSuccess = hasOperationSeenSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 26d7f6161e72..0928399eeaeb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1899,7 +1899,7 @@ private Mono getBatchDocumentRequest(DocumentClientRet request.requestContext.setExcludeRegions(options.getExcludeRegions()); } - request.requestContext.setPointOperationContext(new PointOperationContext(new AtomicBoolean(false), false)); + request.requestContext.setPointOperationContext(new PointOperationContextForCircuitBreaker(new AtomicBoolean(false), false)); return this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request) .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) @@ -2196,14 +2196,14 @@ public Mono> createDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Create, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> createDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> createDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, - pointOperationContext, + pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2217,7 +2217,7 @@ private Mono> createDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap) { ScopedDiagnosticsFactory scopedDiagnosticsFactory = new ScopedDiagnosticsFactory(clientContextOverride, false); @@ -2244,7 +2244,7 @@ private Mono> createDocumentCore( scopedDiagnosticsFactory, requestReference, collectionRoutingMap, - pointOperationContext), + pointOperationContextForCircuitBreaker), requestRetryPolicy), scopedDiagnosticsFactory ), requestReference); @@ -2259,7 +2259,7 @@ private Mono> createDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference documentServiceRequestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { logger.debug("Creating a Document. collectionLink: [{}]", collectionLink); @@ -2271,7 +2271,7 @@ private Mono> createDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); documentServiceRequestReference.set(request); - request.requestContext.setPointOperationContext(pointOperationContext); + request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); if (requestRetryPolicy != null) { requestRetryPolicy.onBeforeSendRequest(request); @@ -2329,11 +2329,11 @@ private Mono handleRegionFeedbackForPointOperation( checkNotNull(succeededRequest.requestContext, "Argument 'succeededRequest.requestContext' must not be null!"); - PointOperationContext pointOperationContext = succeededRequest.requestContext.getPointOperationContext(); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = succeededRequest.requestContext.getPointOperationContext(); - checkNotNull(pointOperationContext, "Argument 'pointOperationContext' must not be null!"); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); - pointOperationContext.setHasOperationSeenSuccess(); + pointOperationContextForCircuitBreaker.setHasOperationSeenSuccess(); } }) .doOnError(throwable -> { @@ -2345,13 +2345,13 @@ private Mono handleRegionFeedbackForPointOperation( checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); - PointOperationContext pointOperationContext = failedRequest.requestContext.getPointOperationContext(); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContext(); - checkNotNull(pointOperationContext, "Argument 'pointOperationContext' must not be null!"); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); - if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { + if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { this.handleLocationExceptionForPartitionKeyRange(failedRequest); } } else { @@ -2372,13 +2372,13 @@ private Mono handleRegionFeedbackForPointOperation( checkNotNull(potentiallyFailedRequest.requestContext, "Argument 'potentiallyFailedRequest.requestContext' must not be null!"); - PointOperationContext pointOperationContext = potentiallyFailedRequest.requestContext.getPointOperationContext(); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = potentiallyFailedRequest.requestContext.getPointOperationContext(); - checkNotNull(pointOperationContext, "Argument 'pointOperationContext' must not be null!"); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); - if (pointOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { + if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!pointOperationContext.getIsRequestHedged() && pointOperationContext.getHasOperationSeenSuccess()) { + if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); } } else { @@ -2443,8 +2443,8 @@ public Mono> upsertDocument(String collectionLink, Ob return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Upsert, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> upsertDocumentCore( - collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap), + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> upsertDocumentCore( + collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), collectionLink); @@ -2457,7 +2457,7 @@ private Mono> upsertDocumentCore( boolean disableAutomaticIdGeneration, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -2483,7 +2483,7 @@ private Mono> upsertDocumentCore( scopedDiagnosticsFactory, requestReference, collectionRoutingMap, - pointOperationContext), + pointOperationContextForCircuitBreaker), finalRetryPolicyInstance), scopedDiagnosticsFactory), requestReference); } @@ -2497,7 +2497,7 @@ private Mono> upsertDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { logger.debug("Upserting a Document. collectionLink: [{}]", collectionLink); @@ -2517,7 +2517,7 @@ private Mono> upsertDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); - request.requestContext.setPointOperationContext(pointOperationContext); + request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(request); if (retryPolicyInstance != null) { @@ -2541,13 +2541,13 @@ public Mono> replaceDocument(String documentLink, Obj return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> replaceDocumentCore( documentLink, document, opt, e2ecfg, clientCtxOverride, - pointOperationContext, + pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2560,7 +2560,7 @@ private Mono> replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -2588,7 +2588,7 @@ private Mono> replaceDocumentCore( scopedDiagnosticsFactory, requestReference, collectionRoutingMap, - pointOperationContext), + pointOperationContextForCircuitBreaker), requestRetryPolicy), scopedDiagnosticsFactory), requestReference); } @@ -2602,7 +2602,7 @@ private Mono> replaceDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (StringUtils.isEmpty(documentLink)) { @@ -2623,7 +2623,7 @@ private Mono> replaceDocumentInternal( clientContextOverride, requestReference, collectionRoutingMap, - pointOperationContext); + pointOperationContextForCircuitBreaker); } catch (Exception e) { logger.debug("Failure in replacing a document due to [{}]", e.getMessage()); @@ -2636,12 +2636,12 @@ public Mono> replaceDocument(Document document, Reque return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> replaceDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> replaceDocumentCore( document, opt, e2ecfg, clientCtxOverride, - pointOperationContext, + pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2653,7 +2653,7 @@ private Mono> replaceDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap) { DocumentClientRetryPolicy requestRetryPolicy = @@ -2675,7 +2675,7 @@ private Mono> replaceDocumentCore( clientContextOverride, requestReference, collectionRoutingMap, - pointOperationContext), + pointOperationContextForCircuitBreaker), requestRetryPolicy), requestReference); } @@ -2687,7 +2687,7 @@ private Mono> replaceDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (document == null) { @@ -2702,7 +2702,7 @@ private Mono> replaceDocumentInternal( clientContextOverride, requestReference, collectionRoutingMap, - pointOperationContext); + pointOperationContextForCircuitBreaker); } catch (Exception e) { logger.debug("Failure in replacing a database due to [{}]", e.getMessage()); @@ -2718,7 +2718,7 @@ private Mono> replaceDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { if (document == null) { throw new IllegalArgumentException("document"); @@ -2779,7 +2779,7 @@ private Mono> replaceDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); - req.requestContext.setPointOperationContext(pointOperationContext); + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); if (retryPolicyInstance != null) { @@ -2828,13 +2828,13 @@ public Mono> patchDocument(String documentLink, return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> patchDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> patchDocumentCore( documentLink, cosmosPatchOperations, opt, e2ecfg, clientCtxOverride, - pointOperationContext, + pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2847,7 +2847,7 @@ private Mono> patchDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -2869,7 +2869,7 @@ private Mono> patchDocumentCore( scopedDiagnosticsFactory, requestReference, collectionRoutingMap, - pointOperationContext), + pointOperationContextForCircuitBreaker), documentClientRetryPolicy), scopedDiagnosticsFactory), requestReference); } @@ -2882,7 +2882,7 @@ private Mono> patchDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { checkArgument(StringUtils.isNotEmpty(documentLink), "expected non empty documentLink"); checkNotNull(cosmosPatchOperations, "expected non null cosmosPatchOperations"); @@ -2946,7 +2946,7 @@ private Mono> patchDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); - req.requestContext.setPointOperationContext(pointOperationContext); + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); if (retryPolicyInstance != null) { @@ -2963,13 +2963,13 @@ public Mono> deleteDocument(String documentLink, Requ return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> deleteDocumentCore( documentLink, null, opt, e2ecfg, clientCtxOverride, - pointOperationContext, + pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -2981,13 +2981,13 @@ public Mono> deleteDocument(String documentLink, Inte return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> deleteDocumentCore( + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> deleteDocumentCore( documentLink, internalObjectNode, opt, e2ecfg, clientCtxOverride, - pointOperationContext, + pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), @@ -3000,7 +3000,7 @@ private Mono> deleteDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -3022,7 +3022,7 @@ private Mono> deleteDocumentCore( scopedDiagnosticsFactory, requestReference, collectionRoutingMap, - pointOperationContext), + pointOperationContextForCircuitBreaker), requestRetryPolicy), scopedDiagnosticsFactory), requestReference); } @@ -3035,7 +3035,7 @@ private Mono> deleteDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (StringUtils.isEmpty(documentLink)) { @@ -3072,7 +3072,7 @@ private Mono> deleteDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMap.v); - req.requestContext.setPointOperationContext(pointOperationContext); + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); if (retryPolicyInstance != null) { @@ -3140,7 +3140,7 @@ private Mono> readDocument( return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Read, - (opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, pointOperationContext, collectionRoutingMap), + (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> readDocumentCore(documentLink, opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap), options, false, innerDiagnosticsFactory, @@ -3152,7 +3152,7 @@ private Mono> readDocumentCore( RequestOptions options, CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap) { RequestOptions nonNullRequestOptions = options != null ? options : new RequestOptions(); @@ -3172,7 +3172,7 @@ private Mono> readDocumentCore( scopedDiagnosticsFactory, requestReference, collectionRoutingMap, - pointOperationContext), + pointOperationContextForCircuitBreaker), retryPolicyInstance), scopedDiagnosticsFactory ), requestReference); @@ -3185,7 +3185,7 @@ private Mono> readDocumentInternal( DiagnosticsClientContext clientContextOverride, AtomicReference requestReference, Utils.ValueHolder collectionRoutingMap, - PointOperationContext pointOperationContext) { + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { try { if (StringUtils.isEmpty(documentLink)) { @@ -3211,7 +3211,7 @@ private Mono> readDocumentInternal( addPartitionLevelUnavailableRegionsForRequest(req, options, collectionRoutingMap.v); - req.requestContext.setPointOperationContext(pointOperationContext); + req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); if (retryPolicyInstance != null) { @@ -5863,9 +5863,9 @@ private Mono> wrapPointOperationWithAvailabilityStrat if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful, false); - pointOperationContextForMainRequest.setIsRequestHedged(false); - return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForMainRequest, collectionRoutingMapValueHolder); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker(isOperationSuccessful, false); + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); + return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder); } ThresholdBasedAvailabilityStrategy availabilityStrategy = @@ -5884,10 +5884,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient - PointOperationContext pointOperationContextForMainRequest = new PointOperationContext(isOperationSuccessful, true); - pointOperationContextForMainRequest.setIsRequestHedged(false); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker(isOperationSuccessful, true); + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); Mono initialMonoAcrossAllRegions = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForMainRequest, collectionRoutingMapValueHolder) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isCosmosException, @@ -5914,10 +5914,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors - PointOperationContext pointOperationContextForHedgedRequest = new PointOperationContext(isOperationSuccessful, true); - pointOperationContextForHedgedRequest.setIsRequestHedged(true); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForHedgedRequest = new PointOperationContextForCircuitBreaker(isOperationSuccessful, true); + pointOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); Mono regionalCrossRegionRetryMono = - callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForHedgedRequest, collectionRoutingMapValueHolder) + callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForHedgedRequest, collectionRoutingMapValueHolder) .map(NonTransientPointOperationResult::new) .onErrorResume( RxDocumentClientImpl::isNonTransientCosmosException, @@ -6205,9 +6205,9 @@ private Mono executeFeedOperationWithAvailabilityStrategy( if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess, false); - feedOperationContextForMainRequest.setIsRequestHedged(false); - req.requestContext.setFeedOperationContext(feedOperationContextForMainRequest); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForMainRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, false); + feedOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); + req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForMainRequest); return feedOperation.apply(retryPolicyFactory, req); } @@ -6225,9 +6225,9 @@ private Mono executeFeedOperationWithAvailabilityStrategy( // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient - FeedOperationContext feedOperationContextForMainRequest = new FeedOperationContext(partitionKeyRangesWithSuccess, true); - feedOperationContextForMainRequest.setIsRequestHedged(false); - clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForMainRequest); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForMainRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); + feedOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); + clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForMainRequest); Mono> initialMonoAcrossAllRegions = feedOperation.apply(retryPolicyFactory, clonedRequest) .map(NonTransientFeedOperationResult::new) @@ -6253,9 +6253,9 @@ private Mono executeFeedOperationWithAvailabilityStrategy( region) ); - FeedOperationContext feedOperationContextForHedgedRequest = new FeedOperationContext(partitionKeyRangesWithSuccess, true); - feedOperationContextForHedgedRequest.setIsRequestHedged(true); - clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForHedgedRequest); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForHedgedRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); + feedOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); + clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForHedgedRequest); // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values @@ -6358,7 +6358,7 @@ Mono> apply( RequestOptions requestOptions, CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyPolicyConfig, DiagnosticsClientContext clientContextOverride, - PointOperationContext pointOperationContext, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker, Utils.ValueHolder collectionRoutingMap); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 7f8ac5c3c7ce..539d2f5075cf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -4,14 +4,11 @@ package com.azure.cosmos.implementation.query; import com.azure.cosmos.CosmosDiagnostics; -import com.azure.cosmos.implementation.Configs; -import com.azure.cosmos.implementation.FeedOperationContext; +import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; -import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; -import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.ModelBridgeInternal; @@ -21,7 +18,6 @@ import reactor.core.publisher.SignalType; import java.net.URI; -import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; @@ -180,11 +176,11 @@ private Mono> nextPage(RxDocumentServiceRequest request) { checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); - FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContext(); - checkNotNull(feedOperationContext, "Argument 'feedOperationContext' must not be null!"); + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' must not be null!"); - feedOperationContext.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); + feedOperationContextForCircuitBreaker.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); } }) .doOnError(throwable -> completed.set(true)) @@ -205,12 +201,12 @@ private Mono> nextPage(RxDocumentServiceRequest request) { checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); - FeedOperationContext feedOperationContext = request.requestContext.getFeedOperationContext(); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContext(); - checkNotNull(feedOperationContext, "Argument 'feedOperationContext' must not be null!"); + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' must not be null!"); - if (feedOperationContext.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!feedOperationContext.getIsRequestHedged() && feedOperationContext.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { + if (feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + if (!feedOperationContextForCircuitBreaker.getIsRequestHedged() && feedOperationContextForCircuitBreaker.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { this.handleLocationExceptionForPartitionKeyRange(request); From 678ec7f6e50eaefc65dc3cee34be8a3dc1be86f4 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 31 May 2024 16:17:04 -0400 Subject: [PATCH 056/140] Fixing CI pipeline. --- .../query/DocumentProducerTest.java | 23 +++++++++++++++++++ ...itionEndpointManagerForCircuitBreaker.java | 2 ++ .../query/ChangeFeedFetcher.java | 8 +++++-- .../implementation/query/Paginator.java | 4 +++- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 064bf25bf01b..3f8ce214164b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -548,6 +548,8 @@ public void simple() { , responses)); IDocumentQueryClient queryClient = Mockito.mock(IDocumentQueryClient.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); RxDocumentServiceRequest req = invocation.getArgument(3); @@ -567,6 +569,9 @@ public void simple() { return Mono.just(req); }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer<>( @@ -645,6 +650,7 @@ public void retries() { behaviourAfterException); IDocumentQueryClient queryClient = Mockito.mock(IDocumentQueryClient.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); @@ -665,6 +671,9 @@ public void retries() { return Mono.just(req); }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer<>( @@ -747,6 +756,8 @@ public void retriesExhausted() { exceptionBehaviour); IDocumentQueryClient queryClient = Mockito.mock(IDocumentQueryClient.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); RxDocumentServiceRequest req = invocation.getArgument(3); @@ -766,6 +777,9 @@ public void retriesExhausted() { return Mono.just(req); }).when(queryClient).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(queryClient).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); + String initialContinuationToken = "initial-cp"; DocumentProducer documentProducer = new DocumentProducer( @@ -879,6 +893,7 @@ private int getLastValueInAsc(int initialValue, List> res private IDocumentQueryClient mockQueryClient(List replacementRanges) { IDocumentQueryClient client = Mockito.mock(IDocumentQueryClient.class); RxPartitionKeyRangeCache cache = Mockito.mock(RxPartitionKeyRangeCache.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); doAnswer(invocation -> { Supplier retryPolicyFactory = invocation.getArgument(2); @@ -888,17 +903,25 @@ private IDocumentQueryClient mockQueryClient(List replacement return feedOperation.apply(retryPolicyFactory, req); }).when(client).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + doReturn(cache).when(client).getPartitionKeyRangeCache(); + doReturn(Mono.just(new Utils.ValueHolder<>(replacementRanges))) .when(cache).tryGetOverlappingRangesAsync(any(), any(), any(), anyBoolean(), ArgumentMatchers.any()); + doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); return Mono.just(req); }).when(client).populateFeedRangeHeader(any()); + doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); return Mono.just(req); }).when(client).addPartitionLevelUnavailableRegionsOnRequest(any(), any()); + + doReturn(globalPartitionEndpointManagerForCircuitBreaker).when(client).getGlobalPartitionEndpointManagerForCircuitBreaker(); + doReturn(false).when(globalPartitionEndpointManagerForCircuitBreaker).isPartitionLevelCircuitBreakingApplicable(any()); + return client; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 549ed89f6fb0..7409eef5ac43 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -472,6 +472,8 @@ public LocationSpecificContext handleSuccess( Instant unavailableSinceActual = locationSpecificContext.unavailableSince; if (!forceStatusChange) { if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { + + // todo: make debug logger.info("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java index b7775732a36a..8901f3c94c7e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java @@ -5,6 +5,8 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.GoneException; import com.azure.cosmos.implementation.InvalidPartitionExceptionRetryPolicy; import com.azure.cosmos.implementation.MetadataDiagnosticsContext; @@ -47,8 +49,10 @@ public ChangeFeedFetcher( int top, int maxItemCount, boolean isSplitHandlingDisabled, - OperationContextAndListenerTuple operationContext) { - super(executeFunc, true, top, maxItemCount, operationContext, null, null, null); + OperationContextAndListenerTuple operationContext, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { + super(executeFunc, true, top, maxItemCount, operationContext, null, globalEndpointManager, globalPartitionEndpointManagerForCircuitBreaker); checkNotNull(client, "Argument 'client' must not be null."); checkNotNull(createRequestFunc, "Argument 'createRequestFunc' must not be null."); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java index a8355b40e369..94029c89ede6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java @@ -109,7 +109,9 @@ public static Flux> getChangeFeedQueryResultAsObservable( top, maxPageSize, isSplitHandlingDisabled, - operationContext + operationContext, + client.getGlobalEndpointManager(), + client.getGlobalPartitionEndpointManagerForCircuitBreaker() ), preFetchCount); } From 7e0029a89a633e3055b7fa1b7236b13f0ec384d3 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 1 Jun 2024 12:43:46 -0400 Subject: [PATCH 057/140] Fixing CI pipeline. --- .../implementation/query/FetcherTest.java | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java index 9b60e6777dc0..fd221f2e2556 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java @@ -3,6 +3,8 @@ package com.azure.cosmos.implementation.query; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; @@ -12,6 +14,7 @@ import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.models.ModelBridgeInternal; import io.reactivex.subscribers.TestSubscriber; +import org.mockito.Mockito; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import reactor.core.publisher.Mono; @@ -60,6 +63,9 @@ public static Object[][] queryParamProvider() { @Test(groups = { "unit" }, dataProvider = "queryParams") public void query(CosmosQueryRequestOptions options, int top) { + GlobalEndpointManager globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreakerMock = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + FeedResponse fp1 = FeedResponseBuilder.queryFeedResponseBuilder(Document.class) .withContinuationToken("cp1") .withResults(new Document(), new Document(), new Document()) @@ -95,6 +101,8 @@ public void query(CosmosQueryRequestOptions options, int top) { return Mono.just(rsp); }; + Mockito.when(globalPartitionEndpointManagerForCircuitBreakerMock.isPartitionLevelCircuitBreakingApplicable(Mockito.any())).thenReturn(false); + ServerSideOnlyContinuationFetcherImpl fetcher = new ServerSideOnlyContinuationFetcherImpl<>(createRequestFunc, executeFunc, ModelBridgeInternal.getRequestContinuationFromQueryRequestOptions(options), false, top, ModelBridgeInternal.getMaxItemCountFromQueryRequestOptions(options), @@ -107,8 +115,8 @@ public void query(CosmosQueryRequestOptions options, int top) { .CosmosQueryRequestOptionsHelper .getCosmosQueryRequestOptionsAccessor() .getCancelledRequestDiagnosticsTracker(options), - null, - null); + globalEndpointManagerMock, + globalPartitionEndpointManagerForCircuitBreakerMock); validateFetcher(fetcher, options, top, feedResponseList); } @@ -136,6 +144,9 @@ private void validateFetcher(ServerSideOnlyContinuationFetcherImpl fet @Test(groups = { "unit" }) public void changeFeed() { + GlobalEndpointManager globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreakerMock = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); + CosmosChangeFeedRequestOptions options = CosmosChangeFeedRequestOptions.createForProcessingFromBeginning( FeedRangeEpkImpl.forFullRange()); @@ -166,6 +177,8 @@ public void changeFeed() { Function>> executeFunc = request -> Mono.just(feedResponseList.get(executeIndex.getAndIncrement())); + Mockito.when(globalPartitionEndpointManagerForCircuitBreakerMock.isPartitionLevelCircuitBreakingApplicable(Mockito.any())).thenReturn(false); + ServerSideOnlyContinuationFetcherImpl fetcher = new ServerSideOnlyContinuationFetcherImpl<>(createRequestFunc, executeFunc, null, isChangeFeed, top, options.getMaxItemCount(), @@ -174,8 +187,8 @@ public void changeFeed() { .getCosmosChangeFeedRequestOptionsAccessor() .getOperationContext(options), null, - null, - null); + globalEndpointManagerMock, + globalPartitionEndpointManagerForCircuitBreakerMock); validateFetcher(fetcher, options, feedResponseList); } From f8dba2e8157739abd2e0e6e0628c5ee5f34e0643 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 1 Jun 2024 13:17:36 -0400 Subject: [PATCH 058/140] Handle server-generated 500 errors for circuit breaking purposes. --- .../implementation/ClientRetryPolicy.java | 38 ++++++++++++++----- 1 file changed, 28 insertions(+), 10 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index b0f4b1336694..7d8705f6d2e4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -52,14 +52,14 @@ public class ClientRetryPolicy extends DocumentClientRetryPolicy { private RxDocumentServiceRequest request; private RxCollectionCache rxCollectionCache; private final FaultInjectionRequestContext faultInjectionRequestContext; - private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; + private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, GlobalEndpointManager globalEndpointManager, boolean enableEndpointDiscovery, ThrottlingRetryOptions throttlingRetryOptions, RxCollectionCache rxCollectionCache, - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { this.globalEndpointManager = globalEndpointManager; this.failoverRetryCount = 0; @@ -75,7 +75,7 @@ public ClientRetryPolicy(DiagnosticsClientContext diagnosticsClientContext, false); this.rxCollectionCache = rxCollectionCache; this.faultInjectionRequestContext = new FaultInjectionRequestContext(); - this.globalPartitionEndpointManager = globalPartitionEndpointManager; + this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; } @Override @@ -178,6 +178,14 @@ public Mono shouldRetry(Exception e) { ); } + if (clientException != null && + Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.INTERNAL_SERVER_ERROR) && + Exceptions.isSubStatusCode(clientException, HttpConstants.SubStatusCodes.UNKNOWN)) { + + logger.info("Internal server error - IsReadRequest {}", this.isReadRequest, e); + return this.shouldRetryOnInternalServerError(); + } + return this.throttlingRetry.shouldRetry(e); } @@ -334,9 +342,9 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( boolean nonIdempotentWriteRetriesEnabled, CosmosException cosmosException) { - // if partition-level circuit breaker is enabled - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - this.globalPartitionEndpointManager.handleLocationExceptionForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + this.globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); } // The request has failed with 503, SDK need to decide whether it is safe to retry for write operations @@ -393,11 +401,21 @@ private Mono shouldRetryOnRequestTimeout( boolean isReadRequest, boolean nonIdempotentWriteRetriesEnabled) { - if (Configs.isPartitionLevelCircuitBreakerEnabled() && - !isReadRequest && - !nonIdempotentWriteRetriesEnabled) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + if (!isReadRequest && !nonIdempotentWriteRetriesEnabled) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange( + request, + request.requestContext.locationEndpointToRoute); + } + } + + return Mono.just(ShouldRetryResult.NO_RETRY); + } + + private Mono shouldRetryOnInternalServerError() { - this.globalPartitionEndpointManager.handleLocationExceptionForPartitionKeyRange( + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange( request, request.requestContext.locationEndpointToRoute); } From f66a2556bf8604717727a229099656607d51697c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 1 Jun 2024 15:37:41 -0400 Subject: [PATCH 059/140] Handling stale collection cache with retries. --- .../implementation/RxDocumentClientImpl.java | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 0928399eeaeb..4c1e52531c37 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -89,6 +89,7 @@ import reactor.core.publisher.Mono; import reactor.core.publisher.SignalType; import reactor.util.concurrent.Queues; +import reactor.util.retry.Retry; import java.io.IOException; import java.io.UnsupportedEncodingException; @@ -5827,7 +5828,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat DiagnosticsClientContext innerDiagnosticsFactory, String collectionLink) { - return this.collectionCache.resolveByNameAsync(null, collectionLink, null) + return Mono.defer(() -> this.collectionCache.resolveByNameAsync(null, collectionLink, null) .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) .flatMap(collectionRoutingMapValueHolder -> { @@ -6001,7 +6002,24 @@ private Mono> wrapPointOperationWithAvailabilityStrat return exception; }) .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); - })); + })) + ).retryWhen(Retry.fixedDelay(10, Duration.ofSeconds(1)).filter(throwable -> { + Throwable unwrappedThrowable = Exceptions.unwrap(throwable); + + if (unwrappedThrowable instanceof NotFoundException) { + // NotFoundException ex = Utils.as(unwrappedThrowable, NotFoundException.class); + // return ex.getMessage().contains("collectionRoutingMapValueHolder.v cannot be null!"); + return true; + } + + return false; + }).doBeforeRetry((retrySignal) -> this.collectionCache + .refresh( + null, + collectionLink, + null) + ) + ); } From 9ac1ff586cdda48d08f72471bcb4fe0f6140831f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 1 Jun 2024 17:20:31 -0400 Subject: [PATCH 060/140] Handling stale collection cache with retries. --- .../implementation/RxDocumentClientImpl.java | 39 +++++++++++++------ .../batch/BulkExecutorUtil.java | 4 +- 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 4c1e52531c37..d8dd951ffa50 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -36,6 +36,7 @@ import com.azure.cosmos.implementation.directconnectivity.ServerStoreModel; import com.azure.cosmos.implementation.directconnectivity.StoreClient; import com.azure.cosmos.implementation.directconnectivity.StoreClientFactory; +import com.azure.cosmos.implementation.directconnectivity.WFConstants; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.http.HttpClient; @@ -5848,7 +5849,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat // todo: investigate retry policy in stale cache scenarios if (collectionRoutingMapValueHolder.v == null) { - return Mono.error(new NotFoundException("collectionRoutingMapValueHolder.v cannot be null!")); + return Mono.error(new CollectionRoutingMapNotFoundException("collectionRoutingMapValueHolder.v cannot be null!")); } nonNullRequestOptions.setPartitionKeyDefinition(collection.getPartitionKey()); @@ -6003,17 +6004,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat }) .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); })) - ).retryWhen(Retry.fixedDelay(10, Duration.ofSeconds(1)).filter(throwable -> { - Throwable unwrappedThrowable = Exceptions.unwrap(throwable); - - if (unwrappedThrowable instanceof NotFoundException) { - // NotFoundException ex = Utils.as(unwrappedThrowable, NotFoundException.class); - // return ex.getMessage().contains("collectionRoutingMapValueHolder.v cannot be null!"); - return true; - } - - return false; - }).doBeforeRetry((retrySignal) -> this.collectionCache + ) + .retryWhen(Retry.fixedDelay(10, Duration.ofSeconds(1)) + .filter(throwable -> throwable instanceof CollectionRoutingMapNotFoundException) + .doBeforeRetry((retrySignal) -> this.collectionCache .refresh( null, collectionLink, @@ -6532,4 +6526,25 @@ public void reset() { this.isMerged.set(false); } } + + static class CollectionRoutingMapNotFoundException extends CosmosException { + + private static final long serialVersionUID = 1L; + + /** + * Instantiates a new Invalid partition exception. + * + * @param msg the msg + */ + public CollectionRoutingMapNotFoundException(String msg) { + super(HttpConstants.StatusCodes.NOTFOUND, msg); + setSubStatus(); + } + + private void setSubStatus() { + this.getResponseHeaders().put( + WFConstants.BackendHeaders.SUB_STATUS, + Integer.toString(HttpConstants.SubStatusCodes.INCORRECT_CONTAINER_RID_SUB_STATUS)); + } + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java index 0042bb612496..c695378e6720 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java @@ -110,7 +110,7 @@ static Mono resolvePartitionKeyRangeId( if (operation instanceof ItemBulkOperation) { final ItemBulkOperation itemBulkOperation = (ItemBulkOperation) operation; - final Mono pkRangeIdMono = Mono.defer(() -> + return Mono.defer(() -> BulkExecutorUtil.getCollectionInfoAsync(docClientWrapper, container, collectionBeforeRecreation.get()) .flatMap(collection -> { final PartitionKeyDefinition definition = collection.getPartitionKey(); @@ -152,8 +152,6 @@ static Mono resolvePartitionKeyRangeId( null) ) ); - - return pkRangeIdMono; } else { throw new UnsupportedOperationException("Unknown CosmosItemOperation."); } From d6d4545fd5861993773e032c96d470e47aa6d52a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 4 Jun 2024 18:29:59 -0400 Subject: [PATCH 061/140] Adding tests. --- .../PartitionLevelCircuitBreakerTests.java | 606 ++++++++++++++++-- ...itionEndpointManagerForCircuitBreaker.java | 20 +- .../implementation/RxDocumentClientImpl.java | 7 +- 3 files changed, 563 insertions(+), 70 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 18fb0d7f57aa..2e77c7ef5494 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -9,13 +9,20 @@ import com.azure.cosmos.CosmosAsyncContainer; import com.azure.cosmos.CosmosAsyncDatabase; import com.azure.cosmos.CosmosClientBuilder; +import com.azure.cosmos.CosmosDiagnosticsContext; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; import com.azure.cosmos.CosmosException; +import com.azure.cosmos.CosmosRegionSwitchHint; +import com.azure.cosmos.SessionRetryOptionsBuilder; import com.azure.cosmos.TestObject; import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; import com.azure.cosmos.faultinjection.FaultInjectionTestBase; +import com.azure.cosmos.implementation.caches.RxCollectionCache; +import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; +import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; +import com.azure.cosmos.implementation.feedranges.FeedRangePartitionKeyImpl; import com.azure.cosmos.implementation.guava25.base.Function; import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchResponse; @@ -43,6 +50,7 @@ import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorResult; import com.azure.cosmos.test.faultinjection.FaultInjectionServerErrorType; import org.testng.SkipException; +import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Factory; @@ -55,29 +63,52 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; -import java.util.stream.Collectors; +import java.util.function.Consumer; -import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.fail; public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { + private static final ImplementationBridgeHelpers.CosmosAsyncContainerHelper.CosmosAsyncContainerAccessor containerAccessor + = ImplementationBridgeHelpers.CosmosAsyncContainerHelper.getCosmosAsyncContainerAccessor(); private List writeRegions; - private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY + private static final CosmosEndToEndOperationLatencyPolicyConfig noEndToEndTimeout + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofDays(1)).build(); + + private static final CosmosEndToEndOperationLatencyPolicyConfig twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) .build(); - private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_TIMEOUT + private static final CosmosEndToEndOperationLatencyPolicyConfig twoSecondEndToEndTimeoutWithoutAvailabilityStrategy = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) .build(); + private Consumer validateDiagnosticsContextHasFirstPreferredRegionOnly = null; + + private Consumer validateDiagnosticsContextHasSecondPreferredRegionOnly = null; + + private Consumer validateDiagnosticsContextHasFirstAndSecondPreferredRegions = null; + + private static final CosmosRegionSwitchHint noRegionSwitchHint = null; + + private String firstPreferredRegion = null; + + private String secondPreferredRegion = null; + + private String sharedAsyncDatabaseId = null; + + private String sharedMultiPartitionAsyncContainerId = null; + + private String singlePartitionAsyncContainerId = null; + @Factory(dataProvider = "clientBuildersWithDirectTcpSession") public PartitionLevelCircuitBreakerTests(CosmosClientBuilder cosmosClientBuilder) { super(cosmosClientBuilder); @@ -91,6 +122,22 @@ public void beforeClass() { DatabaseAccount databaseAccount = globalEndpointManager.getLatestDatabaseAccount(); this.writeRegions = new ArrayList<>(this.getRegionMap(databaseAccount, true).keySet()); + + CosmosAsyncDatabase sharedAsyncDatabase = getSharedCosmosDatabase(testClient); + CosmosAsyncContainer sharedAsyncMultiPartitionContainer = getSharedMultiPartitionCosmosContainerWithIdAsPartitionKey(testClient); + + this.sharedAsyncDatabaseId = sharedAsyncDatabase.getId(); + this.sharedMultiPartitionAsyncContainerId = sharedAsyncMultiPartitionContainer.getId(); + + this.singlePartitionAsyncContainerId = UUID.randomUUID().toString(); + sharedAsyncDatabase.createContainerIfNotExists(this.singlePartitionAsyncContainerId, "/id").block(); + + try { + Thread.sleep(3000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + } finally { logger.debug("beforeClass executed..."); } @@ -108,134 +155,359 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { Function> transitTimeoutRuleGenerator = PartitionLevelCircuitBreakerTests::buildTransitTimeoutRules; + Function> internalServerErrorRuleGenerator + = PartitionLevelCircuitBreakerTests::buildInternalServerErrorRules; + + Function> tooManyRequestsRuleGenerator + = PartitionLevelCircuitBreakerTests::buildTooManyRequestsRules; + + Function> readSessionNotAvailableRuleGenerator + = PartitionLevelCircuitBreakerTests::buildReadSessionNotAvailableRules; + + this.validateDiagnosticsContextHasFirstPreferredRegionOnly = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); + assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + this.validateDiagnosticsContextHasSecondPreferredRegionOnly = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); + assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(2); + assertThat(ctx.getContactedRegionNames()).contains(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); + assertThat(ctx.getContactedRegionNames()).contains(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); + }; + return new Object[][] { new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withHitLimit(12), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withHitLimit(7), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withHitLimit(7), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) .withHitLimit(7), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) .withHitLimit(7), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withHitLimit(7), + .withHitLimit(6), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(7), + .withHitLimit(12), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) - .withHitLimit(7), + .withHitLimit(6), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) .withHitLimit(12), serviceUnavailableRulesGenerator, - null + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)), serverGeneratedGoneRuleGenerator, - TWO_SECOND_TIMEOUT + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)), serverGeneratedGoneRuleGenerator, - TWO_SECOND_TIMEOUT + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)), serverGeneratedGoneRuleGenerator, - TWO_SECOND_TIMEOUT + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)), serverGeneratedGoneRuleGenerator, - TWO_SECOND_TIMEOUT + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)), serverGeneratedGoneRuleGenerator, - TWO_SECOND_TIMEOUT + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)), serverGeneratedGoneRuleGenerator, - TWO_SECOND_TIMEOUT + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)), serverGeneratedGoneRuleGenerator, - TWO_SECOND_TIMEOUT + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), transitTimeoutRuleGenerator, - null + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly }, new Object[] { + String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), transitTimeoutRuleGenerator, - null + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withHitLimit(12), + internalServerErrorRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withHitLimit(12), + internalServerErrorRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withHitLimit(12), + internalServerErrorRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(12), + internalServerErrorRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + tooManyRequestsRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + tooManyRequestsRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + tooManyRequestsRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + tooManyRequestsRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + readSessionNotAvailableRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + readSessionNotAvailableRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + readSessionNotAvailableRuleGenerator, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly } }; } @@ -252,13 +524,21 @@ public Object[][] readManyTestConfigs() { @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs") public void operationHitsTerminalExceptionInFirstPreferredRegion( + String testType, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, Function> faultInjectionRuleGenerator, - CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg) { + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative) { - logger.info("Checking circuit breaking behavior for {}", faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()); + logger.info("Checking circuit breaking behavior for test type {}", testType); List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); @@ -267,25 +547,35 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( throw new SkipException("Test is not applicable to GATEWAY connectivity mode!"); } - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + } - String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/id"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(getProvisionedThroughputForContainer(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType())); + validateStringArg(this.sharedAsyncDatabaseId); + CosmosAsyncDatabase database = client.getDatabase(this.sharedAsyncDatabaseId); + CosmosAsyncContainer container; OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); try { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); + String designatedContainerId = getContainerIdByFaultInjectionOperationType(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()); - Thread.sleep(10_000); + validateStringArg(designatedContainerId); + container = database.getContainer(designatedContainerId); + + RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(client); + + RxCollectionCache collectionCache = ReflectionUtils.getClientCollectionCache(documentClient); + RxPartitionKeyRangeCache partitionKeyRangeCache = ReflectionUtils.getPartitionKeyRangeCache(documentClient); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); int testObjCountToBootstrapFrom = getTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); List testObjects = new ArrayList<>(); @@ -298,16 +588,49 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( FeedRange faultyFeedRange; + final Utils.ValueHolder faultyFeedRangeEpkImpl = new Utils.ValueHolder<>(); + final Utils.ValueHolder faultyFeedRangePartitionKeyImpl = new Utils.ValueHolder<>(); + final Utils.ValueHolder> faultyPartitionKeyRanges = new Utils.ValueHolder<>(); + final Utils.ValueHolder faultyDocumentCollection = new Utils.ValueHolder<>(); + if (testObjects.size() != 1) { faultyFeedRange = FeedRange.forFullRange(); + faultyFeedRangeEpkImpl.v = (FeedRangeEpkImpl) faultyFeedRange; } else { faultyFeedRange = FeedRange.forLogicalPartition(new PartitionKey(testObjects.get(0).getId())); + faultyFeedRangePartitionKeyImpl.v = (FeedRangePartitionKeyImpl) faultyFeedRange; } faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); faultInjectionRuleParamsWrapper.withFaultInjectionApplicableRegions(Arrays.asList(preferredRegions.get(0))); + if (faultyFeedRangeEpkImpl.v != null) { + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangeEpkImpl.v.getRange(), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } else if (faultyFeedRangePartitionKeyImpl.v != null) { + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangePartitionKeyImpl.v.getEffectiveRange(collection.getPartitionKey()), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } + + assertThat(faultyPartitionKeyRanges.v).isNotNull(); + assertThat(faultyPartitionKeyRanges.v).isNotEmpty(); + assertThat(faultyDocumentCollection.v).isNotNull(); + List faultInjectionRules = faultInjectionRuleGenerator.apply(faultInjectionRuleParamsWrapper); if (faultInjectionRules != null && !faultInjectionRules.isEmpty()) { @@ -317,7 +640,6 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( assertThat(faultInjectedFunc).isNotNull().as("faultInjectedFunc cannot be null!"); - if (e2eLatencyPolicyCfg != null) { operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions() .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); @@ -331,50 +653,70 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( operationInvocationParamsWrapper.asyncContainer = container; operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = faultyFeedRange; + operationInvocationParamsWrapper.feedRangeForQuery = faultyFeedRange; CosmosFaultInjectionHelper .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) .block(); + boolean hasReachedCircuitBreakingThreshold = false; + int executionCountAfterCircuitBreakingThreshold = 0; + for (int i = 1; i <= 15; i++) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); - logger.info("Hit count : {}", faultInjectionRules.stream().mapToLong(FaultInjectionRule::getHitCount).sum()); + int expectedCircuitBreakingThreshold = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? 5 : 10; + + if (!hasReachedCircuitBreakingThreshold) { + hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == globalPartitionEndpointManagerForCircuitBreaker.getExceptionCountByPartitionKeyRange( + new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId())); + } else { + executionCountAfterCircuitBreakingThreshold++; + } if (response.cosmosItemResponse != null) { assertThat(response.cosmosItemResponse).isNotNull(); assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); - response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + if (executionCountAfterCircuitBreakingThreshold > 1) { + logger.info("In circuit breaking assertion for item response..."); + validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); + } } else if (response.feedResponse != null) { assertThat(response.feedResponse).isNotNull(); assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); - response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + if (executionCountAfterCircuitBreakingThreshold > 1) { + logger.info("In circuit breaking assertion for feed response..."); + validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); + } } else if (response.cosmosException != null) { assertThat(response.cosmosException).isNotNull(); assertThat(response.cosmosException.getDiagnostics()).isNotNull(); - response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + if (!hasReachedCircuitBreakingThreshold) { + + logger.info("In circuit breaking assertion for exception..."); + CosmosDiagnosticsContext ctx = response.cosmosException.getDiagnostics().getDiagnosticsContext(); + + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); + assertThat(ctx.getContactedRegionNames().iterator().next()).isEqualTo(preferredRegions.get(0).toLowerCase(Locale.ROOT)); + } } else if (response.batchResponse != null) { assertThat(response.batchResponse).isNotNull(); assertThat(response.batchResponse.getDiagnostics()).isNotNull(); - response.batchResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + if (executionCountAfterCircuitBreakingThreshold > 1) { + logger.info("In circuit breaking assertion for batch response..."); + validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); + } } } - logger.info("Sleep for 120 seconds"); - Thread.sleep(120_000); + logger.info("Sleep for 90 seconds"); + Thread.sleep(90_000); for (int i = 16; i <= 30; i++) { operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); @@ -386,16 +728,12 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( assertThat(response.cosmosItemResponse).isNotNull(); assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); - response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); } else if (response.feedResponse != null) { assertThat(response.feedResponse).isNotNull(); assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); - response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); } else if (response.cosmosException != null) { assertThat(response.cosmosException).isNotNull(); assertThat(response.cosmosException.getDiagnostics()).isNotNull(); @@ -407,9 +745,7 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( assertThat(response.batchResponse).isNotNull(); assertThat(response.batchResponse.getDiagnostics()).isNotNull(); - response.batchResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); } } } @@ -422,7 +758,6 @@ public void operationHitsTerminalExceptionInFirstPreferredRegion( fail("Test should have passed!"); } finally { System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); safeClose(client); } } @@ -693,7 +1028,6 @@ public void operationHitsTerminalExceptionInMultipleContainers() { if (response1.cosmosItemResponse != null) { assertThat(response1.cosmosItemResponse).isNotNull(); assertThat(response1.cosmosItemResponse.getDiagnostics()).isNotNull(); - response1.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( regionContacted -> logger.info("Region contacted : {}", regionContacted) ); @@ -975,6 +1309,7 @@ private static Function { private final CosmosItemResponse cosmosItemResponse; @@ -1138,6 +1500,7 @@ private static class OperationInvocationParamsWrapper { public CosmosQueryRequestOptions queryRequestOptions; public CosmosItemRequestOptions patchItemRequestOptions; public FeedRange feedRangeToDrainForChangeFeed; + public FeedRange feedRangeForQuery; } private static class FaultInjectionRuleParamsWrapper { @@ -1315,4 +1678,115 @@ private static List buildTransitTimeoutRules(FaultInjectionR return faultInjectionRules; } + + private static List buildReadSessionNotAvailableRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("read-session-not-available-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildTooManyRequestsRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.TOO_MANY_REQUEST) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("too-many-requests-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static List buildInternalServerErrorRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("internal-server-error-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .hitLimit(paramsWrapper.getHitLimit()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + + private static boolean doesOperationHaveWriteSemantics(FaultInjectionOperationType faultInjectionOperationType) { + switch (faultInjectionOperationType) { + + case DELETE_ITEM: + case PATCH_ITEM: + case UPSERT_ITEM: + case BATCH_ITEM: + case REPLACE_ITEM: + case CREATE_ITEM: + return true; + case READ_ITEM: + case QUERY_ITEM: + case READ_FEED_ITEM: + return false; + default: + throw new IllegalArgumentException("Unsupported operation type : " + faultInjectionOperationType); + } + } + + private static void validateStringArg(String input) { + assertThat(input).isNotNull(); + assertThat(input).isNotEmpty(); + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 7409eef5ac43..6e3e198d48ae 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -107,7 +107,7 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r } String resourceId = request.getResourceId(); - logger.info("Handling success : {}", resourceId); +// logger.info("Handling success : {}", resourceId); PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); URI succeededLocation = request.requestContext.locationEndpointToRoute; @@ -132,7 +132,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso checkNotNull(partitionKeyRange, "Supplied partitionKeyRange cannot be null!"); checkNotNull(resourceId, "Supplied resourceId cannot be null!"); - logger.info("Fetching unavailable regions for resource address : {}", resourceId); +// logger.info("Fetching unavailable regions for resource address : {}", resourceId); PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); @@ -701,6 +701,7 @@ private static int getAllowedExceptionCountToMaintainStatus(LocationUnavailabili case HealthyTentative: return 5; case Healthy: + case Unavailable: return 0; default: throw new IllegalStateException("Unsupported health status: " + status); @@ -744,4 +745,19 @@ private static int getMinimumSuccessCountForStatusUpgrade(LocationUnavailability } } } + + // todo: keep private and access through reflection + public int getExceptionCountByPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { + + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); + + int count = 0; + + for (LocationSpecificContext locationSpecificContext : partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition.values()) { + count += locationSpecificContext.exceptionCountForRead + locationSpecificContext.exceptionCountForWrite; + } + + return count; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index d8dd951ffa50..bed741275fbc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2383,9 +2383,12 @@ private Mono handleRegionFeedbackForPointOperation( if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); } - } else { - this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); } + // todo: investigate below scenario - gets called when INTERNAL_SERVER_ERROR injected + // todo: something is causing cancellation w/o e2e operation timeout set +// else { +// this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); +// } } }); } From fda4bca7fa69b266243b86e366d1d7b953893b2b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 5 Jun 2024 13:17:47 -0400 Subject: [PATCH 062/140] Adding tests. --- .../PartitionLevelCircuitBreakerTests.java | 2 +- ...titionEndpointManagerForCircuitBreaker.java | 18 ++++++++++++++++-- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 2e77c7ef5494..888a9878e66f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -403,7 +403,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withHitLimit(12), + .withHitLimit(6), internalServerErrorRuleGenerator, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java index 6e3e198d48ae..4b82a301f06c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -340,7 +340,8 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe } } - private static class LocationSpecificContext { + // todo: (abhmohanty) decouple this? + public class LocationSpecificContext { private final int exceptionCountForWrite; private final int successCountForWrite; private final int exceptionCountForRead; @@ -665,7 +666,8 @@ public int hashCode() { } } - private enum LocationUnavailabilityStatus { + // todo (abhmohanty): does this need to be public + public enum LocationUnavailabilityStatus { Healthy, HealthyWithFailures, Unavailable, HealthyTentative } @@ -760,4 +762,16 @@ public int getExceptionCountByPartitionKeyRange(PartitionKeyRangeWrapper partiti return count; } + + // todo: keep private and access through reflection + public Map getLocationToLocationSpecificContextMappings(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); + + if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { + return partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition; + } + + return null; + } } From fa151c2d22fde94fdde1782f3bfcc9a73487b029 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 13 Jun 2024 22:07:04 -0400 Subject: [PATCH 063/140] Adding tests. --- .../implementation/ClientRetryPolicyTest.java | 1 + ...EndpointManagerForCircuitBreakerTests.java | 525 +++++ .../PartitionLevelCircuitBreakerTests.java | 1852 +++++++++++++---- ...eCollectionAwareClientRetryPolicyTest.java | 2 +- .../RxGatewayStoreModelTest.java | 1 + .../SpyClientUnderTestFactory.java | 1 + .../AddressResolverTest.java | 2 +- .../GlobalAddressResolverTest.java | 2 +- .../query/DocumentProducerTest.java | 2 +- .../implementation/query/FetcherTest.java | 2 +- .../implementation/AsyncDocumentClient.java | 1 + .../implementation/ChangeFeedQueryImpl.java | 1 + .../implementation/ClientRetryPolicy.java | 1 + .../azure/cosmos/implementation/Configs.java | 19 +- .../DocumentServiceRequestContext.java | 5 +- ...FeedOperationContextForCircuitBreaker.java | 2 + .../implementation/GlobalEndpointManager.java | 3 +- .../cosmos/implementation/RetryPolicy.java | 1 + .../implementation/RxDocumentClientImpl.java | 207 +- .../RxDocumentServiceRequest.java | 1 + .../implementation/RxGatewayStoreModel.java | 7 +- ...nsecutiveExceptionBasedCircuitBreaker.java | 261 +++ ...itionEndpointManagerForCircuitBreaker.java | 314 +-- .../circuitBreaker/ICircuitBreaker.java | 7 + .../LocationSpecificContext.java | 68 + .../PartitionLevelCircuitBreakerConfig.java | 55 + .../directconnectivity/AddressResolver.java | 2 +- .../GlobalAddressResolver.java | 2 +- .../directconnectivity/IAddressResolver.java | 2 +- .../directconnectivity/StoreClient.java | 7 - .../query/ChangeFeedFetcher.java | 2 +- .../cosmos/implementation/query/Fetcher.java | 26 +- .../query/IDocumentQueryClient.java | 2 +- .../implementation/query/Paginator.java | 2 +- ...ServerSideOnlyContinuationFetcherImpl.java | 4 +- .../implementation/routing/LocationCache.java | 2 - .../src/main/java/module-info.java | 2 + 37 files changed, 2586 insertions(+), 810 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java rename sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/{ => circuitBreaker}/GlobalPartitionEndpointManagerForCircuitBreaker.java (67%) create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java index 544b2c990ec1..2e4ae2c9f956 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientRetryPolicyTest.java @@ -6,6 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosException; import com.azure.cosmos.ThrottlingRetryOptions; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.ChannelAcquisitionException; import io.netty.handler.timeout.ReadTimeoutException; import io.reactivex.subscribers.TestSubscriber; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java new file mode 100644 index 000000000000..20e454ec4b50 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -0,0 +1,525 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificContext; +import com.azure.cosmos.implementation.guava25.collect.ImmutableList; +import org.apache.commons.lang3.tuple.Pair; +import org.mockito.Mockito; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import java.net.URI; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import static com.azure.cosmos.implementation.TestUtils.mockDiagnosticsClientContext; +import static org.assertj.core.api.Assertions.assertThat; + +public class GlobalPartitionEndpointManagerForCircuitBreakerTests { + + private final static Pair LocationEastUsEndpointToLocationPair = Pair.of(createUrl("https://contoso-east-us.documents.azure.com"), "eastus"); + private final static Pair LocationEastUs2EndpointToLocationPair = Pair.of(createUrl("https://contoso-east-us-2.documents.azure.com"), "eastus2"); + private final static Pair LocationCentralUsEndpointToLocationPair = Pair.of(createUrl("https://contoso-central-us.documents.azure.com"), "centralus"); + + private static final boolean READ_OPERATION_TRUE = true; + + private GlobalEndpointManager globalEndpointManagerMock; + + @BeforeClass(groups = {"unit"}) + public void beforeClass() { + this.globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); + } + + @DataProvider(name = "partitionLevelCircuitBreakerConfigs") + public Object[][] partitionLevelCircuitBreakerConfigs() { + return new Object[][] { + new Object[] { + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"LOW\"}", + READ_OPERATION_TRUE + }, + new Object[] { + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"MEDIUM\"}", + READ_OPERATION_TRUE + }, + new Object[] { + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"HIGH\"}", + READ_OPERATION_TRUE + }, + new Object[] { + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"LOW\"}", + !READ_OPERATION_TRUE + }, + new Object[] { + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"MEDIUM\"}", + !READ_OPERATION_TRUE + }, + new Object[] { + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"HIGH\"}", + !READ_OPERATION_TRUE + } + }; + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationSuccessForPartitionKeyRange(request); + + Map locationToLocationSpecificContextMappings + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + + LocationSpecificContext locationSpecificContext + = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getReadEndpoints()).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getWriteEndpoints()).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + + Map locationToLocationSpecificContextMappings + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + + LocationSpecificContext locationSpecificContext + = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Map locationToLocationSpecificContextMappings + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + + LocationSpecificContext locationSpecificContext + = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Map locationToLocationSpecificContextMappings + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + + LocationSpecificContext locationSpecificContext + = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + + try { + Thread.sleep(65_000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + + locationSpecificContext = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Map locationToLocationSpecificContextMappings + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + + LocationSpecificContext locationSpecificContext + = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + + try { + Thread.sleep(65_000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + + locationSpecificContext = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + int successCountToUpgradeStatus = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyTentative, readOperationTrue); + + for (int i = 1; i <= successCountToUpgradeStatus + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationSuccessForPartitionKeyRange(request); + } + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + Map locationToLocationSpecificContextMappings + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + + LocationSpecificContext locationSpecificContext + = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + + try { + Thread.sleep(65_000); + } catch (Exception ex) { + throw new RuntimeException(ex); + } + + exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyTentative, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + } + + locationSpecificContext = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void allRegionsUnhealthyHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + globalPartitionEndpointManagerForCircuitBreaker.init(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker + .getConsecutiveExceptionBasedCircuitBreaker() + .getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationEastUsEndpointToLocationPair.getKey()); + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request, LocationCentralUsEndpointToLocationPair.getKey()); + } + + Map locationToLocationSpecificContextMappings + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + + assertThat(locationToLocationSpecificContextMappings).isNull(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( + OperationType operationType, + ResourceType resourceType, + String collectionResourceId, + String partitionKeyRangeId, + String minInclusive, + String maxExclusive, + URI locationEndpointToRoute) { + + RxDocumentServiceRequest request = RxDocumentServiceRequest.create( + mockDiagnosticsClientContext(), + operationType, + resourceType); + + request.setResourceId(collectionResourceId); + + request.requestContext.resolvedPartitionKeyRange = new PartitionKeyRange(partitionKeyRangeId, minInclusive, maxExclusive); + request.requestContext.locationEndpointToRoute = locationEndpointToRoute; + request.requestContext.setExcludeRegions(Collections.emptyList()); + + return request; + } + + private static URI createUrl(String url) { + try { + return new URI(url); + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 888a9878e66f..eb3a54f286f8 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -20,6 +20,7 @@ import com.azure.cosmos.faultinjection.FaultInjectionTestBase; import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.feedranges.FeedRangePartitionKeyImpl; @@ -34,6 +35,7 @@ import com.azure.cosmos.models.CosmosPatchItemRequestOptions; import com.azure.cosmos.models.CosmosPatchOperations; import com.azure.cosmos.models.CosmosQueryRequestOptions; +import com.azure.cosmos.models.CosmosReadManyRequestOptions; import com.azure.cosmos.models.FeedRange; import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.PartitionKey; @@ -55,6 +57,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Factory; import org.testng.annotations.Test; +import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import java.time.Duration; @@ -79,6 +82,7 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { = ImplementationBridgeHelpers.CosmosAsyncContainerHelper.getCosmosAsyncContainerAccessor(); private List writeRegions; + private static final CosmosEndToEndOperationLatencyPolicyConfig noEndToEndTimeout = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofDays(1)).build(); @@ -91,21 +95,63 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) .build(); - private Consumer validateDiagnosticsContextHasFirstPreferredRegionOnly = null; + Consumer validateDiagnosticsContextHasFirstPreferredRegionOnly = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); + assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + Consumer validateDiagnosticsContextHasSecondPreferredRegionOnly = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); + assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + Consumer validateDiagnosticsContextHasFirstAndSecondPreferredRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(2); + assertThat(ctx.getContactedRegionNames()).contains(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); + assertThat(ctx.getContactedRegionNames()).contains(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); + }; + + Consumer validateDiagnosticsContextHasAllRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(this.writeRegions.size()); + + for (String region : this.writeRegions) { + assertThat(ctx.getContactedRegionNames()).contains(region.toLowerCase(Locale.ROOT)); + } + }; + + private final Function> buildServiceUnavailableError + = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; + + private final Function> buildServerGeneratedGoneError + = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneRules; - private Consumer validateDiagnosticsContextHasSecondPreferredRegionOnly = null; + private final Function> buildTooManyRequestsError + = PartitionLevelCircuitBreakerTests::buildTooManyRequestsRules; - private Consumer validateDiagnosticsContextHasFirstAndSecondPreferredRegions = null; + private final Function> buildReadWriteSessionNotAvailableRules + = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableRules; private static final CosmosRegionSwitchHint noRegionSwitchHint = null; + private static final Boolean nonIdempotentWriteRetriesEnabled = true; + private String firstPreferredRegion = null; private String secondPreferredRegion = null; private String sharedAsyncDatabaseId = null; - private String sharedMultiPartitionAsyncContainerId = null; + private String sharedMultiPartitionAsyncContainerIdWhereIdIsPartitionKey = null; + + private String sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey = null; private String singlePartitionAsyncContainerId = null; @@ -124,10 +170,12 @@ public void beforeClass() { this.writeRegions = new ArrayList<>(this.getRegionMap(databaseAccount, true).keySet()); CosmosAsyncDatabase sharedAsyncDatabase = getSharedCosmosDatabase(testClient); - CosmosAsyncContainer sharedAsyncMultiPartitionContainer = getSharedMultiPartitionCosmosContainerWithIdAsPartitionKey(testClient); + CosmosAsyncContainer sharedMultiPartitionCosmosContainerWithIdAsPartitionKey = getSharedMultiPartitionCosmosContainerWithIdAsPartitionKey(testClient); + CosmosAsyncContainer sharedAsyncMultiPartitionContainerWithMyPkAsPartitionKey = getSharedMultiPartitionCosmosContainer(testClient); this.sharedAsyncDatabaseId = sharedAsyncDatabase.getId(); - this.sharedMultiPartitionAsyncContainerId = sharedAsyncMultiPartitionContainer.getId(); + this.sharedMultiPartitionAsyncContainerIdWhereIdIsPartitionKey = sharedMultiPartitionCosmosContainerWithIdAsPartitionKey.getId(); + this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey = sharedAsyncMultiPartitionContainerWithMyPkAsPartitionKey.getId(); this.singlePartitionAsyncContainerId = UUID.randomUUID().toString(); sharedAsyncDatabase.createContainerIfNotExists(this.singlePartitionAsyncContainerId, "/id").block(); @@ -146,658 +194,1333 @@ public void beforeClass() { @DataProvider(name = "partitionLevelCircuitBreakerTestConfigs") public Object[][] partitionLevelCircuitBreakerTestConfigs() { - Function> serviceUnavailableRulesGenerator + Function> generateServiceUnavailableRules = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; - Function> serverGeneratedGoneRuleGenerator + Function> generateServerGeneratedGoneRules = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneRules; - Function> transitTimeoutRuleGenerator + Function> generateTransitTimeoutRules = PartitionLevelCircuitBreakerTests::buildTransitTimeoutRules; - Function> internalServerErrorRuleGenerator + Function> generateInternalServerErrorRules = PartitionLevelCircuitBreakerTests::buildInternalServerErrorRules; - Function> tooManyRequestsRuleGenerator + Function> generateTooManyRequestsRules = PartitionLevelCircuitBreakerTests::buildTooManyRequestsRules; - Function> readSessionNotAvailableRuleGenerator - = PartitionLevelCircuitBreakerTests::buildReadSessionNotAvailableRules; - - this.validateDiagnosticsContextHasFirstPreferredRegionOnly = (ctx) -> { - assertThat(ctx).isNotNull(); - assertThat(ctx.getContactedRegionNames()).isNotNull(); - assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); - assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); - }; - - this.validateDiagnosticsContextHasSecondPreferredRegionOnly = (ctx) -> { - assertThat(ctx).isNotNull(); - assertThat(ctx.getContactedRegionNames()).isNotNull(); - assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); - assertThat(ctx.getContactedRegionNames().stream().iterator().next()).isEqualTo(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); - }; + Function> generateReadOrWriteSessionNotAvailableRules + = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableRules; - this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions = (ctx) -> { - assertThat(ctx).isNotNull(); - assertThat(ctx.getContactedRegionNames()).isNotNull(); - assertThat(ctx.getContactedRegionNames().size()).isEqualTo(2); - assertThat(ctx.getContactedRegionNames()).contains(this.firstPreferredRegion.toLowerCase(Locale.ROOT)); - assertThat(ctx.getContactedRegionNames()).contains(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); - }; + Function> generateRetryWithRules + = PartitionLevelCircuitBreakerTests::buildRetryWithFaultInjectionRules; - return new Object[][] { - new Object[] { + return new Object[][]{ + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withHitLimit(12), - serviceUnavailableRulesGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) - .withHitLimit(7), - serviceUnavailableRulesGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) - .withHitLimit(7), - serviceUnavailableRulesGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) - .withHitLimit(7), - serviceUnavailableRulesGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) - .withHitLimit(7), - serviceUnavailableRulesGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - serviceUnavailableRulesGenerator, + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(12), - serviceUnavailableRulesGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - serviceUnavailableRulesGenerator, + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) - .withHitLimit(12), - serviceUnavailableRulesGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateServiceUnavailableRules, noEndToEndTimeout, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - serverGeneratedGoneRuleGenerator, + generateServerGeneratedGoneRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - serverGeneratedGoneRuleGenerator, + generateServerGeneratedGoneRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - serverGeneratedGoneRuleGenerator, + generateServerGeneratedGoneRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - serverGeneratedGoneRuleGenerator, + generateServerGeneratedGoneRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - serverGeneratedGoneRuleGenerator, + generateServerGeneratedGoneRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - serverGeneratedGoneRuleGenerator, + generateServerGeneratedGoneRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - serverGeneratedGoneRuleGenerator, + generateServerGeneratedGoneRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), - transitTimeoutRuleGenerator, + generateTransitTimeoutRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, - new Object[] { + new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), - transitTimeoutRuleGenerator, + generateTransitTimeoutRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withHitLimit(12), - internalServerErrorRuleGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateInternalServerErrorRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - internalServerErrorRuleGenerator, + generateInternalServerErrorRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) - .withHitLimit(12), - internalServerErrorRuleGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateInternalServerErrorRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(12), - internalServerErrorRuleGenerator, + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateInternalServerErrorRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - tooManyRequestsRuleGenerator, + generateTooManyRequestsRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - tooManyRequestsRuleGenerator, + generateTooManyRequestsRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - tooManyRequestsRuleGenerator, + generateTooManyRequestsRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - tooManyRequestsRuleGenerator, + generateTooManyRequestsRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - readSessionNotAvailableRuleGenerator, + generateReadOrWriteSessionNotAvailableRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - readSessionNotAvailableRuleGenerator, + generateReadOrWriteSessionNotAvailableRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT }, { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - readSessionNotAvailableRuleGenerator, + generateReadOrWriteSessionNotAvailableRules, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateRetryWithRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(6), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[] { + String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateServiceUnavailableRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateServerGeneratedGoneRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateServerGeneratedGoneRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateServerGeneratedGoneRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateServerGeneratedGoneRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateServerGeneratedGoneRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateServerGeneratedGoneRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateServerGeneratedGoneRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + generateTransitTimeoutRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + generateTransitTimeoutRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateInternalServerErrorRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + generateInternalServerErrorRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateInternalServerErrorRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + generateInternalServerErrorRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateTooManyRequestsRules, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateTooManyRequestsRules, + twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateTooManyRequestsRules, + twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, + noRegionSwitchHint, + nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + generateTooManyRequestsRules, + twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT } }; } @DataProvider(name = "readManyTestConfigs") public Object[][] readManyTestConfigs() { - return new Object[][] { - {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofMinutes(6), false, false}, - {FaultInjectionServerErrorType.SERVICE_UNAVAILABLE, 11, Duration.ofSeconds(0), false, false}, - {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, false}, - {FaultInjectionServerErrorType.GONE, Integer.MIN_VALUE, Duration.ofSeconds(60), true, true}, + + Function> executeReadManyOperation = (paramsWrapper) -> { + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + List itemIdentities = paramsWrapper.itemIdentitiesForReadManyOperation; + CosmosReadManyRequestOptions readManyRequestOptions = paramsWrapper.readManyRequestOptions; + + try { + + FeedResponse response = asyncContainer.readMany( + itemIdentities, + readManyRequestOptions, + TestObject.class) + .block(); + + return new ResponseWrapper<>(response); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + + return new Object[][]{ + { + "Test read many operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServiceUnavailableError, + executeReadManyOperation, + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read many operation injected with server-generated GONE in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServerGeneratedGoneError, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read many operation injected with too many requests error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildTooManyRequestsError, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read many operation injected with read/write session not available error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildReadWriteSessionNotAvailableRules, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read many operation injected with service unavailable error in all regions.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions), + this.buildServiceUnavailableError, + executeReadManyOperation, + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + } + }; + } + + @DataProvider(name = "readAllTestConfigs") + public Object[][] readAllTestConfigs() { + + Function> executeReadManyOperation = (paramsWrapper) -> { + CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; + PartitionKey partitionKey = paramsWrapper.partitionKeyForReadAllOperation; + CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; + + try { + + FeedResponse response = asyncContainer.readAllItems( + partitionKey, + queryRequestOptions, + TestObject.class) + .byPage() + .next() + .block(); + + return new ResponseWrapper<>(response); + } catch (Exception ex) { + + if (ex instanceof CosmosException) { + CosmosException cosmosException = Utils.as(ex, CosmosException.class); + return new ResponseWrapper<>(cosmosException); + } + + throw ex; + } + }; + + return new Object[][]{ + { + "Test read all operation injected with service unavailable exception in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServiceUnavailableError, + executeReadManyOperation, + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read all operation injected with server-generated GONE in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServerGeneratedGoneError, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read all operation injected with too many requests error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildTooManyRequestsError, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read all operation injected with read/write session not available error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildReadWriteSessionNotAvailableRules, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read all operation injected with service unavailable error in all regions.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions), + this.buildServiceUnavailableError, + executeReadManyOperation, + noEndToEndTimeout, + noRegionSwitchHint, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + } }; } @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs") - public void operationHitsTerminalExceptionInFirstPreferredRegion( - String testType, + public void operationHitsTerminalExceptionAcrossKRegions( + String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, - Function> faultInjectionRuleGenerator, + Function> generateFaultInjectionRules, CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, CosmosRegionSwitchHint regionSwitchHint, - Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable, - Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative) { - - logger.info("Checking circuit breaking behavior for test type {}", testType); + Boolean nonIdempotentWriteRetriesEnabled, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + ConnectionMode allowedConnectionMode) { List preferredRegions = this.writeRegions; this.firstPreferredRegion = preferredRegions.get(0); this.secondPreferredRegion = preferredRegions.get(1); + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("Test is not applicable to GATEWAY connectivity mode!"); - } - - if (regionSwitchHint != null) { - clientBuilder = clientBuilder - .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + if (connectionPolicy.getConnectionMode() != allowedConnectionMode) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", allowedConnectionMode)); } - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - - validateStringArg(this.sharedAsyncDatabaseId); - CosmosAsyncDatabase database = client.getDatabase(this.sharedAsyncDatabaseId); - - CosmosAsyncContainer container; - OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + CosmosAsyncClient asyncClient = null; + FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); try { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + asyncClient = clientBuilder.buildAsyncClient(); - String designatedContainerId = getContainerIdByFaultInjectionOperationType(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()); + operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom = resolveTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); + int testObjCountToBootstrapFrom = operationInvocationParamsWrapper.itemCountToBootstrapContainerFrom; - validateStringArg(designatedContainerId); - container = database.getContainer(designatedContainerId); + operationInvocationParamsWrapper.containerIdToTarget = resolveContainerIdByFaultInjectionOperationType(faultInjectionOperationType); - RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(client); - - RxCollectionCache collectionCache = ReflectionUtils.getClientCollectionCache(documentClient); - RxPartitionKeyRangeCache partitionKeyRangeCache = ReflectionUtils.getPartitionKeyRangeCache(documentClient); + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker - = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); - - int testObjCountToBootstrapFrom = getTestObjectCountToBootstrapFrom(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(), 15); List testObjects = new ArrayList<>(); for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { TestObject testObject = TestObject.create(); testObjects.add(testObject); - container.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); } FeedRange faultyFeedRange; - final Utils.ValueHolder faultyFeedRangeEpkImpl = new Utils.ValueHolder<>(); - final Utils.ValueHolder faultyFeedRangePartitionKeyImpl = new Utils.ValueHolder<>(); - final Utils.ValueHolder> faultyPartitionKeyRanges = new Utils.ValueHolder<>(); - final Utils.ValueHolder faultyDocumentCollection = new Utils.ValueHolder<>(); - if (testObjects.size() != 1) { faultyFeedRange = FeedRange.forFullRange(); - faultyFeedRangeEpkImpl.v = (FeedRangeEpkImpl) faultyFeedRange; } else { faultyFeedRange = FeedRange.forLogicalPartition(new PartitionKey(testObjects.get(0).getId())); - faultyFeedRangePartitionKeyImpl.v = (FeedRangePartitionKeyImpl) faultyFeedRange; } - faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); - faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); - faultInjectionRuleParamsWrapper.withFaultInjectionApplicableRegions(Arrays.asList(preferredRegions.get(0))); + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith = testObjects; - if (faultyFeedRangeEpkImpl.v != null) { - collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) - .flatMap(collection -> { - faultyDocumentCollection.v = collection; - return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangeEpkImpl.v.getRange(), true, null); - }) - .flatMap(listValueHolder -> { - faultyPartitionKeyRanges.v = listValueHolder.v; - return Mono.just(listValueHolder); - }).block(); - } else if (faultyFeedRangePartitionKeyImpl.v != null) { - collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) - .flatMap(collection -> { - faultyDocumentCollection.v = collection; - return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangePartitionKeyImpl.v.getEffectiveRange(collection.getPartitionKey()), true, null); - }) - .flatMap(listValueHolder -> { - faultyPartitionKeyRanges.v = listValueHolder.v; - return Mono.just(listValueHolder); - }).block(); - } + } catch (Exception ex) { + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); + } finally { + safeClose(asyncClient); + } - assertThat(faultyPartitionKeyRanges.v).isNotNull(); - assertThat(faultyPartitionKeyRanges.v).isNotEmpty(); - assertThat(faultyDocumentCollection.v).isNotNull(); + Function> executeDataPlaneOperation + = resolveDataPlaneOperation(faultInjectionOperationType); - List faultInjectionRules = faultInjectionRuleGenerator.apply(faultInjectionRuleParamsWrapper); + operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions(); - if (faultInjectionRules != null && !faultInjectionRules.isEmpty()) { + if (e2eLatencyPolicyCfg != null) { + operationInvocationParamsWrapper.patchItemRequestOptions = new CosmosPatchItemRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); - Function> faultInjectedFunc = - generateOperation(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()); + operationInvocationParamsWrapper.queryRequestOptions = new CosmosQueryRequestOptions() + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); - assertThat(faultInjectedFunc).isNotNull().as("faultInjectedFunc cannot be null!"); + operationInvocationParamsWrapper.itemRequestOptions + .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } - if (e2eLatencyPolicyCfg != null) { - operationInvocationParamsWrapper.itemRequestOptions = new CosmosItemRequestOptions() - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + if (nonIdempotentWriteRetriesEnabled) { + operationInvocationParamsWrapper.itemRequestOptions + .setNonIdempotentWriteRetryPolicy(true, true); + } - operationInvocationParamsWrapper.queryRequestOptions = new CosmosQueryRequestOptions() - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + 15, + 15); + } - operationInvocationParamsWrapper.patchItemRequestOptions = new CosmosPatchItemRequestOptions() - .setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); - } + @Test(groups = {"multi-master"}, dataProvider = "readManyTestConfigs") + public void readManyOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + ConnectionMode allowedConnectionMode) { - operationInvocationParamsWrapper.asyncContainer = container; - operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = faultyFeedRange; - operationInvocationParamsWrapper.feedRangeForQuery = faultyFeedRange; + List preferredRegions = this.writeRegions; - CosmosFaultInjectionHelper - .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) - .block(); + this.firstPreferredRegion = this.writeRegions.get(0); + this.secondPreferredRegion = this.writeRegions.get(1); - boolean hasReachedCircuitBreakingThreshold = false; - int executionCountAfterCircuitBreakingThreshold = 0; + CosmosAsyncClient asyncClient = null; - for (int i = 1; i <= 15; i++) { + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + operationInvocationParamsWrapper.queryType = QueryType.READ_MANY; - operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); - OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - int expectedCircuitBreakingThreshold = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? 5 : 10; + ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (!hasReachedCircuitBreakingThreshold) { - hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == globalPartitionEndpointManagerForCircuitBreaker.getExceptionCountByPartitionKeyRange( - new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId())); - } else { - executionCountAfterCircuitBreakingThreshold++; - } + if (connectionPolicy.getConnectionMode() != allowedConnectionMode) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", allowedConnectionMode)); + } - if (response.cosmosItemResponse != null) { - assertThat(response.cosmosItemResponse).isNotNull(); - assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + try { - if (executionCountAfterCircuitBreakingThreshold > 1) { - logger.info("In circuit breaking assertion for item response..."); - validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); - } - } else if (response.feedResponse != null) { - assertThat(response.feedResponse).isNotNull(); - assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + asyncClient = clientBuilder.buildAsyncClient(); - if (executionCountAfterCircuitBreakingThreshold > 1) { - logger.info("In circuit breaking assertion for feed response..."); - validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); - } - } else if (response.cosmosException != null) { - assertThat(response.cosmosException).isNotNull(); - assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey; - if (!hasReachedCircuitBreakingThreshold) { + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); - logger.info("In circuit breaking assertion for exception..."); - CosmosDiagnosticsContext ctx = response.cosmosException.getDiagnostics().getDiagnosticsContext(); + List feedRanges = asyncContainer.getFeedRanges().block(); - assertThat(ctx.getContactedRegionNames().size()).isEqualTo(1); - assertThat(ctx.getContactedRegionNames().iterator().next()).isEqualTo(preferredRegions.get(0).toLowerCase(Locale.ROOT)); - } - } else if (response.batchResponse != null) { - assertThat(response.batchResponse).isNotNull(); - assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + assertThat(feedRanges).isNotNull().as("feedRanges is not expected to be null!"); + assertThat(feedRanges).isNotEmpty().as("feedRanges is not expected to be empty!"); - if (executionCountAfterCircuitBreakingThreshold > 1) { - logger.info("In circuit breaking assertion for batch response..."); - validateRegionsContactedWhenShortCircuitRegionMarkedAsUnavailable.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); - } - } - } + Map> partitionKeyToItemIdentityList = new HashMap<>(); + List partitionKeys = new ArrayList<>(); - logger.info("Sleep for 90 seconds"); - Thread.sleep(90_000); + for (FeedRange ignored : feedRanges) { + String pkForFeedRange = UUID.randomUUID().toString(); - for (int i = 16; i <= 30; i++) { - operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); - OperationExecutionResult response = faultInjectedFunc.apply(operationInvocationParamsWrapper); + partitionKeys.add(pkForFeedRange); + partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>()); - logger.info("Hit count : {}", faultInjectionRules.stream().mapToLong(FaultInjectionRule::getHitCount).sum()); + for (int i = 0; i < 10; i++) { + TestObject testObject = TestObject.create(pkForFeedRange); - if (response.cosmosItemResponse != null) { - assertThat(response.cosmosItemResponse).isNotNull(); - assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId())); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getMypk()), new CosmosItemRequestOptions()).block(); + } + } - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); - } else if (response.feedResponse != null) { - assertThat(response.feedResponse).isNotNull(); - assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + CosmosReadManyRequestOptions readManyRequestOptions = new CosmosReadManyRequestOptions(); - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); - } else if (response.cosmosException != null) { - assertThat(response.cosmosException).isNotNull(); - assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + if (e2eLatencyPolicyCfg != null) { + readManyRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); + } - response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response.batchResponse != null) { - assertThat(response.batchResponse).isNotNull(); - assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + operationInvocationParamsWrapper.readManyRequestOptions = readManyRequestOptions; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); - validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); - } - } - } + PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); + FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey); - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); + + operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Test should have passed!"); + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeClose(client); + safeClose(asyncClient); } + + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + 15, + 15); } - @Test(groups = {"multi-master"}, dataProvider = "readManyTestConfigs") - public void readManyOperationHitsTerminalExceptionInFirstPreferredRegion( - FaultInjectionServerErrorType faultInjectionServerErrorType, - int faultInjectionHitCount, - Duration faultInjectionDuration, - boolean shouldEndToEndTimeoutBeInjected, - boolean shouldThresholdBasedAvailabilityStrategyBeEnabled) { + @Test(groups = {"multi-master"}, dataProvider = "readAllTestConfigs") + public void readAllOperationHitsTerminalExceptionAcrossKRegions( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + ConnectionMode allowedConnectionMode) { + + CosmosAsyncClient asyncClient = null; + + OperationInvocationParamsWrapper operationInvocationParamsWrapper = new OperationInvocationParamsWrapper(); + operationInvocationParamsWrapper.queryType = QueryType.READ_ALL; List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("queryWithNoThresholdBasedAvailabilityStrategyHits408InFirstPreferredRegion test is not applicable to GATEWAY connectivity mode!"); + if (connectionPolicy.getConnectionMode() != allowedConnectionMode) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); } - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - - String multiPartitionContainerId = UUID.randomUUID() + "-multi-partition-test-container"; - - CosmosAsyncContainer container = null; - CosmosContainerProperties containerProperties = new CosmosContainerProperties(multiPartitionContainerId, "/mypk"); - ThroughputProperties throughputProperties = ThroughputProperties.createManualThroughput(12_000); - try { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + asyncClient = clientBuilder.buildAsyncClient(); - database.createContainerIfNotExists(containerProperties, throughputProperties).block(); - container = database.getContainer(multiPartitionContainerId); + operationInvocationParamsWrapper.containerIdToTarget = this.sharedMultiPartitionAsyncContainerIdWhereMyPkIsPartitionKey; - Thread.sleep(10_000); + CosmosAsyncContainer asyncContainer = asyncClient.getDatabase(this.sharedAsyncDatabaseId).getContainer(operationInvocationParamsWrapper.containerIdToTarget); + deleteAllDocuments(asyncContainer); - List feedRanges = container.getFeedRanges().block(); + List feedRanges = asyncContainer.getFeedRanges().block(); assertThat(feedRanges).isNotNull().as("feedRanges is not expected to be null!"); assertThat(feedRanges).isNotEmpty().as("feedRanges is not expected to be empty!"); @@ -805,124 +1528,55 @@ public void readManyOperationHitsTerminalExceptionInFirstPreferredRegion( Map> partitionKeyToItemIdentityList = new HashMap<>(); List partitionKeys = new ArrayList<>(); - for (FeedRange feedRange : feedRanges) { + for (FeedRange ignored : feedRanges) { String pkForFeedRange = UUID.randomUUID().toString(); + partitionKeys.add(pkForFeedRange); partitionKeyToItemIdentityList.put(pkForFeedRange, new ArrayList<>()); + for (int i = 0; i < 10; i++) { TestObject testObject = TestObject.create(pkForFeedRange); + partitionKeyToItemIdentityList.get(pkForFeedRange).add(new CosmosItemIdentity(new PartitionKey(pkForFeedRange), testObject.getId())); - container.createItem(testObject, new PartitionKey(pkForFeedRange), new CosmosItemRequestOptions()).block(); + asyncContainer.createItem(testObject, new PartitionKey(testObject.getMypk()), new CosmosItemRequestOptions()).block(); } } - PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); - - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.QUERY_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(faultyPartitionKey)).build()) - .region(preferredRegions.get(0)) - .build(); + CosmosQueryRequestOptions queryRequestOptions = new CosmosQueryRequestOptions(); - FaultInjectionRule faultInjectionRule = null; - - if (faultInjectionServerErrorType == FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) { - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); - - faultInjectionRule = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(faultInjectionHitCount) - .build(); - } else if (faultInjectionServerErrorType == FaultInjectionServerErrorType.GONE) { - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.GONE) - .build(); - - faultInjectionRule = new FaultInjectionRuleBuilder("gone-exception" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .duration(Duration.ofMinutes(7)) - .build(); + if (e2eLatencyPolicyCfg != null) { + queryRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(e2eLatencyPolicyCfg); } - if (faultInjectionRule != null) { - - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container, Arrays.asList(faultInjectionRule)) - .block(); - - for (int i = 1; i <= 15; i++) { - List itemIdentities = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); - - FeedResponse response = container - .readMany(itemIdentities, TestObject.class) - .onErrorResume(throwable -> { - if (throwable instanceof OperationCancelledException) { - logger.error("OperationCancelledException thrown!"); - } - - return Mono.empty(); - }) - .block(); - - logger.info("Hit count : {}", faultInjectionRule.getHitCount()); - - if (response != null) { - assertThat(response).isNotNull(); - assertThat(response.getCosmosDiagnostics()).isNotNull(); - - response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - - logger.info("Sleep for 120 seconds"); - Thread.sleep(120_000); - - for (int i = 16; i <= 30; i++) { - - List itemIdentities = partitionKeyToItemIdentityList.get(partitionKeys.get(0)); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(asyncContainer); - FeedResponse response = container - .readMany(itemIdentities, TestObject.class) - .onErrorResume(throwable -> { - if (throwable instanceof OperationCancelledException) { - logger.error("OperationCancelledException thrown!"); - } - - return Mono.empty(); - }) - .block(); - - logger.info("Hit count : {}", faultInjectionRule.getHitCount()); - - if (response != null) { - assertThat(response).isNotNull(); - assertThat(response.getCosmosDiagnostics()).isNotNull(); + PartitionKey faultyPartitionKey = new PartitionKey(partitionKeys.get(0)); + FeedRange faultyFeedRange = FeedRange.forLogicalPartition(faultyPartitionKey); - response.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - } + operationInvocationParamsWrapper.faultyFeedRange = faultyFeedRange; + operationInvocationParamsWrapper.partitionKeyForReadAllOperation = faultyPartitionKey; + operationInvocationParamsWrapper.queryRequestOptions = queryRequestOptions; - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(faultyFeedRange); } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Query operations should have passed!"); + logger.error("Test failed with ex :", ex); + fail(String.format("Test %s failed in bootstrap stage.", testId)); } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); - safeDeleteCollection(container); - safeClose(client); + safeClose(asyncClient); } + + execute( + testId, + faultInjectionRuleParamsWrapper, + operationInvocationParamsWrapper, + generateFaultInjectionRules, + executeDataPlaneOperation, + regionSwitchHint, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + 15, + 15); } @Test(groups = {"multi-master"}) @@ -954,7 +1608,12 @@ public void operationHitsTerminalExceptionInMultipleContainers() { try { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED", "true"); + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + database.createContainerIfNotExists(containerProperties1, throughputProperties1).block(); container1 = database.getContainer(multiPartitionContainerId1); @@ -1010,7 +1669,7 @@ public void operationHitsTerminalExceptionInMultipleContainers() { OperationInvocationParamsWrapper paramsWrapper1 = new OperationInvocationParamsWrapper(); OperationInvocationParamsWrapper paramsWrapper2 = new OperationInvocationParamsWrapper(); - Function> faultInjectedFunc = generateOperation(FaultInjectionOperationType.READ_ITEM); + Function> faultInjectedFunc = resolveDataPlaneOperation(FaultInjectionOperationType.READ_ITEM); for (int i = 1; i <= 15; i++) { paramsWrapper1.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); @@ -1019,8 +1678,8 @@ public void operationHitsTerminalExceptionInMultipleContainers() { paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); paramsWrapper2.asyncContainer = container2; - OperationExecutionResult response1 = faultInjectedFunc.apply(paramsWrapper1); - OperationExecutionResult response2 = faultInjectedFunc.apply(paramsWrapper2); + ResponseWrapper response1 = faultInjectedFunc.apply(paramsWrapper1); + ResponseWrapper response2 = faultInjectedFunc.apply(paramsWrapper2); logger.info("Hit count : {}", faultInjectionRule1.getHitCount()); logger.info("Hit count : {}", faultInjectionRule2.getHitCount()); @@ -1080,8 +1739,8 @@ public void operationHitsTerminalExceptionInMultipleContainers() { paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); paramsWrapper2.asyncContainer = container2; - OperationExecutionResult response1 = faultInjectedFunc.apply(paramsWrapper1); - OperationExecutionResult response2 = faultInjectedFunc.apply(paramsWrapper2); + ResponseWrapper response1 = faultInjectedFunc.apply(paramsWrapper1); + ResponseWrapper response2 = faultInjectedFunc.apply(paramsWrapper2); logger.info("Hit count : {}", faultInjectionRule1.getHitCount()); logger.info("Hit count : {}", faultInjectionRule2.getHitCount()); @@ -1140,14 +1799,235 @@ public void operationHitsTerminalExceptionInMultipleContainers() { logger.error("Exception thrown :", ex); fail("Test should have passed!"); } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"); + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); safeDeleteCollection(container1); safeDeleteCollection(container2); safeClose(client); } } - private static int getTestObjectCountToBootstrapFrom(FaultInjectionOperationType faultInjectionOperationType, int opCount) { + private void execute( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + OperationInvocationParamsWrapper operationInvocationParamsWrapper, + Function> generateFaultInjectionRules, + Function> executeDataPlaneOperation, + CosmosRegionSwitchHint regionSwitchHint, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { + + logger.info("Checking circuit breaking behavior for test type {}", testId); + + List preferredRegions = this.writeRegions; + + this.firstPreferredRegion = preferredRegions.get(0); + this.secondPreferredRegion = preferredRegions.get(1); + + CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); + + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + + if (regionSwitchHint != null) { + clientBuilder = clientBuilder + .sessionRetryOptions(new SessionRetryOptionsBuilder().regionSwitchHint(regionSwitchHint).build()); + } + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + + validateNonEmptyString(this.sharedAsyncDatabaseId); + CosmosAsyncDatabase database = client.getDatabase(this.sharedAsyncDatabaseId); + + CosmosAsyncContainer container; + + try { + + validateNonEmptyString(operationInvocationParamsWrapper.containerIdToTarget); + container = database.getContainer(operationInvocationParamsWrapper.containerIdToTarget); + + RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(client); + + RxCollectionCache collectionCache = ReflectionUtils.getClientCollectionCache(documentClient); + RxPartitionKeyRangeCache partitionKeyRangeCache = ReflectionUtils.getPartitionKeyRangeCache(documentClient); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); + + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(operationInvocationParamsWrapper.faultyFeedRange); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); + + Utils.ValueHolder faultyFeedRangeEpkImpl = new Utils.ValueHolder<>(); + Utils.ValueHolder faultyFeedRangePartitionKeyImpl = new Utils.ValueHolder<>(); + Utils.ValueHolder> faultyPartitionKeyRanges = new Utils.ValueHolder<>(); + Utils.ValueHolder faultyDocumentCollection = new Utils.ValueHolder<>(); + + assertThat(operationInvocationParamsWrapper.faultyFeedRange).isNotNull().as("Argument 'operationInvocationParamsWrapper.faultyFeedRange' cannot be null!"); + + if (operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangeEpkImpl) { + + faultyFeedRangeEpkImpl.v = (FeedRangeEpkImpl) operationInvocationParamsWrapper.faultyFeedRange; + + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangeEpkImpl.v.getRange(), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } else if (operationInvocationParamsWrapper.faultyFeedRange instanceof FeedRangePartitionKeyImpl) { + + faultyFeedRangePartitionKeyImpl.v = (FeedRangePartitionKeyImpl) operationInvocationParamsWrapper.faultyFeedRange; + + collectionCache.resolveByNameAsync(null, containerAccessor.getLinkWithoutTrailingSlash(container), null) + .flatMap(collection -> { + faultyDocumentCollection.v = collection; + return partitionKeyRangeCache.tryGetOverlappingRangesAsync(null, collection.getResourceId(), faultyFeedRangePartitionKeyImpl.v.getEffectiveRange(collection.getPartitionKey()), true, null); + }) + .flatMap(listValueHolder -> { + faultyPartitionKeyRanges.v = listValueHolder.v; + return Mono.just(listValueHolder); + }).block(); + } else { + fail("Argument 'operationInvocationParamsWrapper.faultyFeedRange' has to be a sub-type of FeedRangeEpkImpl or FeedRangePartitionKeyImpl!"); + } + + validateNonEmptyList(faultyPartitionKeyRanges.v); + assertThat(faultyDocumentCollection.v).isNotNull(); + + List faultInjectionRules = generateFaultInjectionRules.apply(faultInjectionRuleParamsWrapper); + + if (faultInjectionRules != null && !faultInjectionRules.isEmpty()) { + + operationInvocationParamsWrapper.asyncContainer = container; + operationInvocationParamsWrapper.feedRangeToDrainForChangeFeed = operationInvocationParamsWrapper.faultyFeedRange; + operationInvocationParamsWrapper.feedRangeForQuery = operationInvocationParamsWrapper.faultyFeedRange; + + CosmosFaultInjectionHelper + .configureFaultInjectionRules(faultInjectionRuleParamsWrapper.getFaultInjectionApplicableAsyncContainer(), faultInjectionRules) + .block(); + + boolean hasReachedCircuitBreakingThreshold = false; + int executionCountAfterCircuitBreakingThresholdBreached = 0; + + List testObjects = operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith; + + for (int i = 1; i <= operationIterationCountInFailureFlow; i++) { + + if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); + } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) { + validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation); + } + + ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + + int expectedCircuitBreakingThreshold = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? 5 : 10; + + if (!hasReachedCircuitBreakingThreshold) { + hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == globalPartitionEndpointManagerForCircuitBreaker.getExceptionCountByPartitionKeyRange( + new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId())); + } else { + executionCountAfterCircuitBreakingThresholdBreached++; + } + + if (response.cosmosItemResponse != null) { + assertThat(response.cosmosItemResponse).isNotNull(); + assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + logger.info("In circuit breaking assertion for item response..."); + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); + } + } else if (response.feedResponse != null) { + assertThat(response.feedResponse).isNotNull(); + assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + logger.info("In circuit breaking assertion for feed response..."); + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); + } + } else if (response.cosmosException != null) { + assertThat(response.cosmosException).isNotNull(); + assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + + if (!hasReachedCircuitBreakingThreshold) { + + logger.info("In circuit breaking assertion for exception..."); + CosmosDiagnosticsContext ctx = response.cosmosException.getDiagnostics().getDiagnosticsContext(); + + validateRegionsContactedWhenExceptionBubblesUp.accept(ctx); + } + } else if (response.batchResponse != null) { + assertThat(response.batchResponse).isNotNull(); + assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + logger.info("In circuit breaking assertion for batch response..."); + validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); + } + } + } + + logger.info("Sleep for 90 seconds"); + Thread.sleep(90_000); + + for (int i = operationIterationCountInFailureFlow + 1; i <= operationIterationCountInFailureFlow + operationIterationCountInRecoveryFlow; i++) { + + if (!(operationInvocationParamsWrapper.queryType == QueryType.READ_MANY || operationInvocationParamsWrapper.queryType == QueryType.READ_ALL)) { + operationInvocationParamsWrapper.createdTestObject = testObjects.isEmpty() ? null : testObjects.get(i % testObjects.size()); + } else if (operationInvocationParamsWrapper.queryType == QueryType.READ_MANY) { + validateNonEmptyList(operationInvocationParamsWrapper.itemIdentitiesForReadManyOperation); + } + + ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); + + logger.info("Hit count : {}", faultInjectionRules.stream().mapToLong(FaultInjectionRule::getHitCount).sum()); + + if (response.cosmosItemResponse != null) { + assertThat(response.cosmosItemResponse).isNotNull(); + assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); + + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); + } else if (response.feedResponse != null) { + assertThat(response.feedResponse).isNotNull(); + assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); + + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); + } else if (response.cosmosException != null) { + assertThat(response.cosmosException).isNotNull(); + assertThat(response.cosmosException.getDiagnostics()).isNotNull(); + + response.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( + regionContacted -> logger.info("Region contacted : {}", regionContacted) + ); + } else if (response.batchResponse != null) { + assertThat(response.batchResponse).isNotNull(); + assertThat(response.batchResponse.getDiagnostics()).isNotNull(); + + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); + } + } + } + } catch (InterruptedException ex) { + fail("InterruptedException should not have been thrown!"); + } catch (Exception ex) { + logger.error("Exception thrown :", ex); + fail("Test should have passed!"); + } finally { + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + safeClose(client); + } + } + + private static int resolveTestObjectCountToBootstrapFrom(FaultInjectionOperationType faultInjectionOperationType, int opCount) { switch (faultInjectionOperationType) { case READ_ITEM: case UPSERT_ITEM: @@ -1166,7 +2046,7 @@ private static int getTestObjectCountToBootstrapFrom(FaultInjectionOperationType } } - private static Function> generateOperation(FaultInjectionOperationType faultInjectionOperationType) { + private static Function> resolveDataPlaneOperation(FaultInjectionOperationType faultInjectionOperationType) { switch (faultInjectionOperationType) { case READ_ITEM: @@ -1185,12 +2065,12 @@ private static Function(readItemResponse); + return new ResponseWrapper<>(readItemResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1211,12 +2091,12 @@ private static Function(upsertItemResponse); + return new ResponseWrapper<>(upsertItemResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1237,12 +2117,12 @@ private static Function(createItemResponse); + return new ResponseWrapper<>(createItemResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1263,12 +2143,12 @@ private static Function(deleteItemResponse); + return new ResponseWrapper<>(deleteItemResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1293,12 +2173,12 @@ private static Function(patchItemResponse); + return new ResponseWrapper<>(patchItemResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1320,12 +2200,12 @@ private static Function(queryItemResponse); + return new ResponseWrapper<>(queryItemResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1347,12 +2227,12 @@ private static Function(deleteItemResponse); + return new ResponseWrapper<>(deleteItemResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1370,11 +2250,11 @@ private static Function(batchResponse); + return new ResponseWrapper<>(batchResponse); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1393,12 +2273,12 @@ private static Function(feedResponseFromChangeFeed); + return new ResponseWrapper<>(feedResponseFromChangeFeed); } catch (Exception ex) { if (ex instanceof CosmosException) { CosmosException cosmosException = Utils.as(ex, CosmosException.class); - return new OperationExecutionResult<>(cosmosException); + return new ResponseWrapper<>(cosmosException); } throw ex; @@ -1409,14 +2289,14 @@ private static Function { + private static class ResponseWrapper { private final CosmosItemResponse cosmosItemResponse; private final CosmosException cosmosException; private final FeedResponse feedResponse; private final CosmosBatchResponse batchResponse; - OperationExecutionResult(FeedResponse feedResponse) { + ResponseWrapper(FeedResponse feedResponse) { this.feedResponse = feedResponse; this.cosmosException = null; this.cosmosItemResponse = null; this.batchResponse = null; } - OperationExecutionResult(CosmosItemResponse cosmosItemResponse) { + ResponseWrapper(CosmosItemResponse cosmosItemResponse) { this.cosmosItemResponse = cosmosItemResponse; this.cosmosException = null; this.feedResponse = null; this.batchResponse = null; } - OperationExecutionResult(CosmosException cosmosException) { + ResponseWrapper(CosmosException cosmosException) { this.cosmosException = cosmosException; this.cosmosItemResponse = null; this.feedResponse = null; this.batchResponse = null; } - OperationExecutionResult(CosmosBatchResponse batchResponse) { + ResponseWrapper(CosmosBatchResponse batchResponse) { this.cosmosException = null; this.cosmosItemResponse = null; this.feedResponse = null; @@ -1498,9 +2375,17 @@ private static class OperationInvocationParamsWrapper { public TestObject createdTestObject; public CosmosItemRequestOptions itemRequestOptions; public CosmosQueryRequestOptions queryRequestOptions; + public CosmosReadManyRequestOptions readManyRequestOptions; public CosmosItemRequestOptions patchItemRequestOptions; public FeedRange feedRangeToDrainForChangeFeed; public FeedRange feedRangeForQuery; + public List itemIdentitiesForReadManyOperation; + public PartitionKey partitionKeyForReadAllOperation; + public String containerIdToTarget; + public int itemCountToBootstrapContainerFrom; + public FeedRange faultyFeedRange; + public List testObjectsForDataPlaneOperationToWorkWith; + public QueryType queryType; } private static class FaultInjectionRuleParamsWrapper { @@ -1512,6 +2397,7 @@ private static class FaultInjectionRuleParamsWrapper { private List faultInjectionApplicableRegions; private FeedRange faultInjectionApplicableFeedRange; private FaultInjectionOperationType faultInjectionOperationType; + private List itemIdentitiesForReadMany; public CosmosAsyncContainer getFaultInjectionApplicableAsyncContainer() { return faultInjectionApplicableAsyncContainer; @@ -1679,7 +2565,7 @@ private static List buildTransitTimeoutRules(FaultInjectionR return faultInjectionRules; } - private static List buildReadSessionNotAvailableRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + private static List buildReadWriteSessionNotAvailableRules(FaultInjectionRuleParamsWrapper paramsWrapper) { FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) @@ -1766,6 +2652,34 @@ private static List buildInternalServerErrorRules(FaultInjec return faultInjectionRules; } + private static List buildRetryWithFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders + .getResultBuilder(FaultInjectionServerErrorType.RETRY_WITH) + .build(); + + List faultInjectionRules = new ArrayList<>(); + + for (String applicableRegion : paramsWrapper.getFaultInjectionApplicableRegions()) { + + FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() + .operationType(paramsWrapper.getFaultInjectionOperationType()) + .connectionType(FaultInjectionConnectionType.DIRECT) + .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) + .region(applicableRegion) + .build(); + + FaultInjectionRule faultInjectionRule = new FaultInjectionRuleBuilder("retry-with-rule-" + UUID.randomUUID()) + .condition(faultInjectionCondition) + .result(faultInjectionServerErrorResult) + .duration(paramsWrapper.getFaultInjectionDuration()) + .build(); + + faultInjectionRules.add(faultInjectionRule); + } + + return faultInjectionRules; + } + private static boolean doesOperationHaveWriteSemantics(FaultInjectionOperationType faultInjectionOperationType) { switch (faultInjectionOperationType) { @@ -1785,8 +2699,30 @@ private static boolean doesOperationHaveWriteSemantics(FaultInjectionOperationTy } } - private static void validateStringArg(String input) { + private static void validateNonEmptyString(String input) { assertThat(input).isNotNull(); assertThat(input).isNotEmpty(); } + + private static void validateNonEmptyList(List list) { + assertThat(list).isNotNull(); + assertThat(list).isNotEmpty(); + } + + private static boolean isReadManyOperation(OperationInvocationParamsWrapper paramsWrapper) { + return !(paramsWrapper.itemIdentitiesForReadManyOperation == null || paramsWrapper.itemIdentitiesForReadManyOperation.isEmpty()); + } + + private static void deleteAllDocuments(CosmosAsyncContainer asyncContainer) { + asyncContainer + .queryItems("SELECT * FROM C", TestObject.class) + .collectList() + .flatMapMany(Flux::fromIterable) + .flatMap(testObject -> asyncContainer.deleteItem(testObject.getId(), new PartitionKey(testObject.getMypk()))) + .blockLast(); + } + + private enum QueryType { + READ_MANY, READ_ALL, QUERY_TEXT_BASED + } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java index 720b8effdb8e..5a352678a334 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RenameCollectionAwareClientRetryPolicyTest.java @@ -4,8 +4,8 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.caches.RxClientCollectionCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.WFConstants; -import com.azure.cosmos.models.ModelBridgeInternal; import io.netty.handler.timeout.ReadTimeoutException; import org.mockito.Mockito; import org.testng.annotations.Test; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index f72c7f1cf8b4..c9a1db80efc1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -5,6 +5,7 @@ import com.azure.cosmos.ConsistencyLevel; import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.GatewayServiceConfigurationReader; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.http.HttpClient; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index 794e205e0b18..31496cf438da 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -5,6 +5,7 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.cosmos.ConnectionMode; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.Protocol; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.http.HttpClient; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java index 982a2be96006..4c408634e857 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java @@ -6,7 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentCollection; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.InvalidPartitionException; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java index bc76d0e51613..c41e07cdf9d1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java @@ -11,7 +11,7 @@ import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.OperationType; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 3f8ce214164b..03198543fddf 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -11,7 +11,7 @@ import com.azure.cosmos.implementation.Document; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IRetryPolicyFactory; import com.azure.cosmos.implementation.PartitionKeyRange; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java index fd221f2e2556..800e2203a159 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/FetcherTest.java @@ -4,7 +4,7 @@ package com.azure.cosmos.implementation.query; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index 5557544cfb3a..5d3e7088f1d7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -13,6 +13,7 @@ import com.azure.cosmos.implementation.batch.ServerBatchRequest; import com.azure.cosmos.implementation.caches.RxClientCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.directconnectivity.AddressSelector; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java index 768ed0d4595e..f6276835306d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java @@ -5,6 +5,7 @@ import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedStateV1; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.feedranges.FeedRangeInternal; import com.azure.cosmos.implementation.query.Paginator; import com.azure.cosmos.implementation.spark.OperationContext; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 7d8705f6d2e4..470f33e14a02 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.caches.RxCollectionCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.WebExceptionUtility; import com.azure.cosmos.implementation.faultinjection.FaultInjectionRequestContext; import org.slf4j.Logger; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 22ea53d80450..7399509c6802 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -3,6 +3,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.circuitBreaker.PartitionLevelCircuitBreakerConfig; import com.azure.cosmos.implementation.directconnectivity.Protocol; import io.netty.handler.ssl.SslContext; import io.netty.handler.ssl.SslContextBuilder; @@ -202,9 +203,8 @@ public class Configs { // + "\"applyDiagnosticThresholdsForTransportLevelMeters\":true}"); public static final String METRICS_CONFIG = "COSMOS.METRICS_CONFIG"; public static final String DEFAULT_METRICS_CONFIG = CosmosMicrometerMetricsConfig.DEFAULT.toJson(); - - private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED"; - private static final boolean DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED = false; + private static final String DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = PartitionLevelCircuitBreakerConfig.DEFAULT.toJson(); + private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"; public Configs() { this.sslContext = sslContextInit(); @@ -598,9 +598,14 @@ public static CosmosMicrometerMetricsConfig getMetricsConfig() { return CosmosMicrometerMetricsConfig.fromJsonString(metricsConfig); } - public static boolean isPartitionLevelCircuitBreakerEnabled() { - return getJVMConfigAsBoolean( - PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED, - DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_ENABLED); + public static PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreakerConfig() { + String partitionLevelCircuitBreakerConfig = + System.getProperty( + PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG, + firstNonNull( + emptyToNull(System.getenv().get(PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG)), + DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG)); + + return PartitionLevelCircuitBreakerConfig.fromJsonString(partitionLevelCircuitBreakerConfig); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index e1c97e7f1191..cbd137d1fa8e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -146,6 +146,7 @@ public DocumentServiceRequestContext clone() { context.replicaAddressValidationEnabled = this.replicaAddressValidationEnabled; context.endToEndOperationLatencyPolicyConfig = this.endToEndOperationLatencyPolicyConfig; context.unavailableRegionsForPartition = this.unavailableRegionsForPartition; + context.feedOperationContextForCircuitBreaker = this.feedOperationContextForCircuitBreaker; return context; } @@ -189,7 +190,7 @@ public void setUnavailableRegionsForPartition(List unavailableRegionsFor this.unavailableRegionsForPartition = unavailableRegionsForPartition; } - public PointOperationContextForCircuitBreaker getPointOperationContext() { + public PointOperationContextForCircuitBreaker getPointOperationContextForCircuitBreaker() { return pointOperationContextForCircuitBreaker; } @@ -197,7 +198,7 @@ public void setPointOperationContext(PointOperationContextForCircuitBreaker poin this.pointOperationContextForCircuitBreaker = pointOperationContextForCircuitBreaker; } - public FeedOperationContextForCircuitBreaker getFeedOperationContext() { + public FeedOperationContextForCircuitBreaker getFeedOperationContextForCircuitBreaker() { return feedOperationContextForCircuitBreaker; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java index e9923887f881..1e28b8191f19 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java @@ -3,6 +3,8 @@ package com.azure.cosmos.implementation; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; + import java.util.Map; public class FeedOperationContextForCircuitBreaker { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index 42e2f1fb8c0e..aefd03265b91 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -4,6 +4,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.routing.LocationCache; import com.azure.cosmos.implementation.routing.LocationHelper; import org.slf4j.Logger; @@ -17,10 +18,8 @@ import java.time.Duration; import java.time.LocalDateTime; import java.util.ArrayList; -import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; /** diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java index f26cfc76a4b1..31d303910f11 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RetryPolicy.java @@ -5,6 +5,7 @@ import com.azure.cosmos.ThrottlingRetryOptions; import com.azure.cosmos.implementation.caches.RxCollectionCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; /** * While this class is public, but it is not part of our published public APIs. diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index bed741275fbc..24eed1939352 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -27,6 +27,7 @@ import com.azure.cosmos.implementation.caches.RxClientCollectionCache; import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.cpu.CpuMemoryListener; import com.azure.cosmos.implementation.cpu.CpuMemoryMonitor; @@ -1909,7 +1910,7 @@ private Mono getBatchDocumentRequest(DocumentClientRet addBatchHeaders(request, serverBatchRequest, documentCollectionValueHolder.v); - if (Configs.isPartitionLevelCircuitBreakerEnabled() && options != null) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request) && options != null) { options.setPartitionKeyDefinition(documentCollectionValueHolder.v.getPartitionKey()); addPartitionLevelUnavailableRegionsForRequest(request, options, collectionRoutingMapValueHolder.v); } @@ -2233,7 +2234,7 @@ private Mono> createDocumentCore( DocumentClientRetryPolicy finalRetryPolicyInstance = requestRetryPolicy; AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs(() -> @@ -2318,46 +2319,41 @@ private static Mono getPointOperationResponseMonoWithE2ETimeout( return rxDocumentServiceResponseMono; } - private Mono handleRegionFeedbackForPointOperation( + private Mono handleCircuitBreakingFeedbackForPointOperation( Mono response, AtomicReference requestReference) { return response .doOnSuccess(ignore -> { - RxDocumentServiceRequest succeededRequest = requestReference.get(); - - if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(succeededRequest)) { - + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest succeededRequest = requestReference.get(); checkNotNull(succeededRequest.requestContext, "Argument 'succeededRequest.requestContext' must not be null!"); - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = succeededRequest.requestContext.getPointOperationContext(); - + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = succeededRequest.requestContext.getPointOperationContextForCircuitBreaker(); checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); - pointOperationContextForCircuitBreaker.setHasOperationSeenSuccess(); + + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(succeededRequest); } }) .doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { - RxDocumentServiceRequest failedRequest = requestReference.get(); - - if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(failedRequest)) { - + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest failedRequest = requestReference.get(); checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContext(); - + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { - this.handleLocationExceptionForPartitionKeyRange(failedRequest); + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); } } else { - this.handleLocationExceptionForPartitionKeyRange(failedRequest); + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); } } } @@ -2368,27 +2364,111 @@ private Mono handleRegionFeedbackForPointOperation( return; } - RxDocumentServiceRequest potentiallyFailedRequest = requestReference.get(); - - if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(potentiallyFailedRequest)) { - + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest potentiallyFailedRequest = requestReference.get(); checkNotNull(potentiallyFailedRequest.requestContext, "Argument 'potentiallyFailedRequest.requestContext' must not be null!"); - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = potentiallyFailedRequest.requestContext.getPointOperationContext(); - + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = potentiallyFailedRequest.requestContext.getPointOperationContextForCircuitBreaker(); checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { - this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); + this.handleLocationCancellationExceptionForPartitionKeyRange(potentiallyFailedRequest); } } - // todo: investigate below scenario - gets called when INTERNAL_SERVER_ERROR injected - // todo: something is causing cancellation w/o e2e operation timeout set + } + + // todo: investigate below scenario - gets called when INTERNAL_SERVER_ERROR injected + // todo: something is causing cancellation w/o e2e operation timeout set // else { // this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); // } + }); + } + + private Mono> handleCircuitBreakingFeedbackForFeedOperationWithAvailabilityStrategy(Mono> response, RxDocumentServiceRequest request) { + + return response + .doOnSuccess(nonTransientFeedOperationResult -> { + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + if (!nonTransientFeedOperationResult.isError()) { + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); + + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker + = request.requestContext.getFeedOperationContextForCircuitBreaker(); + + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' cannot be null!"); + + feedOperationContextForCircuitBreaker.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); + } + } + }) + .doFinally(signalType -> { + if (signalType != SignalType.CANCEL) { + return; + } + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); + + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker + = request.requestContext.getFeedOperationContextForCircuitBreaker(); + + checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' cannot be null!"); + + if (!feedOperationContextForCircuitBreaker.getIsRequestHedged() + && feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled() + && feedOperationContextForCircuitBreaker.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { + this.handleLocationCancellationExceptionForPartitionKeyRange(request); + } + } + }); + } + + private Mono handleCircuitBreakingFeedbackForPointOperationWithAvailabilityStrategy(Mono response, RxDocumentServiceRequest request) { + + if (!this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + return response; + } + + return response + .doOnSuccess(nonTransientPointOperationResult -> { + + if (!nonTransientPointOperationResult.isError()) { + + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker + = request.requestContext.getPointOperationContextForCircuitBreaker(); + + pointOperationContextForCircuitBreaker.setHasOperationSeenSuccess(); + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); + } + + }) + .doFinally(signalType -> { + if (signalType != SignalType.CANCEL) { + return; + } + + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker + = request.requestContext.getPointOperationContextForCircuitBreaker(); + + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' cannot be null!"); + + if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() + && pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled() + && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + this.handleLocationCancellationExceptionForPartitionKeyRange(request); } }); } @@ -2475,7 +2555,7 @@ private Mono> upsertDocumentCore( DocumentClientRetryPolicy finalRetryPolicyInstance = requestRetryPolicy; AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs( @@ -2580,7 +2660,7 @@ private Mono> replaceDocumentCore( DocumentClientRetryPolicy finalRequestRetryPolicy = requestRetryPolicy; AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs( @@ -2671,7 +2751,7 @@ private Mono> replaceDocumentCore( DocumentClientRetryPolicy finalRequestRetryPolicy = requestRetryPolicy; AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation(ObservableHelper.inlineIfPossibleAsObs( + return handleCircuitBreakingFeedbackForPointOperation(ObservableHelper.inlineIfPossibleAsObs( () -> replaceDocumentInternal( document, options, @@ -2861,7 +2941,7 @@ private Mono> patchDocumentCore( AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation( + return handleCircuitBreakingFeedbackForPointOperation( getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, @@ -3015,7 +3095,7 @@ private Mono> deleteDocumentCore( AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs( @@ -3166,7 +3246,7 @@ private Mono> readDocumentCore( AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( + return handleCircuitBreakingFeedbackForPointOperation(getPointOperationResponseMonoWithE2ETimeout( nonNullRequestOptions, endToEndPolicyConfig, ObservableHelper.inlineIfPossibleAsObs( @@ -4271,7 +4351,7 @@ public Mono executeBatchRequest(String collectionLink, boolean disableAutomaticIdGeneration) { DocumentClientRetryPolicy documentClientRetryPolicy = this.resetSessionTokenRetryPolicy.getRequestPolicy(null); AtomicReference requestReference = new AtomicReference<>(); - return handleRegionFeedbackForPointOperation(ObservableHelper + return handleCircuitBreakingFeedbackForPointOperation(ObservableHelper .inlineIfPossibleAsObs(() -> executeBatchRequestInternal( collectionLink, serverBatchRequest, options, documentClientRetryPolicy, disableAutomaticIdGeneration, requestReference), documentClientRetryPolicy), requestReference); } @@ -5736,7 +5816,7 @@ public void addPartitionLevelUnavailableRegionsForRequest( checkNotNull(options.getPartitionKeyDefinition(), "partitionKeyDefinition within options cannot be null!"); checkNotNull(collectionRoutingMap, "collectionRoutingMap cannot be null!"); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { PartitionKeyDefinition partitionKeyDefinition = options.getPartitionKeyDefinition(); PartitionKeyInternal partitionKeyInternal = request.getPartitionKeyInternal(); @@ -5770,7 +5850,7 @@ public void addPartitionLevelUnavailableRegionsForFeedRequest( checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), resolvedPartitionKeyRange); List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); @@ -5796,7 +5876,7 @@ public void addPartitionLevelUnavailableRegionsForChangeFeedRequest( checkNotNull(resolvedPartitionKeyRange, "resolvedPartitionKeyRange cannot be null!"); - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), resolvedPartitionKeyRange); List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); @@ -6019,8 +6099,6 @@ private Mono> wrapPointOperationWithAvailabilityStrat ); } - - private static boolean isCosmosException(Throwable t) { final Throwable unwrappedException = Exceptions.unwrap(t); return unwrappedException instanceof CosmosException; @@ -6218,11 +6296,12 @@ private Mono executeFeedOperationWithAvailabilityStrategy( Map partitionKeyRangesWithSuccess = new ConcurrentHashMap<>(); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, false); + feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow.setIsRequestHedged(false); + req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow); + if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForMainRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, false); - feedOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); - req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForMainRequest); return feedOperation.apply(retryPolicyFactory, req); } @@ -6232,25 +6311,31 @@ private Mono executeFeedOperationWithAvailabilityStrategy( orderedApplicableRegionsForSpeculation .forEach(region -> { + RxDocumentServiceRequest clonedRequest = req.clone(); + logger.info("Cloned request : {}", req); + if (monoList.isEmpty()) { // no special error handling for transient errors to suppress them here // because any cross-regional retries are expected to be processed // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForMainRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); - feedOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); - clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForMainRequest); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForNonHedgedRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); + feedOperationContextForCircuitBreakerForNonHedgedRequest.setIsRequestHedged(false); + clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForNonHedgedRequest); + + logger.info("Cloned request : {}", clonedRequest); + Mono> initialMonoAcrossAllRegions = - feedOperation.apply(retryPolicyFactory, clonedRequest) - .map(NonTransientFeedOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isCosmosException, - t -> Mono.just( - new NonTransientFeedOperationResult<>( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + handleCircuitBreakingFeedbackForFeedOperationWithAvailabilityStrategy(feedOperation.apply(retryPolicyFactory, clonedRequest) + .map(NonTransientFeedOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isCosmosException, + t -> Mono.just( + new NonTransientFeedOperationResult<>( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))), clonedRequest); if (logger.isDebugEnabled()) { monoList.add(initialMonoAcrossAllRegions.doOnSubscribe(c -> logger.debug( @@ -6272,17 +6357,19 @@ private Mono executeFeedOperationWithAvailabilityStrategy( feedOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForHedgedRequest); + logger.info("Cloned request : {}", clonedRequest); + // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors Mono> regionalCrossRegionRetryMono = - feedOperation.apply(retryPolicyFactory, clonedRequest) - .map(NonTransientFeedOperationResult::new) - .onErrorResume( - RxDocumentClientImpl::isNonTransientCosmosException, - t -> Mono.just( - new NonTransientFeedOperationResult<>( - Utils.as(Exceptions.unwrap(t), CosmosException.class)))); + handleCircuitBreakingFeedbackForFeedOperationWithAvailabilityStrategy(feedOperation.apply(retryPolicyFactory, clonedRequest) + .map(NonTransientFeedOperationResult::new) + .onErrorResume( + RxDocumentClientImpl::isNonTransientCosmosException, + t -> Mono.just( + new NonTransientFeedOperationResult<>( + Utils.as(Exceptions.unwrap(t), CosmosException.class)))), clonedRequest); Duration delayForCrossRegionalRetry = (availabilityStrategy) .getThreshold() @@ -6356,7 +6443,7 @@ private Mono executeFeedOperationWithAvailabilityStrategy( }); } - private void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { + private void handleLocationCancellationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { URI firstContactedLocationEndpoint = diagnosticsAccessor .getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java index 4dc7c3cded72..20d29ea1b48e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java @@ -1068,6 +1068,7 @@ public RxDocumentServiceRequest clone() { rxDocumentServiceRequest.feedRange = this.feedRange; rxDocumentServiceRequest.effectiveRange = this.effectiveRange; rxDocumentServiceRequest.isFeed = this.isFeed; + rxDocumentServiceRequest.resourceId = this.resourceId; return rxDocumentServiceRequest; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index f777304315ea..1f4e7730ab21 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -9,6 +9,7 @@ import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.caches.RxClientCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.GatewayServiceConfigurationReader; import com.azure.cosmos.implementation.directconnectivity.HttpUtils; import com.azure.cosmos.implementation.directconnectivity.RequestHelper; @@ -549,11 +550,7 @@ private Mono invokeAsyncInternal(RxDocumentServiceReq private Mono invokeAsync(RxDocumentServiceRequest request) { - Callable> funcDelegate = () -> invokeAsyncInternal(request).single().doOnSuccess(ignore -> { - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - this.globalPartitionEndpointManager.handleLocationSuccessForPartitionKeyRange(request); - } - }); + Callable> funcDelegate = () -> invokeAsyncInternal(request).single(); MetadataRequestRetryPolicy metadataRequestRetryPolicy = new MetadataRequestRetryPolicy(this.globalEndpointManager); metadataRequestRetryPolicy.onBeforeSendRequest(request); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java new file mode 100644 index 000000000000..972946513da7 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -0,0 +1,261 @@ +package com.azure.cosmos.implementation.circuitBreaker; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ConsecutiveExceptionBasedCircuitBreaker implements ICircuitBreaker { + + private static final Logger logger = LoggerFactory.getLogger(ConsecutiveExceptionBasedCircuitBreaker.class); + private final PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig; + + public ConsecutiveExceptionBasedCircuitBreaker(PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig) { + this.partitionLevelCircuitBreakerConfig = partitionLevelCircuitBreakerConfig; + } + + public LocationSpecificContext handleException(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + + int exceptionCountAfterHandling + = (isReadOnlyRequest) ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + + GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus + = locationSpecificContext.getLocationHealthStatus(); + + switch (locationHealthStatus) { + case Healthy: + return locationSpecificContext; + case HealthyWithFailures: + case HealthyTentative: + + exceptionCountAfterHandling++; + + if (isReadOnlyRequest) { + return new LocationSpecificContext( + locationSpecificContext.getSuccessCountForWrite(), + locationSpecificContext.getExceptionCountForWrite(), + locationSpecificContext.getSuccessCountForRead(), + exceptionCountAfterHandling, + locationSpecificContext.getUnavailableSince(), + locationSpecificContext.getLocationHealthStatus(), + locationSpecificContext.isExceptionThresholdBreached()); + } else { + return new LocationSpecificContext( + locationSpecificContext.getSuccessCountForWrite(), + exceptionCountAfterHandling, + locationSpecificContext.getSuccessCountForRead(), + locationSpecificContext.getExceptionCountForRead(), + locationSpecificContext.getUnavailableSince(), + locationSpecificContext.getLocationHealthStatus(), + locationSpecificContext.isExceptionThresholdBreached()); + } + case Unavailable: + throw new IllegalStateException(); + default: + throw new IllegalArgumentException(); + } + } + + public LocationSpecificContext handleSuccess(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + int exceptionCountAfterHandling + = (isReadOnlyRequest) ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + + int successCountAfterHandling + = (isReadOnlyRequest) ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForWrite(); + + GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus + = locationSpecificContext.getLocationHealthStatus(); + + switch (locationHealthStatus) { + case Healthy: + return locationSpecificContext; + case HealthyWithFailures: + + exceptionCountAfterHandling = 0; + + if (isReadOnlyRequest) { + return new LocationSpecificContext( + locationSpecificContext.getSuccessCountForWrite(), + locationSpecificContext.getExceptionCountForWrite(), + locationSpecificContext.getSuccessCountForRead(), + exceptionCountAfterHandling, + locationSpecificContext.getUnavailableSince(), + locationSpecificContext.getLocationHealthStatus(), + locationSpecificContext.isExceptionThresholdBreached()); + } else { + return new LocationSpecificContext( + locationSpecificContext.getSuccessCountForWrite(), + exceptionCountAfterHandling, + locationSpecificContext.getSuccessCountForRead(), + locationSpecificContext.getExceptionCountForRead(), + locationSpecificContext.getUnavailableSince(), + locationSpecificContext.getLocationHealthStatus(), + locationSpecificContext.isExceptionThresholdBreached()); + } + case HealthyTentative: + + successCountAfterHandling++; + + if (isReadOnlyRequest) { + return new LocationSpecificContext( + locationSpecificContext.getSuccessCountForWrite(), + locationSpecificContext.getExceptionCountForWrite(), + successCountAfterHandling, + exceptionCountAfterHandling, + locationSpecificContext.getUnavailableSince(), + locationSpecificContext.getLocationHealthStatus(), + locationSpecificContext.isExceptionThresholdBreached()); + } else { + return new LocationSpecificContext( + successCountAfterHandling, + exceptionCountAfterHandling, + locationSpecificContext.getSuccessCountForRead(), + locationSpecificContext.getExceptionCountForRead(), + locationSpecificContext.getUnavailableSince(), + locationSpecificContext.getLocationHealthStatus(), + locationSpecificContext.isExceptionThresholdBreached()); + } + case Unavailable: + throw new IllegalStateException(); + default: + throw new IllegalArgumentException(); + } + } + + public boolean shouldHealthStatusBeDowngraded(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + + int exceptionCountActual + = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + + return exceptionCountActual >= getAllowedExceptionCountToMaintainStatus(locationSpecificContext.getLocationHealthStatus(), isReadOnlyRequest); + } + + public boolean canHealthStatusBeUpgraded(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + + int successCountActual + = isReadOnlyRequest ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForWrite(); + + int exceptionCountActual + = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + + GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); + + return successCountActual >= getMinimumSuccessCountForStatusUpgrade(locationHealthStatus, isReadOnlyRequest) && + (double) exceptionCountActual / (double) successCountActual < getAllowedExceptionToSuccessRatio(locationHealthStatus, isReadOnlyRequest); + } + + private static double getAllowedExceptionToSuccessRatio(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus status, boolean isReadOnlyRequest) { + + if (isReadOnlyRequest) { + switch (status) { + case HealthyWithFailures: + return 0.3d; + case HealthyTentative: + return 0.1d; + default: + return 0d; + } + } else { + switch (status) { + case HealthyWithFailures: + return 0.2d; + case HealthyTentative: + return 0.05d; + default: + return 0d; + } + } + } + + public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus status, boolean isReadOnlyRequest) { + + if (isReadOnlyRequest) { + switch (status) { + case HealthyWithFailures: + if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { + return 10; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { + return 20; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { + return 40; + } + case HealthyTentative: + if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { + return 5; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { + return 10; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { + return 20; + } + case Healthy: + case Unavailable: + return 0; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } else { + switch (status) { + case HealthyWithFailures: + if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { + return 5; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { + return 10; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { + return 20; + } + case HealthyTentative: + if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { + return 10; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { + return 5; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { + return 3; + } + case Healthy: + return 0; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } + } + + public int getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus status, boolean isReadOnlyRequest) { + if (isReadOnlyRequest) { + switch (status) { + case HealthyTentative: + if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { + return 10; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { + return 5; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { + return 3; + } + case Unavailable: + case HealthyWithFailures: + case Healthy: + return 0; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } else { + switch (status) { + case HealthyTentative: + if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { + return 20; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { + return 10; + } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { + return 5; + } + case Unavailable: + case HealthyWithFailures: + case Healthy: + return 0; + default: + throw new IllegalStateException("Unsupported health status: " + status); + } + } + } + + public boolean isPartitionLevelCircuitBreakerEnabled() { + return this.partitionLevelCircuitBreakerConfig.isPartitionLevelCircuitBreakerEnabled(); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java similarity index 67% rename from sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java rename to sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 4b82a301f06c..f5761f850c60 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -1,8 +1,14 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -package com.azure.cosmos.implementation; - +package com.azure.cosmos.implementation.circuitBreaker; + +import com.azure.cosmos.implementation.Configs; +import com.azure.cosmos.implementation.CosmosSchedulers; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.OperationType; +import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -30,12 +36,19 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo; private final ConcurrentHashMap partitionsWithPossibleUnavailableRegions; private final LocationContextTransitionHandler locationContextTransitionHandler; + private ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); this.partitionsWithPossibleUnavailableRegions = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; this.locationContextTransitionHandler = new LocationContextTransitionHandler(); + + PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); + + if (partitionLevelCircuitBreakerConfig.getCircuitBreakerType().equals("COUNT_BASED")) { + this.consecutiveExceptionBasedCircuitBreaker = new ConsecutiveExceptionBasedCircuitBreaker(partitionLevelCircuitBreakerConfig); + } } public void init() { @@ -149,7 +162,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String reso URI location = pair.getKey(); LocationSpecificContext locationSpecificContext = pair.getValue(); - if (locationSpecificContext.locationUnavailabilityStatus == LocationUnavailabilityStatus.Unavailable) { + if (locationSpecificContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) { unavailableLocations.add(location); } } @@ -202,7 +215,7 @@ private Flux updateStaleLocationInfo() { public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceRequest request) { - if (!Configs.isPartitionLevelCircuitBreakerEnabled()) { + if (!this.consecutiveExceptionBasedCircuitBreaker.isPartitionLevelCircuitBreakerEnabled()) { return false; } @@ -238,7 +251,7 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper 0, 0, Instant.MAX, - LocationUnavailabilityStatus.HealthyWithFailures, + LocationHealthStatus.HealthyWithFailures, false); } @@ -268,7 +281,7 @@ public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI 0, 0, Instant.MAX, - LocationUnavailabilityStatus.Healthy, + LocationHealthStatus.Healthy, false); } @@ -310,7 +323,7 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe return true; } - Instant unavailableSinceSnapshot = locationSpecificContext.unavailableSince; + Instant unavailableSinceSnapshot = locationSpecificContext.getUnavailableSince(); if (mostHealthyTentativeTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { mostHealthyTentativeTimeAcrossRegions = unavailableSinceSnapshot; @@ -340,45 +353,6 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe } } - // todo: (abhmohanty) decouple this? - public class LocationSpecificContext { - private final int exceptionCountForWrite; - private final int successCountForWrite; - private final int exceptionCountForRead; - private final int successCountForRead; - private final Instant unavailableSince; - private final LocationUnavailabilityStatus locationUnavailabilityStatus; - private final boolean isExceptionThresholdBreached; - - public LocationSpecificContext( - int successCountForWrite, - int exceptionCountForWrite, - int successCountForRead, - int exceptionCountForRead, - Instant unavailableSince, - LocationUnavailabilityStatus locationUnavailabilityStatus, - boolean isExceptionThresholdBreached) { - - this.successCountForWrite = successCountForWrite; - this.exceptionCountForWrite = exceptionCountForWrite; - this.exceptionCountForRead = exceptionCountForRead; - this.successCountForRead = successCountForRead; - this.unavailableSince = unavailableSince; - this.locationUnavailabilityStatus = locationUnavailabilityStatus; - this.isExceptionThresholdBreached = isExceptionThresholdBreached; - } - - public boolean isExceptionThresholdBreached() { - return this.isExceptionThresholdBreached; - } - - public boolean isRegionAvailableToProcessRequests() { - return this.locationUnavailabilityStatus == LocationUnavailabilityStatus.Healthy || - this.locationUnavailabilityStatus == LocationUnavailabilityStatus.HealthyWithFailures || - this.locationUnavailabilityStatus == LocationUnavailabilityStatus.HealthyTentative; - } - } - private class LocationContextTransitionHandler { public LocationSpecificContext handleSuccess( @@ -390,13 +364,10 @@ public LocationSpecificContext handleSuccess( logger.info("Handling success"); - LocationUnavailabilityStatus currentStatusSnapshot = locationSpecificContext.locationUnavailabilityStatus; - double allowedFailureRatio = getAllowedExceptionToSuccessRatio(currentStatusSnapshot, isReadOnlyRequest); - - int minSuccessCountForStatusUpgrade = getMinimumSuccessCountForStatusUpgrade(currentStatusSnapshot, isReadOnlyRequest); + LocationHealthStatus currentStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); - int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.exceptionCountForWrite; - int successCountActual = isReadOnlyRequest ? locationSpecificContext.successCountForRead : locationSpecificContext.successCountForWrite; + int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + int successCountActual = isReadOnlyRequest ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForRead(); switch (currentStatusSnapshot) { case Healthy: @@ -404,28 +375,8 @@ public LocationSpecificContext handleSuccess( case HealthyWithFailures: if (!forceStatusChange) { if (exceptionCountActual > 0) { - - exceptionCountActual -= 1; - - if (isReadOnlyRequest) { - return new LocationSpecificContext( - locationSpecificContext.successCountForWrite, - locationSpecificContext.exceptionCountForWrite, - locationSpecificContext.successCountForRead, - exceptionCountActual, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } else { - return new LocationSpecificContext( - locationSpecificContext.successCountForWrite, - exceptionCountActual, - locationSpecificContext.successCountForRead, - locationSpecificContext.exceptionCountForRead, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } + return GlobalPartitionEndpointManagerForCircuitBreaker + .this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificContext, isReadOnlyRequest); } } break; @@ -433,44 +384,26 @@ public LocationSpecificContext handleSuccess( case HealthyTentative: if (!forceStatusChange) { - successCountActual += 1; + LocationSpecificContext locationSpecificContextInner + = GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificContext, isReadOnlyRequest); logger.info("Try to switch to Healthy but actual success count : {}", successCountActual); - if (successCountActual >= minSuccessCountForStatusUpgrade && (double) exceptionCountActual / (double) successCountActual < allowedFailureRatio) { + if (GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificContextInner, isReadOnlyRequest)) { logger.info("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.Healthy); + return this.transitionHealthStatus(LocationHealthStatus.Healthy); } else { - - if (isReadOnlyRequest) { - return new LocationSpecificContext( - locationSpecificContext.successCountForWrite, - locationSpecificContext.exceptionCountForWrite, - successCountActual, - locationSpecificContext.exceptionCountForRead, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } else { - return new LocationSpecificContext( - successCountActual, - locationSpecificContext.exceptionCountForWrite, - locationSpecificContext.successCountForRead, - locationSpecificContext.exceptionCountForRead, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } + return locationSpecificContextInner; } } break; case Unavailable: - Instant unavailableSinceActual = locationSpecificContext.unavailableSince; + Instant unavailableSinceActual = locationSpecificContext.getUnavailableSince(); if (!forceStatusChange) { if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { @@ -482,7 +415,7 @@ public LocationSpecificContext handleSuccess( GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.HealthyTentative); + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); } } else { logger.info("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", @@ -491,7 +424,7 @@ public LocationSpecificContext handleSuccess( partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.HealthyTentative); + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); } break; default: @@ -509,10 +442,7 @@ public LocationSpecificContext handleException( logger.warn("Handling exception"); - LocationUnavailabilityStatus currentStatusSnapshot = locationSpecificContext.locationUnavailabilityStatus; - int allowedExceptionCount = getAllowedExceptionCountToMaintainStatus(currentStatusSnapshot, isReadOnlyRequest); - - int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.exceptionCountForRead : locationSpecificContext.exceptionCountForWrite; + LocationHealthStatus currentStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); switch (currentStatusSnapshot) { case Healthy: @@ -522,33 +452,22 @@ public LocationSpecificContext handleException( partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.HealthyWithFailures); + return this.transitionHealthStatus(LocationHealthStatus.HealthyWithFailures); case HealthyWithFailures: - if (exceptionCountActual < allowedExceptionCount) { + if (!GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { - exceptionCountActual += 1; + LocationSpecificContext locationSpecificContextInner = GlobalPartitionEndpointManagerForCircuitBreaker + .this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); - logger.info("Exception count : {}", exceptionCountActual); + logger.info("Partition {}-{} of collection : {} has exception count of {} for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + isReadOnlyRequest ? locationSpecificContextInner.getExceptionCountForRead() : locationSpecificContextInner.getExceptionCountForWrite(), + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - if (isReadOnlyRequest) { - return new LocationSpecificContext( - locationSpecificContext.successCountForWrite, - locationSpecificContext.exceptionCountForWrite, - locationSpecificContext.successCountForRead, - exceptionCountActual, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } else { - return new LocationSpecificContext( - locationSpecificContext.successCountForWrite, - exceptionCountActual, - locationSpecificContext.successCountForRead, - locationSpecificContext.exceptionCountForRead, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } + return locationSpecificContextInner; } else { GlobalPartitionEndpointManagerForCircuitBreaker .this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); @@ -558,32 +477,12 @@ public LocationSpecificContext handleException( partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.Unavailable); + return this.transitionHealthStatus(LocationHealthStatus.Unavailable); } case HealthyTentative: - if (exceptionCountActual < allowedExceptionCount) { - - exceptionCountActual += 1; - - if (isReadOnlyRequest) { - return new LocationSpecificContext( - locationSpecificContext.successCountForWrite, - locationSpecificContext.exceptionCountForWrite, - locationSpecificContext.successCountForRead, - exceptionCountActual, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } else { - return new LocationSpecificContext( - locationSpecificContext.successCountForWrite, - exceptionCountActual, - locationSpecificContext.successCountForRead, - locationSpecificContext.exceptionCountForRead, - locationSpecificContext.unavailableSince, - locationSpecificContext.locationUnavailabilityStatus, - locationSpecificContext.isExceptionThresholdBreached); - } + if (!GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { + + return GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); } else { logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), @@ -591,14 +490,14 @@ public LocationSpecificContext handleException( partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - return this.transitionHealthStatus(LocationUnavailabilityStatus.Unavailable); + return this.transitionHealthStatus(LocationHealthStatus.Unavailable); } default: throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); } } - public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStatus newStatus) { + public LocationSpecificContext transitionHealthStatus(LocationHealthStatus newStatus) { switch (newStatus) { case Healthy: @@ -608,7 +507,7 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat 0, 0, Instant.MAX, - LocationUnavailabilityStatus.Healthy, + LocationHealthStatus.Healthy, false); case HealthyWithFailures: return new LocationSpecificContext( @@ -617,7 +516,7 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat 0, 0, Instant.MAX, - LocationUnavailabilityStatus.HealthyWithFailures, + LocationHealthStatus.HealthyWithFailures, false); case Unavailable: return new LocationSpecificContext( @@ -626,7 +525,7 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat 0, 0, Instant.now(), - LocationUnavailabilityStatus.Unavailable, + LocationHealthStatus.Unavailable, true); case HealthyTentative: return new LocationSpecificContext( @@ -635,7 +534,7 @@ public LocationSpecificContext transitionHealthStatus(LocationUnavailabilityStat 0, 0, Instant.MAX, - LocationUnavailabilityStatus.HealthyTentative, + LocationHealthStatus.HealthyTentative, false); default: throw new IllegalStateException("Unsupported health status: " + newStatus); @@ -667,87 +566,10 @@ public int hashCode() { } // todo (abhmohanty): does this need to be public - public enum LocationUnavailabilityStatus { + public enum LocationHealthStatus { Healthy, HealthyWithFailures, Unavailable, HealthyTentative } - private static double getAllowedExceptionToSuccessRatio(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { - - if (isReadOnlyRequest) { - switch (status) { - case HealthyWithFailures: - return 0.3d; - case HealthyTentative: - return 0.1d; - default: - return 0d; - } - } else { - switch (status) { - case HealthyWithFailures: - return 0.2d; - case HealthyTentative: - return 0.05d; - default: - return 0d; - } - } - } - - private static int getAllowedExceptionCountToMaintainStatus(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { - - if (isReadOnlyRequest) { - switch (status) { - case HealthyWithFailures: - return 10; - case HealthyTentative: - return 5; - case Healthy: - case Unavailable: - return 0; - default: - throw new IllegalStateException("Unsupported health status: " + status); - } - } else { - switch (status) { - case HealthyWithFailures: - return 5; - case HealthyTentative: - return 2; - case Healthy: - return 0; - default: - throw new IllegalStateException("Unsupported health status: " + status); - } - } - } - - private static int getMinimumSuccessCountForStatusUpgrade(LocationUnavailabilityStatus status, boolean isReadOnlyRequest) { - if (isReadOnlyRequest) { - switch (status) { - case HealthyTentative: - return 5; - case Unavailable: - case HealthyWithFailures: - case Healthy: - return 0; - default: - throw new IllegalStateException("Unsupported health status: " + status); - } - } else { - switch (status) { - case HealthyTentative: - return 10; - case Unavailable: - case HealthyWithFailures: - case Healthy: - return 0; - default: - throw new IllegalStateException("Unsupported health status: " + status); - } - } - } - // todo: keep private and access through reflection public int getExceptionCountByPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { @@ -755,12 +577,24 @@ public int getExceptionCountByPartitionKeyRange(PartitionKeyRangeWrapper partiti this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); int count = 0; + int regionCountWithFailures = 0; + boolean failuresExist = false; - for (LocationSpecificContext locationSpecificContext : partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition.values()) { - count += locationSpecificContext.exceptionCountForRead + locationSpecificContext.exceptionCountForWrite; + for (LocationSpecificContext locationSpecificContext + : partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition.values()) { + count += locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite(); + + if (locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite() > 0) { + failuresExist = true; + regionCountWithFailures++; + } } - return count; + if (failuresExist) { + return count / regionCountWithFailures; + } + + return 0; } // todo: keep private and access through reflection @@ -774,4 +608,8 @@ public Map getLocationToLocationSpecificContextMap return null; } + + public ConsecutiveExceptionBasedCircuitBreaker getConsecutiveExceptionBasedCircuitBreaker() { + return this.consecutiveExceptionBasedCircuitBreaker; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java new file mode 100644 index 000000000000..d17fc0579a7c --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java @@ -0,0 +1,7 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.circuitBreaker; + +public interface ICircuitBreaker { +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java new file mode 100644 index 000000000000..d27bf0561a0c --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java @@ -0,0 +1,68 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.circuitBreaker; + +import java.time.Instant; + +public class LocationSpecificContext { + private final int exceptionCountForWrite; + private final int successCountForWrite; + private final int exceptionCountForRead; + private final int successCountForRead; + private final Instant unavailableSince; + private final GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus; + private final boolean isExceptionThresholdBreached; + + public LocationSpecificContext( + int successCountForWrite, + int exceptionCountForWrite, + int successCountForRead, + int exceptionCountForRead, + Instant unavailableSince, + GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus, + boolean isExceptionThresholdBreached) { + + this.successCountForWrite = successCountForWrite; + this.exceptionCountForWrite = exceptionCountForWrite; + this.exceptionCountForRead = exceptionCountForRead; + this.successCountForRead = successCountForRead; + this.unavailableSince = unavailableSince; + this.locationHealthStatus = locationHealthStatus; + this.isExceptionThresholdBreached = isExceptionThresholdBreached; + } + + public boolean isExceptionThresholdBreached() { + return this.isExceptionThresholdBreached; + } + + public boolean isRegionAvailableToProcessRequests() { + return this.locationHealthStatus == GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.Healthy || + this.locationHealthStatus == GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures || + this.locationHealthStatus == GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyTentative; + } + + public int getExceptionCountForWrite() { + return exceptionCountForWrite; + } + + public int getSuccessCountForWrite() { + return successCountForWrite; + } + + public int getExceptionCountForRead() { + return exceptionCountForRead; + } + + public int getSuccessCountForRead() { + return successCountForRead; + } + + public Instant getUnavailableSince() { + return unavailableSince; + } + + public GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus getLocationHealthStatus() { + return locationHealthStatus; + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java new file mode 100644 index 000000000000..643b030c0c9c --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java @@ -0,0 +1,55 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.circuitBreaker; + +import com.azure.cosmos.implementation.Utils; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSetter; +import com.fasterxml.jackson.annotation.Nulls; +import com.fasterxml.jackson.core.JsonProcessingException; + +public class PartitionLevelCircuitBreakerConfig { + + public static final PartitionLevelCircuitBreakerConfig DEFAULT = new PartitionLevelCircuitBreakerConfig(); + + @JsonSetter(nulls = Nulls.SKIP) + @JsonProperty + private Boolean isPartitionLevelCircuitBreakerEnabled = false; + + @JsonSetter(nulls = Nulls.SKIP) + @JsonProperty + private String circuitBreakerType = "COUNT_BASED"; + + @JsonSetter(nulls = Nulls.SKIP) + @JsonProperty + private String circuitBreakerFailureTolerance = "LOW"; + + public Boolean isPartitionLevelCircuitBreakerEnabled() { + return isPartitionLevelCircuitBreakerEnabled; + } + + public String getCircuitBreakerType() { + return circuitBreakerType; + } + + public String getCircuitBreakerFailureTolerance() { + return circuitBreakerFailureTolerance; + } + + public String toJson() { + try { + return Utils.getSimpleObjectMapper().writeValueAsString(this); + } catch (JsonProcessingException e) { + throw new RuntimeException("Unable to convert to Json String", e); + } + } + + public static PartitionLevelCircuitBreakerConfig fromJsonString(String jsonString) { + try { + return Utils.getSimpleObjectMapper().readValue(jsonString, PartitionLevelCircuitBreakerConfig.class); + } catch (JsonProcessingException e) { + throw new RuntimeException("Unable to convert from Json String", e); + } + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index 7792e46d7433..d237f408c62b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -9,7 +9,7 @@ import com.azure.cosmos.implementation.BadRequestException; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.Exceptions; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.InternalServerErrorException; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index 49ff2bd28dd3..f280a2581b58 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -11,7 +11,7 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.OpenConnectionResponse; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java index b719a350ef7d..26286b3a0b1f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java @@ -4,7 +4,7 @@ package com.azure.cosmos.implementation.directconnectivity; import com.azure.cosmos.CosmosContainerProactiveInitConfig; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.directconnectivity.rntbd.ProactiveOpenConnectionsProcessor; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index dff9c788e7b2..eaab1e2e132f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -12,7 +12,6 @@ import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.Exceptions; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.IRetryPolicy; @@ -192,12 +191,6 @@ private RxDocumentServiceResponse completeResponse( new RxDocumentServiceResponse(this.diagnosticsClientContext, storeResponse); rxDocumentServiceResponse.setCosmosDiagnostics(request.requestContext.cosmosDiagnostics); - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = addressResolver.getGlobalPartitionEndpointManagerForCircuitBreaker(); - - if (Configs.isPartitionLevelCircuitBreakerEnabled()) { - globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); - } - return rxDocumentServiceResponse; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java index 8901f3c94c7e..ecde649f6166 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java @@ -6,7 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.GoneException; import com.azure.cosmos.implementation.InvalidPartitionExceptionRetryPolicy; import com.azure.cosmos.implementation.MetadataDiagnosticsContext; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java index 539d2f5075cf..3ede5f8b1736 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Fetcher.java @@ -6,7 +6,7 @@ import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; @@ -175,12 +175,14 @@ private Mono> nextPage(RxDocumentServiceRequest request) { if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); - - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContext(); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContextForCircuitBreaker(); checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' must not be null!"); - feedOperationContextForCircuitBreaker.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); + if (!feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); + feedOperationContextForCircuitBreaker.addPartitionKeyRangeWithSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId()); + } } }) .doOnError(throwable -> completed.set(true)) @@ -201,19 +203,13 @@ private Mono> nextPage(RxDocumentServiceRequest request) { checkNotNull(request.requestContext, "Argument 'request.requestContext' must not be null!"); - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContext(); - + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker = request.requestContext.getFeedOperationContextForCircuitBreaker(); checkNotNull(feedOperationContextForCircuitBreaker, "Argument 'feedOperationContextForCircuitBreaker' must not be null!"); - if (feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!feedOperationContextForCircuitBreaker.getIsRequestHedged() && feedOperationContextForCircuitBreaker.hasPartitionKeyRangeSeenSuccess(request.requestContext.resolvedPartitionKeyRange, request.getResourceId())) { - - if (this.globalEndpointManager != null && this.globalPartitionEndpointManagerForCircuitBreaker != null) { - this.handleLocationExceptionForPartitionKeyRange(request); - } + if (!feedOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + if (this.globalEndpointManager != null) { + this.handleCancellationExceptionForPartitionKeyRange(request); } - } else { - this.handleLocationExceptionForPartitionKeyRange(request); } } @@ -223,7 +219,7 @@ private Mono> nextPage(RxDocumentServiceRequest request) { }); } - private void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { + private void handleCancellationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { URI firstContactedLocationEndpoint = diagnosticsAccessor.getFirstContactedLocationEndpoint(failedRequest.requestContext.cosmosDiagnostics); if (firstContactedLocationEndpoint != null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java index bdfcd17ab72d..28127efc8cf3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java @@ -5,7 +5,7 @@ import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.caches.IPartitionKeyRangeCache; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java index 94029c89ede6..d8e220af4131 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/Paginator.java @@ -4,7 +4,7 @@ import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.changefeed.common.ChangeFeedState; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java index 9fddc3f795cc..017cfe460bae 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ServerSideOnlyContinuationFetcherImpl.java @@ -6,8 +6,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; -import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; @@ -15,7 +14,6 @@ import reactor.core.publisher.Mono; import java.util.List; -import java.util.Set; import java.util.function.BiFunction; import java.util.function.Function; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index 5844fbc75ea7..2f6e23979c82 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -9,8 +9,6 @@ import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.DatabaseAccount; import com.azure.cosmos.implementation.DatabaseAccountLocation; -import com.azure.cosmos.implementation.GlobalPartitionEndpointManagerForCircuitBreaker; -import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.Strings; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java index 013d29ce5ab4..a8c2aed8574c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java @@ -76,6 +76,8 @@ exports com.azure.cosmos.implementation.routing to com.azure.cosmos.test; opens com.azure.cosmos to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; opens com.azure.cosmos.models to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; + exports com.azure.cosmos.implementation.circuitBreaker to com.azure.cosmos.encryption, com.azure.cosmos.kafka.connect, com.azure.cosmos.test; + opens com.azure.cosmos.implementation.circuitBreaker to com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; uses com.azure.cosmos.implementation.guava25.base.PatternCompiler; uses com.azure.core.util.tracing.Tracer; From 6797c228f9eb45be743ec46e9c2bee3f9b4539dc Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 14 Jun 2024 15:19:49 -0400 Subject: [PATCH 064/140] Fixing CI pipeline. --- .../azure/cosmos/implementation/Configs.java | 21 +++++++++++++++++++ .../implementation/RxDocumentClientImpl.java | 5 ++--- ...nsecutiveExceptionBasedCircuitBreaker.java | 13 ++++++++++++ 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 7399509c6802..63d22e642f3c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -205,6 +205,11 @@ public class Configs { public static final String DEFAULT_METRICS_CONFIG = CosmosMicrometerMetricsConfig.DEFAULT.toJson(); private static final String DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = PartitionLevelCircuitBreakerConfig.DEFAULT.toJson(); private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"; + private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"; + private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = 2; + private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS"; + private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; + public Configs() { this.sslContext = sslContextInit(); @@ -608,4 +613,20 @@ public static PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreaker return PartitionLevelCircuitBreakerConfig.fromJsonString(partitionLevelCircuitBreakerConfig); } + + public static int getStaleCollectionCacheRefreshRetryCount() { + return getIntValue(System.getProperty( + STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT, + firstNonNull( + emptyToNull(System.getenv().get(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT)), + STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT)), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + } + + public static int getStaleCollectionCacheRefreshRetryIntervalInSeconds() { + return getIntValue(System.getProperty( + STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS, + firstNonNull( + emptyToNull(System.getenv().get(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS)), + STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS)), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 24eed1939352..4de9605e278d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5930,9 +5930,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat CosmosEndToEndOperationLatencyPolicyConfig endToEndPolicyConfig = getEndToEndOperationLatencyPolicyConfig(nonNullRequestOptions, resourceType, operationType); - // todo: investigate retry policy in stale cache scenarios if (collectionRoutingMapValueHolder.v == null) { - return Mono.error(new CollectionRoutingMapNotFoundException("collectionRoutingMapValueHolder.v cannot be null!")); + return Mono.error(new CollectionRoutingMapNotFoundException("Argument 'collectionRoutingMapValueHolder.v' cannot be null!")); } nonNullRequestOptions.setPartitionKeyDefinition(collection.getPartitionKey()); @@ -6088,7 +6087,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat .doOnCancel(() -> diagnosticsFactory.merge(nonNullRequestOptions)); })) ) - .retryWhen(Retry.fixedDelay(10, Duration.ofSeconds(1)) + .retryWhen(Retry.fixedDelay(Configs.getStaleCollectionCacheRefreshRetryCount(), Duration.ofSeconds(Configs.getStaleCollectionCacheRefreshRetryIntervalInSeconds())) .filter(throwable -> throwable instanceof CollectionRoutingMapNotFoundException) .doBeforeRetry((retrySignal) -> this.collectionCache .refresh( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 972946513da7..d131a99bd1ed 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -186,6 +186,7 @@ public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManag return 20; } case Healthy: + return 0; case Unavailable: return 0; default: @@ -200,6 +201,8 @@ public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManag return 10; } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { return 20; + } else { + throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case HealthyTentative: if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { @@ -208,6 +211,8 @@ public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManag return 5; } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { return 3; + } else { + throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case Healthy: return 0; @@ -227,9 +232,13 @@ public int getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManager return 5; } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { return 3; + } else { + throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case Unavailable: + return 0; case HealthyWithFailures: + return 0; case Healthy: return 0; default: @@ -244,9 +253,13 @@ public int getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManager return 10; } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { return 5; + } else { + throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case Unavailable: + return 0; case HealthyWithFailures: + return 0; case Healthy: return 0; default: From cbb5ff853ca882638442f44edf71d999071b2b83 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 14 Jun 2024 15:44:36 -0400 Subject: [PATCH 065/140] Fixing faulty merge. --- .../implementation/ImplementationBridgeHelpers.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 126a3b9eb6c8..e9946d2b49f0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -859,7 +859,8 @@ CosmosDiagnosticsContext create( String trackingId, String connectionMode, String userAgent, - Integer sequenceNumber); + Integer sequenceNumber, + String queryStatement); CosmosDiagnosticsSystemUsageSnapshot createSystemUsageSnapshot( String cpu, @@ -912,6 +913,9 @@ boolean endOperation( Integer getSequenceNumber(CosmosDiagnosticsContext ctx); boolean isEmptyCompletion(CosmosDiagnosticsContext ctx); + + String getQueryStatement(CosmosDiagnosticsContext ctx); + } } @@ -1443,7 +1447,7 @@ public static void setCosmosExceptionAccessor(final CosmosExceptionAccessor newA public interface CosmosExceptionAccessor { CosmosException createCosmosException(int statusCode, Exception innerException); - List getReplicaStatusList(CosmosException cosmosException); + Map> getReplicaStatusList(CosmosException cosmosException); CosmosException setRntbdChannelStatistics(CosmosException cosmosException, RntbdChannelStatistics rntbdChannelStatistics); RntbdChannelStatistics getRntbdChannelStatistics(CosmosException cosmosException); @@ -1523,6 +1527,7 @@ CosmosClientTelemetryConfig createSnapshot( void setUseLegacyTracing(CosmosClientTelemetryConfig config, boolean useLegacyTracing); void setTracer(CosmosClientTelemetryConfig config, Tracer tracer); double getSamplingRate(CosmosClientTelemetryConfig config); + ShowQueryMode showQueryMode(CosmosClientTelemetryConfig config); double[] getDefaultPercentiles(CosmosClientTelemetryConfig config); boolean shouldPublishHistograms(CosmosClientTelemetryConfig config); boolean shouldApplyDiagnosticThresholdsForTransportLevelMeters(CosmosClientTelemetryConfig config); From d391a1cfc34fd61b8b5891aed58c327dfb8001a9 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 14 Jun 2024 16:08:10 -0400 Subject: [PATCH 066/140] Fixing CI pipeline. --- .../ConsecutiveExceptionBasedCircuitBreaker.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index d131a99bd1ed..6e28d758297b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -176,6 +176,8 @@ public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManag return 20; } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { return 40; + } else { + throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case HealthyTentative: if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { @@ -184,6 +186,8 @@ public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManag return 10; } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { return 20; + } else { + throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case Healthy: return 0; From 7e1fd08852041c2744530b1f35798925d800508a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 14 Jun 2024 16:26:32 -0400 Subject: [PATCH 067/140] Fixing CI pipeline. --- sdk/cosmos/azure-cosmos/src/main/java/module-info.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java index a8c2aed8574c..013d29ce5ab4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java @@ -76,8 +76,6 @@ exports com.azure.cosmos.implementation.routing to com.azure.cosmos.test; opens com.azure.cosmos to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; opens com.azure.cosmos.models to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; - exports com.azure.cosmos.implementation.circuitBreaker to com.azure.cosmos.encryption, com.azure.cosmos.kafka.connect, com.azure.cosmos.test; - opens com.azure.cosmos.implementation.circuitBreaker to com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; uses com.azure.cosmos.implementation.guava25.base.PatternCompiler; uses com.azure.core.util.tracing.Tracer; From 6c6760ddb0362f3b7cdfd9a6d71cebbb4b0ee20f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 14 Jun 2024 17:14:04 -0400 Subject: [PATCH 068/140] Fixing CI pipeline. --- sdk/cognitiveservices/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cognitiveservices/pom.xml b/sdk/cognitiveservices/pom.xml index 0c1f8ffac9ed..71329235021d 100644 --- a/sdk/cognitiveservices/pom.xml +++ b/sdk/cognitiveservices/pom.xml @@ -26,7 +26,7 @@ ms-azure-cs-newssearch ms-azure-cs-qnamaker ms-azure-cs-spellcheck - ms-azure-cs-textanalytics + ms-azure-cs-videosearch ms-azure-cs-visualsearch ms-azure-cs-websearch From 3ca4da93bd8dd28a0bfea938bfba506def6df9e7 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 14 Jun 2024 23:33:16 -0400 Subject: [PATCH 069/140] Fixing CI pipeline. --- .../azure/cosmos/implementation/Configs.java | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 63d22e642f3c..9810ed245e84 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -205,7 +205,7 @@ public class Configs { public static final String DEFAULT_METRICS_CONFIG = CosmosMicrometerMetricsConfig.DEFAULT.toJson(); private static final String DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = PartitionLevelCircuitBreakerConfig.DEFAULT.toJson(); private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"; - private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"; + private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT"; private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = 2; private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS"; private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; @@ -615,18 +615,32 @@ public static PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreaker } public static int getStaleCollectionCacheRefreshRetryCount() { - return getIntValue(System.getProperty( - STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT, - firstNonNull( - emptyToNull(System.getenv().get(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT)), - STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT)), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + + if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { + return Integer.valueOf(valueFromSystemProperty); + } + + String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); + if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { + return Integer.valueOf(valueFromEnvVariable); + } + + return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT; } public static int getStaleCollectionCacheRefreshRetryIntervalInSeconds() { - return getIntValue(System.getProperty( - STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS, - firstNonNull( - emptyToNull(System.getenv().get(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS)), - STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS)), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + + if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { + return Integer.valueOf(valueFromSystemProperty); + } + + String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); + if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { + return Integer.valueOf(valueFromEnvVariable); + } + + return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS; } } From 6c90d2561170f77366f90a73068ec2615914e521 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 15 Jun 2024 10:22:54 -0400 Subject: [PATCH 070/140] Fixing CI pipeline. --- sdk/cosmos/azure-cosmos/src/main/java/module-info.java | 1 + 1 file changed, 1 insertion(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java index 013d29ce5ab4..059bc624e6bc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java @@ -69,6 +69,7 @@ opens com.azure.cosmos.util to com.fasterxml.jackson.databind; opens com.azure.cosmos.implementation.throughputControl to com.fasterxml.jackson.databind; opens com.azure.cosmos.implementation.throughputControl.controller.group.global to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.circuitBreaker to com.fasterxml.jackson.databind; // exporting packages specifically for cosmos test exports com.azure.cosmos.implementation.faultinjection to com.azure.cosmos.test; From a6483041fb91b298028e05e69728607b51e86f72 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 15 Jun 2024 15:48:30 -0400 Subject: [PATCH 071/140] Fixing CI pipeline. --- .../cosmos/benchmark/AsyncBenchmark.java | 6 + .../azure/cosmos/benchmark/Configuration.java | 7 + .../azure/cosmos/benchmark/SyncBenchmark.java | 6 + .../RxDocumentClientUnderTest.java | 57 +++--- ...ProactiveOpenConnectionsProcessorTest.java | 2 - .../DocumentServiceRequestContext.java | 10 -- ...nsecutiveExceptionBasedCircuitBreaker.java | 3 + ...itionEndpointManagerForCircuitBreaker.java | 167 ++++++++++-------- .../rntbd/RntbdRequestManager.java | 1 - .../implementation/routing/LocationCache.java | 17 -- 10 files changed, 147 insertions(+), 129 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java index ce0e9db24425..c05b78e412a7 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java @@ -88,6 +88,12 @@ abstract class AsyncBenchmark { logger = LoggerFactory.getLogger(this.getClass()); configuration = cfg; + if (configuration.isPartitionLevelCircuitBreakerEnabled()) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + } + CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() .endpoint(cfg.getServiceEndpoint()) .key(cfg.getMasterKey()) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java index b3292b6340e2..17d26a7f72c3 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java @@ -137,6 +137,9 @@ public class Configuration { @Parameter(names = "-isRegionScopedSessionContainerEnabled", description = "A flag to denote whether region scoped session container is enabled") private String isRegionScopedSessionContainerEnabled = String.valueOf(false); + @Parameter(names = "isPartitionLevelCircuitBreakerEnabled", description = "A flag to denote whether partition level circuit breaker is enabled.") + private String isPartitionLevelCircuitBreakerEnabled = String.valueOf(true); + @Parameter(names = "-operation", description = "Type of Workload:\n" + "\tReadThroughput- run a READ workload that prints only throughput *\n" + "\tReadThroughputWithMultipleClients - run a READ workload that prints throughput and latency for multiple client read.*\n" @@ -639,6 +642,10 @@ public boolean isRegionScopedSessionContainerEnabled() { return Boolean.parseBoolean(isRegionScopedSessionContainerEnabled); } + public boolean isPartitionLevelCircuitBreakerEnabled() { + return Boolean.parseBoolean(isPartitionLevelCircuitBreakerEnabled); + } + public void tryGetValuesFromSystem() { serviceEndpoint = StringUtils.defaultString(Strings.emptyToNull(System.getenv().get("SERVICE_END_POINT")), serviceEndpoint); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java index 3c48c5821e03..ccadcb816b21 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java @@ -114,6 +114,12 @@ public T apply(T o, Throwable throwable) { configuration = cfg; logger = LoggerFactory.getLogger(this.getClass()); + if (configuration.isPartitionLevelCircuitBreakerEnabled()) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + } + CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() .endpoint(cfg.getServiceEndpoint()) .preferredRegions(cfg.getPreferredRegionsList()) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java index 411e78d34e4a..927bf09c1764 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java @@ -5,6 +5,7 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.cosmos.ClientUnderTestBuilder; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.http.HttpClient; import com.azure.cosmos.implementation.http.HttpRequest; import com.azure.cosmos.implementation.http.HttpResponse; @@ -64,31 +65,33 @@ public RxDocumentClientUnderTest(URI serviceEndpoint, init(null, null); } -// RxGatewayStoreModel createRxGatewayProxy( -// ISessionContainer sessionContainer, -// ConsistencyLevel consistencyLevel, -// QueryCompatibilityMode queryCompatibilityMode, -// UserAgentContainer userAgentContainer, -// GlobalEndpointManager globalEndpointManager, -// HttpClient rxOrigClient, -// ApiType apiType) { -// -// origHttpClient = rxOrigClient; -// spyHttpClient = Mockito.spy(rxOrigClient); -// -// doAnswer((Answer>) invocationOnMock -> { -// HttpRequest httpRequest = invocationOnMock.getArgument(0, HttpRequest.class); -// Duration responseTimeout = invocationOnMock.getArgument(1, Duration.class); -// httpRequests.add(httpRequest); -// return origHttpClient.send(httpRequest, responseTimeout); -// }).when(spyHttpClient).send(Mockito.any(HttpRequest.class), Mockito.any(Duration.class)); -// -// return super.createRxGatewayProxy(sessionContainer, -// consistencyLevel, -// queryCompatibilityMode, -// userAgentContainer, -// globalEndpointManager, -// spyHttpClient, -// apiType); -// } + RxGatewayStoreModel createRxGatewayProxy( + ISessionContainer sessionContainer, + ConsistencyLevel consistencyLevel, + QueryCompatibilityMode queryCompatibilityMode, + UserAgentContainer userAgentContainer, + GlobalEndpointManager globalEndpointManager, + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker, + HttpClient rxOrigClient, + ApiType apiType) { + + origHttpClient = rxOrigClient; + spyHttpClient = Mockito.spy(rxOrigClient); + + doAnswer((Answer>) invocationOnMock -> { + HttpRequest httpRequest = invocationOnMock.getArgument(0, HttpRequest.class); + Duration responseTimeout = invocationOnMock.getArgument(1, Duration.class); + httpRequests.add(httpRequest); + return origHttpClient.send(httpRequest, responseTimeout); + }).when(spyHttpClient).send(Mockito.any(HttpRequest.class), Mockito.any(Duration.class)); + + return super.createRxGatewayProxy(sessionContainer, + consistencyLevel, + queryCompatibilityMode, + userAgentContainer, + globalEndpointManager, + spyHttpClient, + apiType, + globalPartitionEndpointManagerForCircuitBreaker); + } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java index ba74763bb3bc..e24f5968e9d6 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/ProactiveOpenConnectionsProcessorTest.java @@ -163,8 +163,6 @@ public void recordNewAddressesAfterSplitTest() { try { - Thread.sleep(10_000); - int totalRequests = 200; List preferredRegions = this.writeRegionMap.keySet().stream().collect(Collectors.toList()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index cbd137d1fa8e..bd406eba9c4d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -53,8 +53,6 @@ public class DocumentServiceRequestContext implements Cloneable { private volatile long approximateBloomFilterInsertionCount; private final Set sessionTokenEvaluationResults = ConcurrentHashMap.newKeySet(); private volatile List unavailableRegionsForPartition; - private volatile boolean isRequestHedged = false; - public volatile boolean isRequestSendingStarted = false; // For cancelled rntbd requests, track the response as OperationCancelledException which later will be used to populate the cosmosDiagnostics public final Map rntbdCancelledRequestMap = new ConcurrentHashMap<>(); @@ -174,14 +172,6 @@ public void setExcludeRegions(List excludeRegions) { this.excludeRegions = excludeRegions; } - public void setIsRequestHedged(boolean isRequestHedged) { - this.isRequestHedged = isRequestHedged; - } - - public boolean isRequestHedged() { - return this.isRequestHedged; - } - public List getUnavailableRegionsForPartition() { return unavailableRegionsForPartition; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 6e28d758297b..76b1fdb0688b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -1,3 +1,6 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + package com.azure.cosmos.implementation.circuitBreaker; import org.slf4j.Logger; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index f5761f850c60..57184cc3d0ae 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -57,8 +57,8 @@ public void init() { public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest request, URI failedLocation) { - checkNotNull(request, "request cannot be null!"); - checkNotNull(request.requestContext, "requestContext cannot be null!"); + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; @@ -66,12 +66,10 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest return; } - String resourceId = request.getResourceId(); - checkNotNull(resourceId, "resourceId cannot be null!"); - - logger.info("Handling exception : {}", resourceId); + String collectionResourceId = request.getResourceId(); + checkNotNull(collectionResourceId, "Argument 'collectionResourceId' cannot be null!"); - PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, collectionResourceId); AtomicBoolean isFailoverPossible = new AtomicBoolean(true); AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); @@ -104,23 +102,33 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest return; } + if (logger.isWarnEnabled()) { + logger.warn("It is not possible to mark region {} as Unavailable for partition key range {}-{} and collection rid {} " + + "as all regions will be Unavailable in that case, will remove health status tracking for this partition!", + this.globalEndpointManager.getRegionName( + failedLocation, request.isReadOnlyRequest() ? OperationType.Read : OperationType.Create), + partitionKeyRange.getMinInclusive(), + partitionKeyRange.getMaxExclusive(), + collectionResourceId); + } + // no regions to fail over to this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.remove(partitionKeyRangeWrapper); } public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest request) { - checkNotNull(request, "request cannot be null!"); - checkNotNull(request.requestContext, "requestContext cannot be null!"); + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; + // todo: how to handle this? if (partitionKeyRange == null) { return; } String resourceId = request.getResourceId(); -// logger.info("Handling success : {}", resourceId); PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); URI succeededLocation = request.requestContext.locationEndpointToRoute; @@ -140,14 +148,12 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r }); } - public List getUnavailableLocationEndpointsForPartitionKeyRange(String resourceId, PartitionKeyRange partitionKeyRange) { + public List getUnavailableLocationEndpointsForPartitionKeyRange(String collectionResourceId, PartitionKeyRange partitionKeyRange) { - checkNotNull(partitionKeyRange, "Supplied partitionKeyRange cannot be null!"); - checkNotNull(resourceId, "Supplied resourceId cannot be null!"); + checkNotNull(partitionKeyRange, "Argument 'partitionKeyRange' cannot be null!"); + checkNotNull(collectionResourceId, "Argument 'collectionResourceId' cannot be null!"); -// logger.info("Fetching unavailable regions for resource address : {}", resourceId); - - PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, collectionResourceId); PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); @@ -362,14 +368,12 @@ public LocationSpecificContext handleSuccess( boolean forceStatusChange, boolean isReadOnlyRequest) { - logger.info("Handling success"); - - LocationHealthStatus currentStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); - int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); - int successCountActual = isReadOnlyRequest ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForRead(); + int exceptionCountActual + = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); - switch (currentStatusSnapshot) { + switch (currentLocationHealthStatusSnapshot) { case Healthy: break; case HealthyWithFailures: @@ -387,15 +391,17 @@ public LocationSpecificContext handleSuccess( LocationSpecificContext locationSpecificContextInner = GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificContext, isReadOnlyRequest); - logger.info("Try to switch to Healthy but actual success count : {}", successCountActual); - if (GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificContextInner, isReadOnlyRequest)) { - logger.info("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + return this.transitionHealthStatus(LocationHealthStatus.Healthy); } else { return locationSpecificContextInner; @@ -407,28 +413,33 @@ public LocationSpecificContext handleSuccess( if (!forceStatusChange) { if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { - // todo: make debug - logger.info("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); + } + } else { + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), partitionKeyRangeWrapper.resourceId, GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - - return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); } - } else { - logger.info("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); } break; default: - throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); + throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); } return locationSpecificContext; @@ -440,18 +451,20 @@ public LocationSpecificContext handleException( URI locationWithException, boolean isReadOnlyRequest) { - logger.warn("Handling exception"); + LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); - LocationHealthStatus currentStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); - - switch (currentStatusSnapshot) { + switch (currentLocationHealthStatusSnapshot) { case Healthy: - logger.info("Partition {}-{} of collection : {} marked as HealthyWithFailures from Healthy for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as HealthyWithFailures from Healthy for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + return this.transitionHealthStatus(LocationHealthStatus.HealthyWithFailures); case HealthyWithFailures: if (!GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { @@ -459,24 +472,30 @@ public LocationSpecificContext handleException( LocationSpecificContext locationSpecificContextInner = GlobalPartitionEndpointManagerForCircuitBreaker .this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); - logger.info("Partition {}-{} of collection : {} has exception count of {} for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - isReadOnlyRequest ? locationSpecificContextInner.getExceptionCountForRead() : locationSpecificContextInner.getExceptionCountForWrite(), - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} has exception count of {} for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + isReadOnlyRequest ? locationSpecificContextInner.getExceptionCountForRead() : locationSpecificContextInner.getExceptionCountForWrite(), + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } return locationSpecificContextInner; } else { GlobalPartitionEndpointManagerForCircuitBreaker .this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); - logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyWithFailures for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + + if (logger.isDebugEnabled()) { + logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyWithFailures for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + return this.transitionHealthStatus(LocationHealthStatus.Unavailable); } case HealthyTentative: @@ -484,16 +503,20 @@ public LocationSpecificContext handleException( return GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); } else { - logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}", + partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), + partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), + partitionKeyRangeWrapper.resourceId, + GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + return this.transitionHealthStatus(LocationHealthStatus.Unavailable); } default: - throw new IllegalStateException("Unsupported health status: " + currentStatusSnapshot); + throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java index 3bb49746e35f..6d270bc21ff4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java @@ -636,7 +636,6 @@ public void write(final ChannelHandlerContext context, final Object message, fin if (!record.isCancelled()) { record.setSendingRequestHasStarted(); - record.args().serviceRequest().requestContext.isRequestSendingStarted = true; this.timestamps.channelWriteAttempted(); if (this.serverErrorInjector != null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index 2f6e23979c82..a97d2c6d7aed 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -237,14 +237,6 @@ public UnmodifiableList getApplicableWriteEndpoints(List excludedRe List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); if (unavailableRegionsForPartition != null) { - - // todo: remove logging statements eventually - logger.info("Printing unavailable location for partition"); - - for (String unavailableRegionForPartition : unavailableRegionsForPartition) { - logger.info("Unavailable region : {}", unavailableRegionForPartition); - } - effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); } @@ -257,7 +249,6 @@ public UnmodifiableList getApplicableWriteEndpoints(List excludedRe } public UnmodifiableList getApplicableReadEndpoints(RxDocumentServiceRequest request) { - logger.info("In getApplicableReadEndpoints with RxDocumentServiceRequest request"); return this.getApplicableReadEndpoints(request.requestContext.getExcludeRegions(), request.requestContext.getUnavailableRegionsForPartition()); } @@ -279,14 +270,6 @@ public UnmodifiableList getApplicableReadEndpoints(List excludedReg List effectiveExcludedRegionsWithPartitionUnavailableRegions = new ArrayList<>(effectiveExcludedRegions); if (unavailableRegionsForPartition != null) { - - // todo: remove logging statements eventually - logger.info("Printing unavailable region for partition"); - - for (String unavailableRegionForPartition : unavailableRegionsForPartition) { - logger.info("Unavailable region : {}", unavailableRegionForPartition); - } - effectiveExcludedRegionsWithPartitionUnavailableRegions.addAll(unavailableRegionsForPartition); } From 9060f3f7e7e8f489749b50561e3a6128dbf3f50e Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 15 Jun 2024 16:26:25 -0400 Subject: [PATCH 072/140] Fixing CI pipeline. --- .../com/azure/cosmos/benchmark/AsyncBenchmark.java | 11 +++++++++++ .../com/azure/cosmos/benchmark/SyncBenchmark.java | 10 ++++++++++ .../cosmos/implementation/routing/LocationCache.java | 3 --- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java index c05b78e412a7..df76564b8bdf 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java @@ -174,6 +174,17 @@ abstract class AsyncBenchmark { ).block(); cosmosAsyncContainer = cosmosAsyncDatabase.getContainer(this.configuration.getCollectionId()); + + // add some delay to allow container to be created across multiple regions + // container creation across regions is an async operation + // without the delay a container may not be available to process reads / writes + + try { + Thread.sleep(30_000); + } catch (Exception exception) { + throw new RuntimeException(exception); + } + logger.info("Collection {} is created for this test", this.configuration.getCollectionId()); collectionCreated = true; } else { diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java index ccadcb816b21..e5666826c884 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java @@ -177,6 +177,16 @@ public T apply(T o, Throwable throwable) { ThroughputProperties.createManualThroughput(this.configuration.getThroughput())); cosmosContainer = cosmosDatabase.getContainer(this.configuration.getCollectionId()); logger.info("Collection {} is created for this test", this.configuration.getCollectionId()); + + // add some delay to allow container to be created across multiple regions + // container creation across regions is an async operation + // without the delay a container may not be available to process reads / writes + try { + Thread.sleep(30_000); + } catch (Exception exception) { + throw new RuntimeException(exception); + } + collectionCreated = true; } else { throw e; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index a97d2c6d7aed..e6af386c5822 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -205,8 +205,6 @@ public URI resolveServiceEndpoint(RxDocumentServiceRequest request) { return this.defaultEndpoint; } } else { - - logger.info("In resolveServiceEndpoint"); UnmodifiableList endpoints = request.getOperationType().isWriteOperation()? this.getApplicableWriteEndpoints(request) : this.getApplicableReadEndpoints(request); return endpoints.get(locationIndex % endpoints.size()); @@ -214,7 +212,6 @@ public URI resolveServiceEndpoint(RxDocumentServiceRequest request) { } public UnmodifiableList getApplicableWriteEndpoints(RxDocumentServiceRequest request) { - logger.info("In getApplicableWriteEndpoints with RxDocumentServiceRequest request"); return this.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions(), request.requestContext.getUnavailableRegionsForPartition()); } From 9fb79dd654ccec0a3ada346a0841a61baf4ab8a5 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 17 Jun 2024 13:11:05 -0400 Subject: [PATCH 073/140] Fixing live tests pipeline. --- .../PartitionLevelCircuitBreakerTests.java | 1 - .../azure/cosmos/rx/ResourceTokenTest.java | 41 +++++++++++-------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index eb3a54f286f8..f107bb6d8828 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -82,7 +82,6 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { = ImplementationBridgeHelpers.CosmosAsyncContainerHelper.getCosmosAsyncContainerAccessor(); private List writeRegions; - private static final CosmosEndToEndOperationLatencyPolicyConfig noEndToEndTimeout = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofDays(1)).build(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java index 6c68e43b642e..df6c5b9acae1 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java @@ -39,6 +39,7 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import javax.print.Doc; import java.util.ArrayList; import java.util.List; import java.util.UUID; @@ -172,30 +173,30 @@ public Object[][] collectionAndPermissionData() { public Object[][] documentAndPermissionData() { return new Object[][]{ //These tests will try to read document from its own getPermission and validate it, both with request Id and getName. - {createdDocument.getSelfLink(), createdDocPermission, createdDocument.getId(), null}, + {createdDocument.getSelfLink(), createdDocPermission, createdCollection, createdDocument.getId(), null}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollection.getId(), createdDocument.getId()), createdDocPermissionWithName, createdDocument.getId(), null}, //These tests will try to read document from its getPermission having partition getKey 1 and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey.getSelfLink(), createdDocPermissionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, + {createdDocumentWithPartitionKey.getSelfLink(), createdDocPermissionWithPartitionKey, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, // TODO uncomment after https://github.com/Azure/azure-sdk-for-java/issues/26050 // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey.getId()), createdDocPermissionWithPartitionKeyWithName // , createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //These tests will try to read document from its getPermission having partition getKey 2 and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey2.getSelfLink(), createdDocPermissionWithPartitionKey2, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, + {createdDocumentWithPartitionKey2.getSelfLink(), createdDocPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), // createdDocPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, // These tests will try to read document from its parent collection getPermission and validate it, both with request Id and getName. - {createdDocument.getSelfLink(), createdCollPermission, createdDocument.getId(), null}, + {createdDocument.getSelfLink(), createdCollPermission, createdCollection, createdDocument.getId(), null}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollection.getId(), createdDocument.getId()), createdCollPermissionWithName, createdDocument.getId(), null}, //This test will try to read document from collection getPermission having partition getKey 1 and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, + {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //{TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey.getId()), createdColPermissionWithPartitionKeyWithName, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //This test will try to read document from collection getPermission having partition getKey 2 and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey2, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, + {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, //{TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), createdColPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2} }; @@ -205,7 +206,7 @@ public Object[][] documentAndPermissionData() { public Object[][] documentAndPermissionDataForResourceNotFound() { return new Object[][]{ //This test will try to read document from its resource token directly and validate it. - {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, PARTITION_KEY_VALUE}, + {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, createdCollectionWithPartitionKey, PARTITION_KEY_VALUE}, //This test will try to read document from its parent collection resource token directly and validate it. // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), // createdColPermissionWithPartitionKeyWithName, PARTITION_KEY_VALUE} @@ -216,13 +217,13 @@ public Object[][] documentAndPermissionDataForResourceNotFound() { public Object[][] documentAndMultipleCollPermissionData() { return new Object[][]{ //These tests will try to read document from partition 1 with two collection getPermissions having different partition keys and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdDocumentWithPartitionKey.getId(), + {createdDocumentWithPartitionKey.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey.getId()), createdColPermissionWithPartitionKeyWithName // , createdColPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey.getId(), PARTITION_KEY_VALUE}, //These tests will try to read document from partition 1 with two collection getPermissions having different partition keys and validate it, both with request Id and getName. - {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdDocumentWithPartitionKey2.getId(), + {createdDocumentWithPartitionKey2.getSelfLink(), createdColPermissionWithPartitionKey, createdColPermissionWithPartitionKey2, createdCollectionWithPartitionKey, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2}, // {TestUtils.getDocumentNameLink(createdDatabase.getId(), createdCollectionWithPartitionKey.getId(), createdDocumentWithPartitionKey2.getId()), createdColPermissionWithPartitionKeyWithName // , createdColPermissionWithPartitionKey2WithName, createdDocumentWithPartitionKey2.getId(), PARTITION_KEY_VALUE_2} @@ -286,11 +287,13 @@ public void readCollectionFromPermissionFeed(String collectionUrl, Permission pe * * @throws Exception */ - @Test(groups = { "fast" }, dataProvider = "documentAndPermissionData", timeOut = TIMEOUT) - public void readDocumentFromPermissionFeed(String documentUrl, Permission permission, String documentId, String partitionKey) throws Exception { + @Test(groups = { "fast" }, dataProvider = "documentAndPermissionData"/*, timeOut = TIMEOUT*/) + public void readDocumentFromPermissionFeed(String documentUrl, Permission permission, DocumentCollection documentCollection, String documentId, String partitionKey) throws Exception { AsyncDocumentClient asyncClientResourceToken = null; + try { List permissionFeed = new ArrayList<>(); + permissionFeed.add(permission); ConnectionPolicy defaultPolicy = ConnectionPolicy.getDefaultPolicy(); defaultPolicy.setConnectionMode(ConnectionMode.GATEWAY); @@ -310,8 +313,9 @@ public void readDocumentFromPermissionFeed(String documentUrl, Permission permis } else { options.setPartitionKey(PartitionKey.NONE); } + Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + .readDocument(documentUrl, options, documentCollection.getSelfLink()); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(documentId).build(); validateSuccess(readObservable, validator); @@ -342,7 +346,7 @@ public void readDocumentFromResouceToken(String resourceToken) throws Exception RequestOptions options = new RequestOptions(); options.setPartitionKey(PartitionKey.NONE); Mono> readObservable = asyncClientResourceToken - .readDocument(createdDocument.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + .readDocument(createdDocument.getSelfLink(), options, createdCollection.getSelfLink()); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(createdDocument.getId()).build(); validateSuccess(readObservable, validator); @@ -357,7 +361,7 @@ public void readDocumentFromResouceToken(String resourceToken) throws Exception * @throws Exception */ @Test(groups = {"fast"}, dataProvider = "documentAndMultipleCollPermissionData", timeOut = TIMEOUT) - public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(String documentUrl, Permission collPermission1, Permission collPermission2, String documentId, String partitionKey) throws Exception { + public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(String documentUrl, Permission collPermission1, Permission collPermission2, DocumentCollection documentCollection, String documentId, String partitionKey) throws Exception { AsyncDocumentClient asyncClientResourceToken = null; try { List permissionFeed = new ArrayList<>(); @@ -378,7 +382,7 @@ public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(Strin RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(partitionKey)); Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + .readDocument(documentUrl, options, documentCollection.getSelfLink()); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(documentId).build(); validateSuccess(readObservable, validator); @@ -394,7 +398,7 @@ public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(Strin * @throws Exception */ @Test(groups = { "fast" },dataProvider = "documentAndPermissionDataForResourceNotFound", timeOut = TIMEOUT) - public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound(String documentUrl, Permission permission, String partitionKey) throws Exception { + public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound(String documentUrl, Permission permission, DocumentCollection documentCollection, String partitionKey) throws Exception { AsyncDocumentClient asyncClientResourceToken = null; try { List permissionFeed = new ArrayList<>(); @@ -413,8 +417,9 @@ public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound( .build(); RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(partitionKey)); + Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + .readDocument(documentUrl, options, documentCollection.getSelfLink()); FailureValidator validator = new FailureValidator.Builder().resourceNotFound().build(); validateFailure(readObservable, validator); } finally { @@ -446,7 +451,7 @@ public void readDocumentFromCollPermissionWithDiffPartitionKey_WithException() t RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(PARTITION_KEY_VALUE_2)); Mono> readObservable = asyncClientResourceToken - .readDocument(createdDocumentWithPartitionKey.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + .readDocument(createdDocumentWithPartitionKey.getSelfLink(), options, createdCollection.getSelfLink()); FailureValidator validator = new FailureValidator.Builder().resourceTokenNotFound().build(); validateFailure(readObservable, validator); } finally { From 7472e3b5cd322a7fbbbb70a6ebc3744dcb467e2d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 17 Jun 2024 21:26:36 -0400 Subject: [PATCH 074/140] Fixing live tests pipeline. --- .../PartitionLevelCircuitBreakerTests.java | 450 +++++++----------- 1 file changed, 179 insertions(+), 271 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index f107bb6d8828..6261d4e5bebd 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -20,6 +20,7 @@ import com.azure.cosmos.faultinjection.FaultInjectionTestBase; import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.circuitBreaker.ConsecutiveExceptionBasedCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; @@ -126,6 +127,50 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { } }; + Consumer> validateResponseHasSuccess = (responseWrapper) -> { + + assertThat(responseWrapper.cosmosException).isNull(); + + if (responseWrapper.feedResponse != null) { + assertThat(responseWrapper.feedResponse.getCosmosDiagnostics()).isNotNull(); + assertThat(responseWrapper.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()).isNotNull(); + + CosmosDiagnosticsContext diagnosticsContext = responseWrapper.feedResponse.getCosmosDiagnostics().getDiagnosticsContext(); + + assertThat(diagnosticsContext.getStatusCode() == HttpConstants.StatusCodes.OK || diagnosticsContext.getStatusCode() == HttpConstants.StatusCodes.NOT_MODIFIED).isTrue(); + } else if (responseWrapper.cosmosItemResponse != null) { + assertThat(responseWrapper.cosmosItemResponse.getDiagnostics()).isNotNull(); + assertThat(responseWrapper.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()).isNotNull(); + + CosmosDiagnosticsContext diagnosticsContext = responseWrapper.cosmosItemResponse.getDiagnostics().getDiagnosticsContext(); + + assertThat(HttpConstants.StatusCodes.OK <= diagnosticsContext.getStatusCode() && diagnosticsContext.getStatusCode() <= HttpConstants.StatusCodes.NO_CONTENT).isTrue(); + } else if (responseWrapper.batchResponse != null) { + assertThat(responseWrapper.batchResponse.getDiagnostics()).isNotNull(); + assertThat(responseWrapper.batchResponse.getDiagnostics().getDiagnosticsContext()).isNotNull(); + + CosmosDiagnosticsContext diagnosticsContext = responseWrapper.batchResponse.getDiagnostics().getDiagnosticsContext(); + + assertThat(HttpConstants.StatusCodes.OK <= diagnosticsContext.getStatusCode() && diagnosticsContext.getStatusCode() <= HttpConstants.StatusCodes.NO_CONTENT).isTrue(); + } + }; + + Consumer> validateResponseHasOperationCancelledException = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); + assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.REQUEST_TIMEOUT); + assertThat(responseWrapper.cosmosException.getSubStatusCode()).isEqualTo(HttpConstants.SubStatusCodes.CLIENT_OPERATION_TIMEOUT); + }; + + Consumer> validateResponseHasInternalServerError = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); + assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.INTERNAL_SERVER_ERROR); + }; + + Consumer> validateResponseHasServiceUnavailableError = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); + assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE); + }; + private final Function> buildServiceUnavailableError = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; @@ -225,6 +270,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + validateResponseHasSuccess, + validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -240,6 +287,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -255,6 +304,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -267,9 +318,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateServiceUnavailableRules, + noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -285,6 +339,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -300,6 +356,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -315,6 +373,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -330,6 +390,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -345,6 +407,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -360,6 +424,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -375,6 +441,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -390,6 +458,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -405,6 +475,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -420,6 +492,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -435,6 +509,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -450,6 +526,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -466,6 +544,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -482,6 +562,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -497,6 +579,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -512,6 +596,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -527,6 +613,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -542,6 +630,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -557,6 +647,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -572,21 +664,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -602,6 +681,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -617,6 +698,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -632,21 +715,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateReadOrWriteSessionNotAvailableRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -662,6 +732,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -677,6 +749,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -692,71 +766,13 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, - new Object[]{ - String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), - generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), - generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), - generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), - generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, new Object[] { String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -767,146 +783,13 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { noEndToEndTimeout, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, - new Object[]{ - String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), - generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), - generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -918,6 +801,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -934,36 +819,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), - generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), - generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -979,6 +836,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -994,6 +853,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1009,6 +870,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1024,6 +887,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1039,6 +904,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, noRegionSwitchHint, nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1054,6 +921,8 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, noRegionSwitchHint, !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1101,6 +970,8 @@ public Object[][] readManyTestConfigs() { executeReadManyOperation, noEndToEndTimeout, noRegionSwitchHint, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1116,6 +987,8 @@ public Object[][] readManyTestConfigs() { executeReadManyOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1131,6 +1004,8 @@ public Object[][] readManyTestConfigs() { executeReadManyOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1146,6 +1021,8 @@ public Object[][] readManyTestConfigs() { executeReadManyOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1161,6 +1038,8 @@ public Object[][] readManyTestConfigs() { executeReadManyOperation, noEndToEndTimeout, noRegionSwitchHint, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1210,6 +1089,8 @@ public Object[][] readAllTestConfigs() { executeReadManyOperation, noEndToEndTimeout, noRegionSwitchHint, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1225,6 +1106,8 @@ public Object[][] readAllTestConfigs() { executeReadManyOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1240,6 +1123,8 @@ public Object[][] readAllTestConfigs() { executeReadManyOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1255,6 +1140,8 @@ public Object[][] readAllTestConfigs() { executeReadManyOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1270,6 +1157,8 @@ public Object[][] readAllTestConfigs() { executeReadManyOperation, noEndToEndTimeout, noRegionSwitchHint, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, @@ -1278,7 +1167,7 @@ public Object[][] readAllTestConfigs() { }; } - @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs") + @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs", timeOut = 80 * TIMEOUT) public void operationHitsTerminalExceptionAcrossKRegions( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -1286,6 +1175,8 @@ public void operationHitsTerminalExceptionAcrossKRegions( CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, CosmosRegionSwitchHint regionSwitchHint, Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, @@ -1374,6 +1265,8 @@ public void operationHitsTerminalExceptionAcrossKRegions( generateFaultInjectionRules, executeDataPlaneOperation, regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, @@ -1381,7 +1274,7 @@ public void operationHitsTerminalExceptionAcrossKRegions( 15); } - @Test(groups = {"multi-master"}, dataProvider = "readManyTestConfigs") + @Test(groups = {"multi-master"}, dataProvider = "readManyTestConfigs", timeOut = 80 * TIMEOUT) public void readManyOperationHitsTerminalExceptionAcrossKRegions( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -1389,6 +1282,8 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( Function> executeDataPlaneOperation, CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, @@ -1472,6 +1367,8 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( generateFaultInjectionRules, executeDataPlaneOperation, regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, @@ -1479,7 +1376,7 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( 15); } - @Test(groups = {"multi-master"}, dataProvider = "readAllTestConfigs") + @Test(groups = {"multi-master"}, dataProvider = "readAllTestConfigs", timeOut = 80 * TIMEOUT) public void readAllOperationHitsTerminalExceptionAcrossKRegions( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -1487,6 +1384,8 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( Function> executeDataPlaneOperation, CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, @@ -1571,6 +1470,8 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( generateFaultInjectionRules, executeDataPlaneOperation, regionSwitchHint, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, @@ -1812,6 +1713,8 @@ private void execute( Function> generateFaultInjectionRules, Function> executeDataPlaneOperation, CosmosRegionSwitchHint regionSwitchHint, + Consumer> validateResponseInPresenceOfFailures, + Consumer> validateResponseInAbsenceOfFailures, Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, @@ -1928,21 +1831,31 @@ private void execute( ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); - int expectedCircuitBreakingThreshold = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? 5 : 10; + ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker(); + + int expectedCircuitBreakingThreshold + = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? + consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, false) : + consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, true); if (!hasReachedCircuitBreakingThreshold) { hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == globalPartitionEndpointManagerForCircuitBreaker.getExceptionCountByPartitionKeyRange( new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId())); + validateResponseInPresenceOfFailures.accept(response); } else { executionCountAfterCircuitBreakingThresholdBreached++; } + if (executionCountAfterCircuitBreakingThresholdBreached > 1) { + validateResponseInAbsenceOfFailures.accept(response); + } + if (response.cosmosItemResponse != null) { assertThat(response.cosmosItemResponse).isNotNull(); assertThat(response.cosmosItemResponse.getDiagnostics()).isNotNull(); if (executionCountAfterCircuitBreakingThresholdBreached > 1) { - logger.info("In circuit breaking assertion for item response..."); validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.cosmosItemResponse.getDiagnostics().getDiagnosticsContext()); } } else if (response.feedResponse != null) { @@ -1950,7 +1863,6 @@ private void execute( assertThat(response.feedResponse.getCosmosDiagnostics()).isNotNull(); if (executionCountAfterCircuitBreakingThresholdBreached > 1) { - logger.info("In circuit breaking assertion for feed response..."); validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.feedResponse.getCosmosDiagnostics().getDiagnosticsContext()); } } else if (response.cosmosException != null) { @@ -1958,8 +1870,6 @@ private void execute( assertThat(response.cosmosException.getDiagnostics()).isNotNull(); if (!hasReachedCircuitBreakingThreshold) { - - logger.info("In circuit breaking assertion for exception..."); CosmosDiagnosticsContext ctx = response.cosmosException.getDiagnostics().getDiagnosticsContext(); validateRegionsContactedWhenExceptionBubblesUp.accept(ctx); @@ -1969,7 +1879,6 @@ private void execute( assertThat(response.batchResponse.getDiagnostics()).isNotNull(); if (executionCountAfterCircuitBreakingThresholdBreached > 1) { - logger.info("In circuit breaking assertion for batch response..."); validateRegionsContactedWhenShortCircuitingHasKickedIn.accept(response.batchResponse.getDiagnostics().getDiagnosticsContext()); } } @@ -1987,8 +1896,7 @@ private void execute( } ResponseWrapper response = executeDataPlaneOperation.apply(operationInvocationParamsWrapper); - - logger.info("Hit count : {}", faultInjectionRules.stream().mapToLong(FaultInjectionRule::getHitCount).sum()); + validateResponseInAbsenceOfFailures.accept(response); if (response.cosmosItemResponse != null) { assertThat(response.cosmosItemResponse).isNotNull(); @@ -2708,10 +2616,6 @@ private static void validateNonEmptyList(List list) { assertThat(list).isNotEmpty(); } - private static boolean isReadManyOperation(OperationInvocationParamsWrapper paramsWrapper) { - return !(paramsWrapper.itemIdentitiesForReadManyOperation == null || paramsWrapper.itemIdentitiesForReadManyOperation.isEmpty()); - } - private static void deleteAllDocuments(CosmosAsyncContainer asyncContainer) { asyncContainer .queryItems("SELECT * FROM C", TestObject.class) @@ -2721,7 +2625,11 @@ private static void deleteAllDocuments(CosmosAsyncContainer asyncContainer) { .blockLast(); } + private static boolean requiresClientLevelE2EConfig(FaultInjectionOperationType faultInjectionOperationType) { + return faultInjectionOperationType == FaultInjectionOperationType.READ_FEED_ITEM; + } + private enum QueryType { - READ_MANY, READ_ALL, QUERY_TEXT_BASED + READ_MANY, READ_ALL, QUERY_TEXT_BASED, READ_FEED } } From 94e89d2412c5fcbbce3ab40bbbe27e07c315ca47 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 18 Jun 2024 17:53:15 -0400 Subject: [PATCH 075/140] Refactoring. --- .../ClientConfigDiagnosticsTest.java | 31 +- ...EndpointManagerForCircuitBreakerTests.java | 49 ++- .../PartitionLevelCircuitBreakerTests.java | 8 +- .../ClientSideRequestStatistics.java | 25 +- .../DiagnosticsClientContext.java | 17 + .../DocumentServiceRequestContext.java | 9 + ...FeedOperationContextForCircuitBreaker.java | 11 +- .../implementation/RxDocumentClientImpl.java | 68 +--- .../RxDocumentServiceRequest.java | 11 + ...nsecutiveExceptionBasedCircuitBreaker.java | 23 +- ...itionEndpointManagerForCircuitBreaker.java | 291 +++--------------- .../LocationContextTransitionHandler.java | 233 ++++++++++++++ .../circuitBreaker/LocationHealthStatus.java | 21 ++ .../LocationSpecificContext.java | 12 +- .../PartitionKeyRangeWrapper.java | 39 +++ .../PartitionLevelCircuitBreakerConfig.java | 12 + .../feedranges/FeedRangeEpkImpl.java | 2 + .../feedranges/FeedRangePartitionKeyImpl.java | 2 +- .../FeedRangePartitionKeyRangeImpl.java | 2 +- 19 files changed, 503 insertions(+), 363 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationContextTransitionHandler.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationHealthStatus.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java index 9329fb405a2d..03790701c060 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java @@ -11,6 +11,7 @@ import com.azure.cosmos.CosmosRegionSwitchHint; import com.azure.cosmos.SessionRetryOptions; import com.azure.cosmos.SessionRetryOptionsBuilder; +import com.azure.cosmos.implementation.circuitBreaker.PartitionLevelCircuitBreakerConfig; import com.azure.cosmos.implementation.directconnectivity.RntbdTransportClient; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.implementation.http.HttpClientConfig; @@ -73,7 +74,8 @@ public Object[][] clientCfgProvider() { aggressiveWarmUpDuration1, proactiveConnectionRegionCount1, cosmosContainerIdentities, - false // is region scoped session capturing enabled + false, // is region scoped session capturing enabled + false // is partition-level circuit breaking enabled }, { new CosmosContainerProactiveInitConfigBuilder(cosmosContainerIdentities) @@ -83,7 +85,8 @@ public Object[][] clientCfgProvider() { aggressiveWarmUpDuration2, proactiveConnectionRegionCount2, cosmosContainerIdentities, - true + true, // is region scoped session capturing enabled + false // is partition-level circuit breaking enabled }, { new CosmosContainerProactiveInitConfigBuilder(cosmosContainerIdentities) @@ -92,7 +95,8 @@ public Object[][] clientCfgProvider() { null, proactiveConnectionRegionCount3, cosmosContainerIdentities, - false + false, // is region scoped session capturing enabled + true // is partition-level circuit breaking enabled } }; } @@ -243,7 +247,8 @@ public void full( Duration aggressiveWarmupDuration, int proactiveConnectionRegionCount, List cosmosContainerIdentities, - boolean isRegionScopedSessionCapturingEnabled) throws Exception { + boolean isRegionScopedSessionCapturingEnabled, + boolean isPartitionLevelCircuitBreakerEnabled) throws Exception { DiagnosticsClientContext clientContext = Mockito.mock(DiagnosticsClientContext.class); System.setProperty("COSMOS.REPLICA_ADDRESS_VALIDATION_ENABLED", "false"); @@ -277,6 +282,17 @@ public void full( diagnosticsClientConfig.withRegionScopedSessionContainerOptions(regionScopedSessionContainer); } + if (isPartitionLevelCircuitBreakerEnabled) { + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"COUNT_BASED\"," + + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + + PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); + diagnosticsClientConfig.withPartitionLevelCircuitBreakerConfig(partitionLevelCircuitBreakerConfig); + } + Mockito.doReturn(diagnosticsClientConfig).when(clientContext).getConfig(); StringWriter jsonWriter = new StringWriter(); @@ -301,11 +317,18 @@ public void full( assertThat(objectNode.get("regionScopedSessionCfg")).isNull(); } + if (isPartitionLevelCircuitBreakerEnabled) { + assertThat(objectNode.get("partitionLevelCircuitBreakerCfg").asText()).isEqualTo("(cb: true, type: COUNT_BASED, tl: LOW)"); + } else { + assertThat(objectNode.get("partitionLevelCircuitBreakerCfg")).isNull(); + } + String expectedProactiveInitConfigString = reconstructProactiveInitConfigString(cosmosContainerIdentities, aggressiveWarmupDuration, proactiveConnectionRegionCount); assertThat(objectNode.get("proactiveInitCfg").asText()).isEqualTo(expectedProactiveInitConfigString); System.clearProperty("COSMOS.REPLICA_ADDRESS_VALIDATION_ENABLED"); + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @Test(groups = {"unit"}, dataProvider = "sessionRetryOptionsConfigProvider") diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 20e454ec4b50..a83e420106d2 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -5,7 +5,9 @@ import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificContext; +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import org.apache.commons.lang3.tuple.Pair; import org.mockito.Mockito; @@ -35,6 +37,25 @@ public class GlobalPartitionEndpointManagerForCircuitBreakerTests { @BeforeClass(groups = {"unit"}) public void beforeClass() { this.globalEndpointManagerMock = Mockito.mock(GlobalEndpointManager.class); + + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUsEndpointToLocationPair.getKey(), OperationType.Read)) + .thenReturn(LocationEastUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUsEndpointToLocationPair.getKey(), OperationType.Create)) + .thenReturn(LocationEastUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationCentralUsEndpointToLocationPair.getKey(), OperationType.Read)) + .thenReturn(LocationCentralUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationCentralUsEndpointToLocationPair.getKey(), OperationType.Create)) + .thenReturn(LocationCentralUsEndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUs2EndpointToLocationPair.getKey(), OperationType.Read)) + .thenReturn(LocationEastUs2EndpointToLocationPair.getRight()); + Mockito + .when(this.globalEndpointManagerMock.getRegionName(LocationEastUs2EndpointToLocationPair.getKey(), OperationType.Create)) + .thenReturn(LocationEastUs2EndpointToLocationPair.getRight()); } @DataProvider(name = "partitionLevelCircuitBreakerConfigs") @@ -105,7 +126,7 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS .handleLocationSuccessForPartitionKeyRange(request); Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -154,7 +175,7 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -200,7 +221,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); int exceptionCountToHandle - = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); for (int i = 1; i <= exceptionCountToHandle + 1; i++) { globalPartitionEndpointManagerForCircuitBreaker @@ -208,7 +229,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit } Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -256,7 +277,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); int exceptionCountToHandle - = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); for (int i = 1; i <= exceptionCountToHandle + 1; i++) { globalPartitionEndpointManagerForCircuitBreaker @@ -264,7 +285,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition } Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -323,7 +344,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); int exceptionCountToHandle - = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); for (int i = 1; i <= exceptionCountToHandle + 1; i++) { globalPartitionEndpointManagerForCircuitBreaker @@ -331,7 +352,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve } Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -348,7 +369,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve locationSpecificContext = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); - int successCountToUpgradeStatus = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyTentative, readOperationTrue); + int successCountToUpgradeStatus = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus.HealthyTentative, readOperationTrue); for (int i = 1; i <= successCountToUpgradeStatus + 1; i++) { globalPartitionEndpointManagerForCircuitBreaker @@ -397,7 +418,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); int exceptionCountToHandle - = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); for (int i = 1; i <= exceptionCountToHandle + 1; i++) { globalPartitionEndpointManagerForCircuitBreaker @@ -405,7 +426,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC } Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -420,7 +441,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC throw new RuntimeException(ex); } - exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyTentative, readOperationTrue); + exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyTentative, readOperationTrue); for (int i = 1; i <= exceptionCountToHandle + 1; i++) { globalPartitionEndpointManagerForCircuitBreaker @@ -472,7 +493,7 @@ public void allRegionsUnhealthyHandling(String partitionLevelCircuitBreakerConfi int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker .getConsecutiveExceptionBasedCircuitBreaker() - .getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, readOperationTrue); + .getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); for (int i = 1; i <= exceptionCountToHandle + 1; i++) { globalPartitionEndpointManagerForCircuitBreaker @@ -484,7 +505,7 @@ public void allRegionsUnhealthyHandling(String partitionLevelCircuitBreakerConfi } Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper( + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); assertThat(locationToLocationSpecificContextMappings).isNull(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 6261d4e5bebd..2d8b494cd532 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -22,6 +22,8 @@ import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; import com.azure.cosmos.implementation.circuitBreaker.ConsecutiveExceptionBasedCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.feedranges.FeedRangePartitionKeyImpl; @@ -1836,12 +1838,12 @@ private void execute( int expectedCircuitBreakingThreshold = doesOperationHaveWriteSemantics(faultInjectionRuleParamsWrapper.getFaultInjectionOperationType()) ? - consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, false) : - consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures, true); + consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, false) : + consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, true); if (!hasReachedCircuitBreakingThreshold) { hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == globalPartitionEndpointManagerForCircuitBreaker.getExceptionCountByPartitionKeyRange( - new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId())); + new PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId())); validateResponseInPresenceOfFailures.accept(response); } else { executionCountAfterCircuitBreakingThresholdBreached++; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index 21004696e621..5245a436a848 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -164,6 +164,9 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost this.approximateInsertionCountInBloomFilter = request.requestContext.getApproximateBloomFilterInsertionCount(); storeResponseStatistics.sessionTokenEvaluationResults = request.requestContext.getSessionTokenEvaluationResults(); + storeResponseStatistics.regionToHealthStatusesForPartitionKeyRange = request.requestContext.getRegionToHealthStatusesForPartitionKeyRange(); + + System.out.println("In ClientSideRequestStats : " + request); if (request.requestContext.getEndToEndOperationLatencyPolicyConfig() != null) { storeResponseStatistics.e2ePolicyCfg = @@ -237,6 +240,7 @@ public void recordGatewayResponse( if (rxDocumentServiceRequest.requestContext != null) { gatewayStatistics.sessionTokenEvaluationResults = rxDocumentServiceRequest.requestContext.getSessionTokenEvaluationResults(); + gatewayStatistics.regionToHealthStatusesForPartitionKeyRange = rxDocumentServiceRequest.requestContext.getRegionToHealthStatusesForPartitionKeyRange(); } } gatewayStatistics.statusCode = storeResponseDiagnostics.getStatusCode(); @@ -665,6 +669,9 @@ public static class StoreResponseStatistics { @JsonSerialize private Set sessionTokenEvaluationResults; + @JsonSerialize + private Utils.ValueHolder> regionToHealthStatusesForPartitionKeyRange; + public String getExcludedRegions() { return this.excludedRegions; } public StoreResultDiagnostics getStoreResult() { @@ -853,6 +860,7 @@ public static class GatewayStatistics { private String faultInjectionRuleId; private List faultInjectionEvaluationResults; private Set sessionTokenEvaluationResults; + private Utils.ValueHolder> regionToHealthStatusesForPartitionKeyRange; public String getSessionToken() { return sessionToken; @@ -910,6 +918,10 @@ public Set getSessionTokenEvaluationResults() { return sessionTokenEvaluationResults; } + public Map getRegionToHealthStatusesForPartitionKeyRange() { + return regionToHealthStatusesForPartitionKeyRange.v; + } + public static class GatewayStatisticsSerializer extends StdSerializer { private static final long serialVersionUID = 1L; @@ -943,6 +955,7 @@ public void serialize(GatewayStatistics gatewayStatistics, } this.writeNonEmptyStringSetField(jsonGenerator, "sessionTokenEvaluationResults", gatewayStatistics.getSessionTokenEvaluationResults()); + this.writeNonNullObjectField(jsonGenerator, "regionHealthStatusesForPkRange", gatewayStatistics.getRegionToHealthStatusesForPartitionKeyRange()); jsonGenerator.writeEndObject(); } @@ -969,6 +982,14 @@ private void writeNonEmptyStringSetField(JsonGenerator jsonGenerator, String fie jsonGenerator.writePOJOField(fieldName, values); } + + private void writeNonNullObjectField(JsonGenerator jsonGenerator, String fieldName, Object object) throws IOException { + if (object == null) { + return; + } + + jsonGenerator.writePOJOField(fieldName, object); + } } } @@ -1006,10 +1027,6 @@ static class RegionWithContext implements Comparable { this.recordedTimestamp = System.currentTimeMillis(); } - public String getRegionContacted() { - return regionContacted; - } - @Override public int compareTo(RegionWithContext o) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java index 48eea593c53c..8f1ddfc122dd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DiagnosticsClientContext.java @@ -10,6 +10,7 @@ import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.SessionRetryOptions; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.circuitBreaker.PartitionLevelCircuitBreakerConfig; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.guava27.Strings; import com.fasterxml.jackson.core.JsonGenerator; @@ -89,6 +90,11 @@ public void serialize(DiagnosticsClientConfig clientConfig, JsonGenerator genera if (!StringUtils.isEmpty(clientConfig.regionScopedSessionContainerOptionsAsString)) { generator.writeStringField("regionScopedSessionCfg", clientConfig.regionScopedSessionContainerOptionsAsString); } + + if (!StringUtils.isEmpty(clientConfig.partitionLevelCircuitBreakerConfigAsString)) { + generator.writeStringField("partitionLevelCircuitBreakerCfg", clientConfig.partitionLevelCircuitBreakerConfigAsString); + } + } catch (Exception e) { logger.debug("unexpected failure", e); } @@ -121,6 +127,7 @@ class DiagnosticsClientConfig { private ConnectionPolicy connectionPolicy; private String sessionRetryOptionsAsString; private String regionScopedSessionContainerOptionsAsString; + private String partitionLevelCircuitBreakerConfigAsString; public DiagnosticsClientConfig withMachineId(String machineId) { this.machineId = machineId; @@ -228,6 +235,16 @@ public DiagnosticsClientConfig withSessionRetryOptions(SessionRetryOptions sessi return this; } + public DiagnosticsClientConfig withPartitionLevelCircuitBreakerConfig(PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig) { + if (partitionLevelCircuitBreakerConfig == null) { + this.partitionLevelCircuitBreakerConfigAsString = ""; + } else { + this.partitionLevelCircuitBreakerConfigAsString = partitionLevelCircuitBreakerConfig.getConfigAsString(); + } + + return this; + } + public DiagnosticsClientConfig withRegionScopedSessionContainerOptions(RegionScopedSessionContainer regionScopedSessionContainer) { if (regionScopedSessionContainer == null) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index bd406eba9c4d..ed8e7812b04e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -61,6 +61,7 @@ public class DocumentServiceRequestContext implements Cloneable { private FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker; private volatile Supplier clientRetryPolicySupplier; + private volatile Utils.ValueHolder> regionToHealthStatusesForPartitionKeyRange = new Utils.ValueHolder<>(); public DocumentServiceRequestContext() {} @@ -215,5 +216,13 @@ public Supplier getClientRetryPolicySupplier() { public void setClientRetryPolicySupplier(Supplier clientRetryPolicySupplier) { this.clientRetryPolicySupplier = clientRetryPolicySupplier; } + + public Utils.ValueHolder> getRegionToHealthStatusesForPartitionKeyRange() { + return regionToHealthStatusesForPartitionKeyRange; + } + + public void setRegionToHealthStatusesForPartitionKeyRange(Map regionToHealthStatusesForPartitionKeyRange) { + this.regionToHealthStatusesForPartitionKeyRange.v = regionToHealthStatusesForPartitionKeyRange; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java index 1e28b8191f19..d6e5d329a935 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java @@ -4,16 +4,17 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import java.util.Map; public class FeedOperationContextForCircuitBreaker { - private final Map partitionKeyRangesWithSuccess; + private final Map partitionKeyRangesWithSuccess; private final boolean isThresholdBasedAvailabilityStrategyEnabled; private boolean isRequestHedged; - public FeedOperationContextForCircuitBreaker(Map partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { + public FeedOperationContextForCircuitBreaker(Map partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { this.partitionKeyRangesWithSuccess = partitionKeyRangesWithSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; } @@ -27,14 +28,12 @@ public boolean getIsRequestHedged() { } public void addPartitionKeyRangeWithSuccess(PartitionKeyRange partitionKeyRange, String resourceId) { - GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper partitionKeyRangeWrapper - = new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); this.partitionKeyRangesWithSuccess.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); } public boolean hasPartitionKeyRangeSeenSuccess(PartitionKeyRange partitionKeyRange, String resourceId) { - GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper partitionKeyRangeWrapper - = new GlobalPartitionEndpointManagerForCircuitBreaker.PartitionKeyRangeWrapper(partitionKeyRange, resourceId); + PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); return this.partitionKeyRangesWithSuccess.containsKey(partitionKeyRangeWrapper); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 4de9605e278d..605e0c0d6241 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -28,6 +28,7 @@ import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; +import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; import com.azure.cosmos.implementation.cpu.CpuMemoryListener; import com.azure.cosmos.implementation.cpu.CpuMemoryMonitor; @@ -546,6 +547,8 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.globalPartitionEndpointManagerForCircuitBreaker.init(); + this.diagnosticsClientConfig.withPartitionLevelCircuitBreakerConfig(this.globalPartitionEndpointManagerForCircuitBreaker.getCircuitBreakerConfig()); + this.retryPolicy = new RetryPolicy( this, this.globalEndpointManager, @@ -672,7 +675,6 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.globalPartitionEndpointManagerForCircuitBreaker); this.globalEndpointManager.init(); - DatabaseAccount databaseAccountSnapshot = this.initializeGatewayConfigurationReader(); this.resetSessionContainerIfNeeded(databaseAccountSnapshot); @@ -2040,6 +2042,10 @@ private boolean requiresFeedRangeFiltering(RxDocumentServiceRequest request) { return false; } + if (request.hasFeedRangeFilteringBeenApplied()) { + return false; + } + switch (request.getOperationType()) { case ReadFeed: case Query: @@ -2378,12 +2384,6 @@ private Mono handleCircuitBreakingFeedbackForPointOperation( } } } - - // todo: investigate below scenario - gets called when INTERNAL_SERVER_ERROR injected - // todo: something is causing cancellation w/o e2e operation timeout set -// else { -// this.handleLocationExceptionForPartitionKeyRange(potentiallyFailedRequest); -// } }); } @@ -2430,49 +2430,6 @@ private Mono> handleCircuitBreakingFeedba }); } - private Mono handleCircuitBreakingFeedbackForPointOperationWithAvailabilityStrategy(Mono response, RxDocumentServiceRequest request) { - - if (!this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { - return response; - } - - return response - .doOnSuccess(nonTransientPointOperationResult -> { - - if (!nonTransientPointOperationResult.isError()) { - - checkNotNull(request, "Argument 'request' cannot be null!"); - checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); - - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker - = request.requestContext.getPointOperationContextForCircuitBreaker(); - - pointOperationContextForCircuitBreaker.setHasOperationSeenSuccess(); - this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request); - } - - }) - .doFinally(signalType -> { - if (signalType != SignalType.CANCEL) { - return; - } - - checkNotNull(request, "Argument 'request' cannot be null!"); - checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); - - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker - = request.requestContext.getPointOperationContextForCircuitBreaker(); - - checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' cannot be null!"); - - if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() - && pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled() - && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { - this.handleLocationCancellationExceptionForPartitionKeyRange(request); - } - }); - } - private static Throwable getCancellationExceptionForPointOperations( ScopedDiagnosticsFactory scopedDiagnosticsFactory, Throwable throwable, @@ -6293,7 +6250,7 @@ private Mono executeFeedOperationWithAvailabilityStrategy( false, initialExcludedRegions); - Map partitionKeyRangesWithSuccess = new ConcurrentHashMap<>(); + Map partitionKeyRangesWithSuccess = new ConcurrentHashMap<>(); FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, false); feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow.setIsRequestHedged(false); @@ -6310,11 +6267,8 @@ private Mono executeFeedOperationWithAvailabilityStrategy( orderedApplicableRegionsForSpeculation .forEach(region -> { - RxDocumentServiceRequest clonedRequest = req.clone(); - logger.info("Cloned request : {}", req); - if (monoList.isEmpty()) { // no special error handling for transient errors to suppress them here // because any cross-regional retries are expected to be processed @@ -6325,8 +6279,6 @@ private Mono executeFeedOperationWithAvailabilityStrategy( feedOperationContextForCircuitBreakerForNonHedgedRequest.setIsRequestHedged(false); clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForNonHedgedRequest); - logger.info("Cloned request : {}", clonedRequest); - Mono> initialMonoAcrossAllRegions = handleCircuitBreakingFeedbackForFeedOperationWithAvailabilityStrategy(feedOperation.apply(retryPolicyFactory, clonedRequest) .map(NonTransientFeedOperationResult::new) @@ -6356,8 +6308,6 @@ private Mono executeFeedOperationWithAvailabilityStrategy( feedOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForHedgedRequest); - logger.info("Cloned request : {}", clonedRequest); - // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors @@ -6616,7 +6566,7 @@ public void reset() { } } - static class CollectionRoutingMapNotFoundException extends CosmosException { + private static class CollectionRoutingMapNotFoundException extends CosmosException { private static final long serialVersionUID = 1L; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java index 20d29ea1b48e..12deb989dc25 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentServiceRequest.java @@ -87,6 +87,8 @@ public class RxDocumentServiceRequest implements Cloneable { private volatile boolean nonIdempotentWriteRetriesEnabled = false; + private volatile boolean hasFeedRangeFilteringBeenApplied = false; + public boolean isReadOnlyRequest() { return this.operationType.isReadOnlyOperation(); } @@ -120,6 +122,14 @@ public boolean getNonIdempotentWriteRetriesEnabled() { return this.nonIdempotentWriteRetriesEnabled; } + public boolean hasFeedRangeFilteringBeenApplied() { + return this.hasFeedRangeFilteringBeenApplied; + } + + public void setHasFeedRangeFilteringBeenApplied(boolean hasFeedRangeFilteringBeenApplied) { + this.hasFeedRangeFilteringBeenApplied = hasFeedRangeFilteringBeenApplied; + } + public boolean isReadOnly() { return this.isReadOnlyRequest() || this.isReadOnlyScript(); } @@ -1069,6 +1079,7 @@ public RxDocumentServiceRequest clone() { rxDocumentServiceRequest.effectiveRange = this.effectiveRange; rxDocumentServiceRequest.isFeed = this.isFeed; rxDocumentServiceRequest.resourceId = this.resourceId; + rxDocumentServiceRequest.hasFeedRangeFilteringBeenApplied = this.hasFeedRangeFilteringBeenApplied; return rxDocumentServiceRequest; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 76b1fdb0688b..2fbe2821f721 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -20,8 +20,7 @@ public LocationSpecificContext handleException(LocationSpecificContext locationS int exceptionCountAfterHandling = (isReadOnlyRequest) ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); - GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus - = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); switch (locationHealthStatus) { case Healthy: @@ -64,8 +63,7 @@ public LocationSpecificContext handleSuccess(LocationSpecificContext locationSpe int successCountAfterHandling = (isReadOnlyRequest) ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForWrite(); - GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus - = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); switch (locationHealthStatus) { case Healthy: @@ -139,13 +137,13 @@ public boolean canHealthStatusBeUpgraded(LocationSpecificContext locationSpecifi int exceptionCountActual = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); - GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); return successCountActual >= getMinimumSuccessCountForStatusUpgrade(locationHealthStatus, isReadOnlyRequest) && (double) exceptionCountActual / (double) successCountActual < getAllowedExceptionToSuccessRatio(locationHealthStatus, isReadOnlyRequest); } - private static double getAllowedExceptionToSuccessRatio(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus status, boolean isReadOnlyRequest) { + private static double getAllowedExceptionToSuccessRatio(LocationHealthStatus status, boolean isReadOnlyRequest) { if (isReadOnlyRequest) { switch (status) { @@ -168,7 +166,7 @@ private static double getAllowedExceptionToSuccessRatio(GlobalPartitionEndpointM } } - public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus status, boolean isReadOnlyRequest) { + public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, boolean isReadOnlyRequest) { if (isReadOnlyRequest) { switch (status) { @@ -193,7 +191,6 @@ public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManag throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case Healthy: - return 0; case Unavailable: return 0; default: @@ -229,7 +226,7 @@ public int getAllowedExceptionCountToMaintainStatus(GlobalPartitionEndpointManag } } - public int getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus status, boolean isReadOnlyRequest) { + public int getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus status, boolean isReadOnlyRequest) { if (isReadOnlyRequest) { switch (status) { case HealthyTentative: @@ -243,9 +240,7 @@ public int getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManager throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case Unavailable: - return 0; case HealthyWithFailures: - return 0; case Healthy: return 0; default: @@ -264,9 +259,7 @@ public int getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManager throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); } case Unavailable: - return 0; case HealthyWithFailures: - return 0; case Healthy: return 0; default: @@ -278,4 +271,8 @@ public int getMinimumSuccessCountForStatusUpgrade(GlobalPartitionEndpointManager public boolean isPartitionLevelCircuitBreakerEnabled() { return this.partitionLevelCircuitBreakerConfig.isPartitionLevelCircuitBreakerEnabled(); } + + public PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreakerConfig() { + return partitionLevelCircuitBreakerConfig; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 57184cc3d0ae..84a608133831 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -8,6 +8,7 @@ import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import org.slf4j.Logger; @@ -36,19 +37,16 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo; private final ConcurrentHashMap partitionsWithPossibleUnavailableRegions; private final LocationContextTransitionHandler locationContextTransitionHandler; - private ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; + private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); this.partitionsWithPossibleUnavailableRegions = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; - this.locationContextTransitionHandler = new LocationContextTransitionHandler(); PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); - - if (partitionLevelCircuitBreakerConfig.getCircuitBreakerType().equals("COUNT_BASED")) { - this.consecutiveExceptionBasedCircuitBreaker = new ConsecutiveExceptionBasedCircuitBreaker(partitionLevelCircuitBreakerConfig); - } + this.consecutiveExceptionBasedCircuitBreaker = new ConsecutiveExceptionBasedCircuitBreaker(partitionLevelCircuitBreakerConfig); + this.locationContextTransitionHandler = new LocationContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker); } public void init() { @@ -62,9 +60,7 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; - if (partitionKeyRange == null) { - return; - } + checkNotNull(request.requestContext.resolvedPartitionKeyRange, "Argument 'request.requestContext.resolvedPartitionKeyRange' cannot be null!"); String collectionResourceId = request.getResourceId(); checkNotNull(collectionResourceId, "Argument 'collectionResourceId' cannot be null!"); @@ -92,6 +88,7 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest partitionLevelLocationUnavailabilityInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapperAsKey, applicableEndpoints, request.isReadOnlyRequest())); } + request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionLevelLocationUnavailabilityInfoAsVal.regionToHealthStatus); return partitionLevelLocationUnavailabilityInfoAsVal; }); @@ -123,10 +120,7 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r PartitionKeyRange partitionKeyRange = request.requestContext.resolvedPartitionKeyRange; - // todo: how to handle this? - if (partitionKeyRange == null) { - return; - } + checkNotNull(request.requestContext.resolvedPartitionKeyRange, "Argument 'request.requestContext.resolvedPartitionKeyRange' cannot be null!"); String resourceId = request.getResourceId(); @@ -144,6 +138,7 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r succeededLocation, request.isReadOnlyRequest()); + request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionKeyRangeToFailoverInfoAsVal.regionToHealthStatus); return partitionKeyRangeToFailoverInfoAsVal; }); } @@ -225,6 +220,10 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques return false; } + if (request.getResourceType() != ResourceType.Document) { + return false; + } + GlobalEndpointManager globalEndpointManager = this.globalEndpointManager; if (!globalEndpointManager.canUseMultipleWriteLocations(request)) { @@ -239,9 +238,13 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques private class PartitionLevelLocationUnavailabilityInfo { private final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; + private final ConcurrentHashMap regionToHealthStatus; + private final LocationContextTransitionHandler locationContextTransitionHandler; - PartitionLevelLocationUnavailabilityInfo() { + private PartitionLevelLocationUnavailabilityInfo() { this.locationEndpointToLocationSpecificContextForPartition = new ConcurrentHashMap<>(); + this.regionToHealthStatus = new ConcurrentHashMap<>(); + this.locationContextTransitionHandler = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationContextTransitionHandler; } public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException, boolean isReadOnlyRequest) { @@ -261,13 +264,19 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper false); } - LocationSpecificContext locationSpecificContextAfterTransition = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationContextTransitionHandler.handleException( + LocationSpecificContext locationSpecificContextAfterTransition = this.locationContextTransitionHandler.handleException( locationSpecificContextAsVal, partitionKeyRangeWrapper, + GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionsWithPossibleUnavailableRegions, locationWithException, isReadOnlyRequest); + this.regionToHealthStatus.put( + GlobalPartitionEndpointManagerForCircuitBreaker + .this.globalEndpointManager + .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), + locationSpecificContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); + isExceptionThresholdBreached.set(locationSpecificContextAfterTransition.isExceptionThresholdBreached()); return locationSpecificContextAfterTransition; }); @@ -291,14 +300,19 @@ public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI false); } - locationSpecificContextAfterTransition = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationContextTransitionHandler.handleSuccess( + locationSpecificContextAfterTransition = this.locationContextTransitionHandler.handleSuccess( locationSpecificContextAsVal, partitionKeyRangeWrapper, succeededLocation, false, isReadOnlyRequest); + this.regionToHealthStatus.put( + GlobalPartitionEndpointManagerForCircuitBreaker + .this.globalEndpointManager + .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), + locationSpecificContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); + return locationSpecificContextAfterTransition; }); } @@ -342,8 +356,7 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe this.locationEndpointToLocationSpecificContextForPartition.compute(mostHealthyTentativeLocation, (mostHealthyTentativeLocationAsKey, locationSpecificStatusAsVal) -> { if (locationSpecificStatusAsVal != null) { - locationSpecificStatusAsVal = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationContextTransitionHandler.handleSuccess( + locationSpecificStatusAsVal = this.locationContextTransitionHandler.handleSuccess( locationSpecificStatusAsVal, partitionKeyRangeWrapper, mostHealthyTentativeLocationAsKey, @@ -357,242 +370,12 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe return false; } - } - - private class LocationContextTransitionHandler { - - public LocationSpecificContext handleSuccess( - LocationSpecificContext locationSpecificContext, - PartitionKeyRangeWrapper partitionKeyRangeWrapper, - URI locationWithSuccess, - boolean forceStatusChange, - boolean isReadOnlyRequest) { - - LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); - - int exceptionCountActual - = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); - - switch (currentLocationHealthStatusSnapshot) { - case Healthy: - break; - case HealthyWithFailures: - if (!forceStatusChange) { - if (exceptionCountActual > 0) { - return GlobalPartitionEndpointManagerForCircuitBreaker - .this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificContext, isReadOnlyRequest); - } - } - break; - - case HealthyTentative: - if (!forceStatusChange) { - LocationSpecificContext locationSpecificContextInner - = GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificContext, isReadOnlyRequest); - - if (GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificContextInner, isReadOnlyRequest)) { - - if (logger.isDebugEnabled()) { - logger.debug("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - } - - return this.transitionHealthStatus(LocationHealthStatus.Healthy); - } else { - return locationSpecificContextInner; - } - } - break; - case Unavailable: - Instant unavailableSinceActual = locationSpecificContext.getUnavailableSince(); - if (!forceStatusChange) { - if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { - - if (logger.isDebugEnabled()) { - logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - } - - return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); - } - } else { - - if (logger.isDebugEnabled()) { - logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - } - - return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); - } - break; - default: - throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); - } - - return locationSpecificContext; - } - - public LocationSpecificContext handleException( - LocationSpecificContext locationSpecificContext, - PartitionKeyRangeWrapper partitionKeyRangeWrapper, - URI locationWithException, - boolean isReadOnlyRequest) { - - LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); - - switch (currentLocationHealthStatusSnapshot) { - case Healthy: - - if (logger.isDebugEnabled()) { - logger.debug("Partition {}-{} of collection : {} marked as HealthyWithFailures from Healthy for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - } - - return this.transitionHealthStatus(LocationHealthStatus.HealthyWithFailures); - case HealthyWithFailures: - if (!GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { - - LocationSpecificContext locationSpecificContextInner = GlobalPartitionEndpointManagerForCircuitBreaker - .this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); - - if (logger.isDebugEnabled()) { - logger.debug("Partition {}-{} of collection : {} has exception count of {} for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - isReadOnlyRequest ? locationSpecificContextInner.getExceptionCountForRead() : locationSpecificContextInner.getExceptionCountForWrite(), - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - } - - return locationSpecificContextInner; - } else { - GlobalPartitionEndpointManagerForCircuitBreaker - .this.partitionsWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); - - if (logger.isDebugEnabled()) { - logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyWithFailures for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - } - - return this.transitionHealthStatus(LocationHealthStatus.Unavailable); - } - case HealthyTentative: - if (!GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { - - return GlobalPartitionEndpointManagerForCircuitBreaker.this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); - } else { - - if (logger.isDebugEnabled()) { - logger.debug("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}", - partitionKeyRangeWrapper.partitionKeyRange.getMinInclusive(), - partitionKeyRangeWrapper.partitionKeyRange.getMaxExclusive(), - partitionKeyRangeWrapper.resourceId, - GlobalPartitionEndpointManagerForCircuitBreaker.this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); - } - - return this.transitionHealthStatus(LocationHealthStatus.Unavailable); - } - default: - throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); - } - } - - public LocationSpecificContext transitionHealthStatus(LocationHealthStatus newStatus) { - - switch (newStatus) { - case Healthy: - return new LocationSpecificContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.Healthy, - false); - case HealthyWithFailures: - return new LocationSpecificContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.HealthyWithFailures, - false); - case Unavailable: - return new LocationSpecificContext( - 0, - 0, - 0, - 0, - Instant.now(), - LocationHealthStatus.Unavailable, - true); - case HealthyTentative: - return new LocationSpecificContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.HealthyTentative, - false); - default: - throw new IllegalStateException("Unsupported health status: " + newStatus); - } + public ConcurrentHashMap getRegionToHealthStatus() { + return regionToHealthStatus; } } - public static class PartitionKeyRangeWrapper { - final PartitionKeyRange partitionKeyRange; - final String resourceId; - - public PartitionKeyRangeWrapper(PartitionKeyRange partitionKeyRange, String resourceId) { - this.partitionKeyRange = partitionKeyRange; - this.resourceId = resourceId; - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - PartitionKeyRangeWrapper that = (PartitionKeyRangeWrapper) o; - return Objects.equals(partitionKeyRange, that.partitionKeyRange) && Objects.equals(resourceId, that.resourceId); - } - - @Override - public int hashCode() { - return Objects.hash(partitionKeyRange, resourceId); - } - } - - // todo (abhmohanty): does this need to be public - public enum LocationHealthStatus { - Healthy, HealthyWithFailures, Unavailable, HealthyTentative - } - // todo: keep private and access through reflection public int getExceptionCountByPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { @@ -635,4 +418,8 @@ public Map getLocationToLocationSpecificContextMap public ConsecutiveExceptionBasedCircuitBreaker getConsecutiveExceptionBasedCircuitBreaker() { return this.consecutiveExceptionBasedCircuitBreaker; } + + public PartitionLevelCircuitBreakerConfig getCircuitBreakerConfig() { + return this.consecutiveExceptionBasedCircuitBreaker.getPartitionLevelCircuitBreakerConfig(); + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationContextTransitionHandler.java new file mode 100644 index 000000000000..386a16420b55 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationContextTransitionHandler.java @@ -0,0 +1,233 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.circuitBreaker; + +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.OperationType; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URI; +import java.time.Duration; +import java.time.Instant; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class LocationContextTransitionHandler { + + private static final Logger logger = LoggerFactory.getLogger(LocationContextTransitionHandler.class); + + private final GlobalEndpointManager globalEndpointManager; + private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; + + public LocationContextTransitionHandler( + GlobalEndpointManager globalEndpointManager, + ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker) { + + this.globalEndpointManager = globalEndpointManager; + this.consecutiveExceptionBasedCircuitBreaker = consecutiveExceptionBasedCircuitBreaker; + } + + public LocationSpecificContext handleSuccess( + LocationSpecificContext locationSpecificContext, + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + URI locationWithSuccess, + boolean forceStatusChange, + boolean isReadOnlyRequest) { + + LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); + + int exceptionCountActual + = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + + switch (currentLocationHealthStatusSnapshot) { + case Healthy: + break; + case HealthyWithFailures: + if (!forceStatusChange) { + if (exceptionCountActual > 0) { + return this.consecutiveExceptionBasedCircuitBreaker + .handleSuccess(locationSpecificContext, isReadOnlyRequest); + } + } + break; + + case HealthyTentative: + if (!forceStatusChange) { + + LocationSpecificContext locationSpecificContextInner + = this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificContext, isReadOnlyRequest); + + if (this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificContextInner, isReadOnlyRequest)) { + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId(), + this.globalEndpointManager + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return this.transitionHealthStatus(LocationHealthStatus.Healthy); + } else { + return locationSpecificContextInner; + } + } + break; + case Unavailable: + Instant unavailableSinceActual = locationSpecificContext.getUnavailableSince(); + if (!forceStatusChange) { + if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId(), + this.globalEndpointManager + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); + } + } else { + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId(), + this.globalEndpointManager + .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); + } + break; + default: + throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); + } + + return locationSpecificContext; + } + + public LocationSpecificContext handleException( + LocationSpecificContext locationSpecificContext, + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + ConcurrentHashMap partitionKeyRangesWithPossibleUnavailableRegions, + URI locationWithException, + boolean isReadOnlyRequest) { + + LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); + + switch (currentLocationHealthStatusSnapshot) { + case Healthy: + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as HealthyWithFailures from Healthy for region : {}", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId(), + this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return this.transitionHealthStatus(LocationHealthStatus.HealthyWithFailures); + case HealthyWithFailures: + if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { + + LocationSpecificContext locationSpecificContextInner + = this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} has exception count of {} for region : {}", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId(), + isReadOnlyRequest ? locationSpecificContextInner.getExceptionCountForRead() : locationSpecificContextInner.getExceptionCountForWrite(), + this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return locationSpecificContextInner; + } else { + partitionKeyRangesWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); + + if (logger.isDebugEnabled()) { + logger.info("Partition {}-{} of collection : {} marked as Unavailable from HealthyWithFailures for region : {}", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange(), + this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return this.transitionHealthStatus(LocationHealthStatus.Unavailable); + } + case HealthyTentative: + if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { + return this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); + } else { + + if (logger.isDebugEnabled()) { + logger.debug("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId(), + this.globalEndpointManager + .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + } + + return this.transitionHealthStatus(LocationHealthStatus.Unavailable); + } + default: + throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); + } + } + + public LocationSpecificContext transitionHealthStatus(LocationHealthStatus newStatus) { + + switch (newStatus) { + case Healthy: + return new LocationSpecificContext( + 0, + 0, + 0, + 0, + Instant.MAX, + LocationHealthStatus.Healthy, + false); + case HealthyWithFailures: + return new LocationSpecificContext( + 0, + 0, + 0, + 0, + Instant.MAX, + LocationHealthStatus.HealthyWithFailures, + false); + case Unavailable: + return new LocationSpecificContext( + 0, + 0, + 0, + 0, + Instant.now(), + LocationHealthStatus.Unavailable, + true); + case HealthyTentative: + return new LocationSpecificContext( + 0, + 0, + 0, + 0, + Instant.MAX, + LocationHealthStatus.HealthyTentative, + false); + default: + throw new IllegalStateException("Unsupported health status: " + newStatus); + } + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationHealthStatus.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationHealthStatus.java new file mode 100644 index 000000000000..c0034708dd5a --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationHealthStatus.java @@ -0,0 +1,21 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.circuitBreaker; + +public enum LocationHealthStatus { + Healthy("Healthy"), + HealthyWithFailures("HealthyWithFailures"), + Unavailable("Unavailable"), + HealthyTentative("HealthyTentative"); + + private final String stringifiedRepresentation; + + LocationHealthStatus(String stringifiedRepresentation) { + this.stringifiedRepresentation = stringifiedRepresentation; + } + + public String getStringifiedLocationHealthStatus() { + return stringifiedRepresentation; + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java index d27bf0561a0c..88999c04fde9 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java @@ -11,7 +11,7 @@ public class LocationSpecificContext { private final int exceptionCountForRead; private final int successCountForRead; private final Instant unavailableSince; - private final GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus; + private final LocationHealthStatus locationHealthStatus; private final boolean isExceptionThresholdBreached; public LocationSpecificContext( @@ -20,7 +20,7 @@ public LocationSpecificContext( int successCountForRead, int exceptionCountForRead, Instant unavailableSince, - GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus locationHealthStatus, + LocationHealthStatus locationHealthStatus, boolean isExceptionThresholdBreached) { this.successCountForWrite = successCountForWrite; @@ -37,9 +37,9 @@ public boolean isExceptionThresholdBreached() { } public boolean isRegionAvailableToProcessRequests() { - return this.locationHealthStatus == GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.Healthy || - this.locationHealthStatus == GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyWithFailures || - this.locationHealthStatus == GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus.HealthyTentative; + return this.locationHealthStatus == LocationHealthStatus.Healthy || + this.locationHealthStatus == LocationHealthStatus.HealthyWithFailures || + this.locationHealthStatus == LocationHealthStatus.HealthyTentative; } public int getExceptionCountForWrite() { @@ -62,7 +62,7 @@ public Instant getUnavailableSince() { return unavailableSince; } - public GlobalPartitionEndpointManagerForCircuitBreaker.LocationHealthStatus getLocationHealthStatus() { + public LocationHealthStatus getLocationHealthStatus() { return locationHealthStatus; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java new file mode 100644 index 000000000000..9a6b770efa54 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation.circuitBreaker; + +import com.azure.cosmos.implementation.PartitionKeyRange; + +import java.util.Objects; + +public class PartitionKeyRangeWrapper { + private final PartitionKeyRange partitionKeyRange; + private final String resourceId; + + public PartitionKeyRangeWrapper(PartitionKeyRange partitionKeyRange, String resourceId) { + this.partitionKeyRange = partitionKeyRange; + this.resourceId = resourceId; + } + + public PartitionKeyRange getPartitionKeyRange() { + return partitionKeyRange; + } + + public String getResourceId() { + return resourceId; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + PartitionKeyRangeWrapper that = (PartitionKeyRangeWrapper) o; + return Objects.equals(partitionKeyRange, that.partitionKeyRange) && Objects.equals(resourceId, that.resourceId); + } + + @Override + public int hashCode() { + return Objects.hash(partitionKeyRange, resourceId); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java index 643b030c0c9c..0d37e4ab7b94 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java @@ -4,6 +4,7 @@ package com.azure.cosmos.implementation.circuitBreaker; import com.azure.cosmos.implementation.Utils; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonSetter; import com.fasterxml.jackson.annotation.Nulls; @@ -25,6 +26,8 @@ public class PartitionLevelCircuitBreakerConfig { @JsonProperty private String circuitBreakerFailureTolerance = "LOW"; + private String cachedConfigAsString = ""; + public Boolean isPartitionLevelCircuitBreakerEnabled() { return isPartitionLevelCircuitBreakerEnabled; } @@ -45,6 +48,15 @@ public String toJson() { } } + public String getConfigAsString() { + if (StringUtils.isEmpty(this.cachedConfigAsString)) { + this.cachedConfigAsString = "(" + "cb: " + this.isPartitionLevelCircuitBreakerEnabled + ", " + + "type: " + this.circuitBreakerType + ", " + + "tl: " + this.circuitBreakerFailureTolerance + ")"; + } + return this.cachedConfigAsString; + } + public static PartitionLevelCircuitBreakerConfig fromJsonString(String jsonString) { try { return Utils.getSimpleObjectMapper().readValue(jsonString, PartitionLevelCircuitBreakerConfig.class); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java index 05d0611e1ac6..57e62d3b9abd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangeEpkImpl.java @@ -219,6 +219,7 @@ public Mono populateFeedRangeFilteringHeaders( // 2) The EpkRange spans exactly one physical partition // In this case we can route to the physical pkrange id request.routeTo(new PartitionKeyRangeIdentity(pkRanges.get(0).getId())); + request.setHasFeedRangeFilteringBeenApplied(true); } else { // 3) The EpkRange spans less than single physical partition // In this case we route to the physical partition and @@ -236,6 +237,7 @@ public Mono populateFeedRangeFilteringHeaders( HttpConstants.HttpHeaders.END_EPK, this.range.getMax()); + request.setHasFeedRangeFilteringBeenApplied(true); } return Mono.just(request); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java index e2eba8fd77fa..38453bdca944 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyImpl.java @@ -167,7 +167,7 @@ public Mono populateFeedRangeFilteringHeaders( .getNormalizedEffectiveRange(routingMapProvider, metadataDiagnosticsCtx, collectionResolutionMono) .map(effectiveRange -> { request.setEffectiveRange(effectiveRange); - + request.setHasFeedRangeFilteringBeenApplied(true); return request; }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java index 281adbb9e6d4..df92f69d23bb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/feedranges/FeedRangePartitionKeyRangeImpl.java @@ -149,7 +149,7 @@ public Mono populateFeedRangeFilteringHeaders( .getNormalizedEffectiveRange(routingMapProvider, metadataDiagnosticsCtx, collectionResolutionMono) .map(effectiveRange -> { request.setEffectiveRange(effectiveRange); - + request.setHasFeedRangeFilteringBeenApplied(true); return request; }); } From e7e2ab7b8409b47aa516794a26ac20a93f196101 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 18 Jun 2024 18:45:35 -0400 Subject: [PATCH 076/140] Refactoring. --- .../implementation/ClientSideRequestStatistics.java | 2 -- .../cosmos/implementation/RxDocumentClientImpl.java | 13 ++++++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index 5245a436a848..f6b40fa40b63 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -166,8 +166,6 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost storeResponseStatistics.sessionTokenEvaluationResults = request.requestContext.getSessionTokenEvaluationResults(); storeResponseStatistics.regionToHealthStatusesForPartitionKeyRange = request.requestContext.getRegionToHealthStatusesForPartitionKeyRange(); - System.out.println("In ClientSideRequestStats : " + request); - if (request.requestContext.getEndToEndOperationLatencyPolicyConfig() != null) { storeResponseStatistics.e2ePolicyCfg = request.requestContext.getEndToEndOperationLatencyPolicyConfig().toString(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 605e0c0d6241..3711af592741 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -3860,11 +3860,11 @@ public Mono populateFeedRangeHeader(RxDocumentServiceR @Override public Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions) { - if (RxDocumentClientImpl.this.requiresFeedRangeFiltering(request)) { + if (RxDocumentClientImpl.this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { String collectionRid = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getCollectionRid(queryRequestOptions); - checkNotNull(collectionRid, "collectionRid cannot be null!"); + checkNotNull(collectionRid, "Argument 'collectionRid' cannot be null!"); return RxDocumentClientImpl.this.partitionKeyRangeCache.tryLookupAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), collectionRid, null, null) .flatMap(collectionRoutingMapValueHolder -> { @@ -5769,11 +5769,14 @@ public void addPartitionLevelUnavailableRegionsForRequest( RequestOptions options, CollectionRoutingMap collectionRoutingMap) { - checkNotNull(options, "options cannot be null!"); - checkNotNull(options.getPartitionKeyDefinition(), "partitionKeyDefinition within options cannot be null!"); - checkNotNull(collectionRoutingMap, "collectionRoutingMap cannot be null!"); + checkNotNull(request, "Argument 'request' cannot be null!"); if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { + + checkNotNull(options, "Argument 'options' cannot be null!"); + checkNotNull(options.getPartitionKeyDefinition(), "Argument 'partitionKeyDefinition' within options cannot be null!"); + checkNotNull(collectionRoutingMap, "Argument 'collectionRoutingMap' cannot be null!"); + PartitionKeyDefinition partitionKeyDefinition = options.getPartitionKeyDefinition(); PartitionKeyInternal partitionKeyInternal = request.getPartitionKeyInternal(); From d94ecb6419665d4b2c7ce08696e87509b2ac959c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 18 Jun 2024 19:02:28 -0400 Subject: [PATCH 077/140] Refactoring. --- ...itionEndpointManagerForCircuitBreaker.java | 42 +------------------ 1 file changed, 2 insertions(+), 40 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 84a608133831..99cd15590d5d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -23,7 +23,6 @@ import java.util.Collections; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; @@ -85,7 +84,7 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest this.globalEndpointManager.getApplicableWriteEndpoints(request.requestContext.getExcludeRegions()); isFailoverPossible.set( - partitionLevelLocationUnavailabilityInfoAsVal.areLocationsAvailableForPartitionKeyRange(partitionKeyRangeWrapperAsKey, applicableEndpoints, request.isReadOnlyRequest())); + partitionLevelLocationUnavailabilityInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints)); } request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionLevelLocationUnavailabilityInfoAsVal.regionToHealthStatus); @@ -317,7 +316,7 @@ public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI }); } - public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper, List availableLocationsAtAccountLevel, boolean isReadOnlyRequest) { + public boolean areLocationsAvailableForPartitionKeyRange(List availableLocationsAtAccountLevel) { for (URI availableLocation : availableLocationsAtAccountLevel) { if (!this.locationEndpointToLocationSpecificContextForPartition.containsKey(availableLocation)) { @@ -331,43 +330,6 @@ public boolean areLocationsAvailableForPartitionKeyRange(PartitionKeyRangeWrappe } } - Instant mostHealthyTentativeTimeAcrossRegions = Instant.MAX; - LocationSpecificContext locationLevelFailureMetadataForMostStaleLocation = null; - URI mostHealthyTentativeLocation = null; - - // find region with most 'stale' unavailability - for (Map.Entry uriToLocationLevelFailureMetadata : this.locationEndpointToLocationSpecificContextForPartition.entrySet()) { - LocationSpecificContext locationSpecificContext = uriToLocationLevelFailureMetadata.getValue(); - - if (locationSpecificContext.isRegionAvailableToProcessRequests()) { - return true; - } - - Instant unavailableSinceSnapshot = locationSpecificContext.getUnavailableSince(); - - if (mostHealthyTentativeTimeAcrossRegions.isAfter(unavailableSinceSnapshot)) { - mostHealthyTentativeTimeAcrossRegions = unavailableSinceSnapshot; - mostHealthyTentativeLocation = uriToLocationLevelFailureMetadata.getKey(); - locationLevelFailureMetadataForMostStaleLocation = locationSpecificContext; - } - } - - if (locationLevelFailureMetadataForMostStaleLocation != null) { - this.locationEndpointToLocationSpecificContextForPartition.compute(mostHealthyTentativeLocation, (mostHealthyTentativeLocationAsKey, locationSpecificStatusAsVal) -> { - - if (locationSpecificStatusAsVal != null) { - locationSpecificStatusAsVal = this.locationContextTransitionHandler.handleSuccess( - locationSpecificStatusAsVal, - partitionKeyRangeWrapper, - mostHealthyTentativeLocationAsKey, - true, - isReadOnlyRequest); - } - - return locationSpecificStatusAsVal; - }); - } - return false; } From a7e8edf28e432d5a80feb7cd02177d84a2bd0270 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 18 Jun 2024 19:17:02 -0400 Subject: [PATCH 078/140] Refactoring. --- .../GlobalPartitionEndpointManagerForCircuitBreaker.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 99cd15590d5d..f3869c360d1e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -87,7 +87,7 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest partitionLevelLocationUnavailabilityInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints)); } - request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionLevelLocationUnavailabilityInfoAsVal.regionToHealthStatus); + request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionLevelLocationUnavailabilityInfoAsVal.getRegionToHealthStatus()); return partitionLevelLocationUnavailabilityInfoAsVal; }); @@ -137,7 +137,7 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r succeededLocation, request.isReadOnlyRequest()); - request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionKeyRangeToFailoverInfoAsVal.regionToHealthStatus); + request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionKeyRangeToFailoverInfoAsVal.getRegionToHealthStatus()); return partitionKeyRangeToFailoverInfoAsVal; }); } From cfd8bb65c36bb0104dbe8abfa2f84bf3c1c66b27 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 18 Jun 2024 22:00:44 -0400 Subject: [PATCH 079/140] Fixing tests. --- ...EndpointManagerForCircuitBreakerTests.java | 75 ++++++ .../PartitionLevelCircuitBreakerTests.java | 233 +----------------- .../implementation/RxDocumentClientImpl.java | 11 +- ...itionEndpointManagerForCircuitBreaker.java | 4 + 4 files changed, 88 insertions(+), 235 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index a83e420106d2..d50473d04aa3 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -513,6 +513,81 @@ public void allRegionsUnhealthyHandling(String partitionLevelCircuitBreakerConfi System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId1 = "dbs/db1/colls/coll1"; + String collectionResourceId2 = "dbs/db1/colls/coll2"; + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + RxDocumentServiceRequest request1 = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId1, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + RxDocumentServiceRequest request2 = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId2, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + int exceptionCountToHandle + = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle + 1; i++) { + globalPartitionEndpointManagerForCircuitBreaker + .handleLocationExceptionForPartitionKeyRange(request1, LocationEastUs2EndpointToLocationPair.getKey()); + } + + globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request2); + + Map locationToLocationSpecificContextMappingsForColl1 + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId1)); + + Map locationToLocationSpecificContextMappingsForColl2 + = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId2)); + + LocationSpecificContext locationSpecificContext1 + = locationToLocationSpecificContextMappingsForColl1.get(LocationEastUs2EndpointToLocationPair.getKey()); + + LocationSpecificContext locationSpecificContext2 + = locationToLocationSpecificContextMappingsForColl2.get(LocationEastUs2EndpointToLocationPair.getKey()); + + assertThat(locationSpecificContext1.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificContext1.isExceptionThresholdBreached()).isTrue(); + + assertThat(locationSpecificContext2.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificContext2.isExceptionThresholdBreached()).isFalse(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( OperationType operationType, ResourceType resourceType, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 2d8b494cd532..0bbb634563e6 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -1481,233 +1481,6 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( 15); } - @Test(groups = {"multi-master"}) - public void operationHitsTerminalExceptionInMultipleContainers() { - logger.info("Checking circuit breaking behavior for {}", FaultInjectionOperationType.READ_ITEM); - - List preferredRegions = this.writeRegions; - CosmosClientBuilder clientBuilder = getClientBuilder().multipleWriteRegionsEnabled(true).preferredRegions(preferredRegions); - - ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - - if (connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { - throw new SkipException("Test is not applicable to GATEWAY connectivity mode!"); - } - - CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncDatabase database = getSharedCosmosDatabase(client); - - String multiPartitionContainerId1 = UUID.randomUUID() + "-multi-partition-test-container"; - String multiPartitionContainerId2 = UUID.randomUUID() + "-multi-partition-test-container"; - - CosmosAsyncContainer container1 = null; - CosmosContainerProperties containerProperties1 = new CosmosContainerProperties(multiPartitionContainerId1, "/id"); - ThroughputProperties throughputProperties1 = ThroughputProperties.createManualThroughput(12_000); - - CosmosAsyncContainer container2 = null; - CosmosContainerProperties containerProperties2 = new CosmosContainerProperties(multiPartitionContainerId2, "/id"); - ThroughputProperties throughputProperties2 = ThroughputProperties.createManualThroughput(12_000); - - try { - - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); - - - database.createContainerIfNotExists(containerProperties1, throughputProperties1).block(); - container1 = database.getContainer(multiPartitionContainerId1); - - database.createContainerIfNotExists(containerProperties2, throughputProperties2).block(); - container2 = database.getContainer(multiPartitionContainerId2); - - Thread.sleep(10_000); - - int testObjCountToBootstrapFrom = 2; - List testObjects1 = new ArrayList<>(); - - for (int i = 1; i <= testObjCountToBootstrapFrom; i++) { - TestObject testObject = TestObject.create(); - testObjects1.add(testObject); - container1.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - container2.createItem(testObject, new PartitionKey(testObject.getId()), new CosmosItemRequestOptions()).block(); - } - - FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() - .operationType(FaultInjectionOperationType.READ_ITEM) - .connectionType(FaultInjectionConnectionType.DIRECT) - .endpoints(new FaultInjectionEndpointBuilder(FeedRange.forLogicalPartition(new PartitionKey(testObjects1.get(0).getId()))).build()) - .region(preferredRegions.get(0)) - .build(); - - FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders - .getResultBuilder(FaultInjectionServerErrorType.SERVICE_UNAVAILABLE) - .build(); - - FaultInjectionRule faultInjectionRule1 = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(11) - .build(); - - FaultInjectionRule faultInjectionRule2 = new FaultInjectionRuleBuilder("service-unavailable-rule-" + UUID.randomUUID()) - .condition(faultInjectionCondition) - .result(faultInjectionServerErrorResult) - .hitLimit(11) - .build(); - - if (faultInjectionRule1 != null && faultInjectionRule2 != null) { - - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container1, Arrays.asList(faultInjectionRule1)) - .block(); - - CosmosFaultInjectionHelper - .configureFaultInjectionRules(container2, Arrays.asList(faultInjectionRule2)) - .block(); - - OperationInvocationParamsWrapper paramsWrapper1 = new OperationInvocationParamsWrapper(); - OperationInvocationParamsWrapper paramsWrapper2 = new OperationInvocationParamsWrapper(); - - Function> faultInjectedFunc = resolveDataPlaneOperation(FaultInjectionOperationType.READ_ITEM); - - for (int i = 1; i <= 15; i++) { - paramsWrapper1.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper1.asyncContainer = container1; - - paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper2.asyncContainer = container2; - - ResponseWrapper response1 = faultInjectedFunc.apply(paramsWrapper1); - ResponseWrapper response2 = faultInjectedFunc.apply(paramsWrapper2); - - logger.info("Hit count : {}", faultInjectionRule1.getHitCount()); - logger.info("Hit count : {}", faultInjectionRule2.getHitCount()); - - if (response1.cosmosItemResponse != null) { - assertThat(response1.cosmosItemResponse).isNotNull(); - assertThat(response1.cosmosItemResponse.getDiagnostics()).isNotNull(); - response1.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response1.feedResponse != null) { - assertThat(response1.feedResponse).isNotNull(); - assertThat(response1.feedResponse.getCosmosDiagnostics()).isNotNull(); - - response1.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response1.cosmosException != null) { - assertThat(response1.cosmosException).isNotNull(); - assertThat(response1.cosmosException.getDiagnostics()).isNotNull(); - - response1.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - if (response2.cosmosItemResponse != null) { - assertThat(response2.cosmosItemResponse).isNotNull(); - assertThat(response2.cosmosItemResponse.getDiagnostics()).isNotNull(); - - response2.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response2.feedResponse != null) { - assertThat(response2.feedResponse).isNotNull(); - assertThat(response2.feedResponse.getCosmosDiagnostics()).isNotNull(); - - response2.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response2.cosmosException != null) { - assertThat(response2.cosmosException).isNotNull(); - assertThat(response2.cosmosException.getDiagnostics()).isNotNull(); - - response2.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - - logger.info("Sleep for 120 seconds"); - Thread.sleep(120_000); - - for (int i = 16; i <= 30; i++) { - paramsWrapper1.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper1.asyncContainer = container1; - - paramsWrapper2.createdTestObject = testObjects1.isEmpty() ? null : testObjects1.get(0); - paramsWrapper2.asyncContainer = container2; - - ResponseWrapper response1 = faultInjectedFunc.apply(paramsWrapper1); - ResponseWrapper response2 = faultInjectedFunc.apply(paramsWrapper2); - - logger.info("Hit count : {}", faultInjectionRule1.getHitCount()); - logger.info("Hit count : {}", faultInjectionRule2.getHitCount()); - - if (response1.cosmosItemResponse != null) { - assertThat(response1.cosmosItemResponse).isNotNull(); - assertThat(response1.cosmosItemResponse.getDiagnostics()).isNotNull(); - - response1.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response1.feedResponse != null) { - assertThat(response1.feedResponse).isNotNull(); - assertThat(response1.feedResponse.getCosmosDiagnostics()).isNotNull(); - - response1.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response1.cosmosException != null) { - assertThat(response1.cosmosException).isNotNull(); - assertThat(response1.cosmosException.getDiagnostics()).isNotNull(); - - response1.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - if (response2.cosmosItemResponse != null) { - assertThat(response2.cosmosItemResponse).isNotNull(); - assertThat(response2.cosmosItemResponse.getDiagnostics()).isNotNull(); - - response2.cosmosItemResponse.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response2.feedResponse != null) { - assertThat(response2.feedResponse).isNotNull(); - assertThat(response2.feedResponse.getCosmosDiagnostics()).isNotNull(); - - response2.feedResponse.getCosmosDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } else if (response2.cosmosException != null) { - assertThat(response2.cosmosException).isNotNull(); - assertThat(response2.cosmosException.getDiagnostics()).isNotNull(); - - response2.cosmosException.getDiagnostics().getDiagnosticsContext().getContactedRegionNames().forEach( - regionContacted -> logger.info("Region contacted : {}", regionContacted) - ); - } - } - } - - logger.info("End test"); - } catch (InterruptedException ex) { - fail("InterruptedException should not have been thrown!"); - } catch (Exception ex) { - logger.error("Exception thrown :", ex); - fail("Test should have passed!"); - } finally { - System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); - safeDeleteCollection(container1); - safeDeleteCollection(container2); - safeClose(client); - } - } - private void execute( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -2627,11 +2400,7 @@ private static void deleteAllDocuments(CosmosAsyncContainer asyncContainer) { .blockLast(); } - private static boolean requiresClientLevelE2EConfig(FaultInjectionOperationType faultInjectionOperationType) { - return faultInjectionOperationType == FaultInjectionOperationType.READ_FEED_ITEM; - } - private enum QueryType { - READ_MANY, READ_ALL, QUERY_TEXT_BASED, READ_FEED + READ_MANY, READ_ALL } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 3711af592741..2351f597b700 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -6255,15 +6255,20 @@ private Mono executeFeedOperationWithAvailabilityStrategy( Map partitionKeyRangesWithSuccess = new ConcurrentHashMap<>(); - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, false); - feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow.setIsRequestHedged(false); - req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow); if (orderedApplicableRegionsForSpeculation.size() < 2) { + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, false); + feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow.setIsRequestHedged(false); + req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow); + // There is at most one applicable region - no hedging possible return feedOperation.apply(retryPolicyFactory, req); } + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); + feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow.setIsRequestHedged(false); + req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow); + ThresholdBasedAvailabilityStrategy availabilityStrategy = (ThresholdBasedAvailabilityStrategy)endToEndPolicyConfig.getAvailabilityStrategy(); List>> monoList = new ArrayList<>(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index f3869c360d1e..5e23072844c9 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -223,6 +223,10 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques return false; } + if (request.getOperationType() == OperationType.QueryPlan) { + return false; + } + GlobalEndpointManager globalEndpointManager = this.globalEndpointManager; if (!globalEndpointManager.canUseMultipleWriteLocations(request)) { From 552e4ff4a3bbd85ab402d86c8b13195ac30b1a1f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 19 Jun 2024 12:04:55 -0400 Subject: [PATCH 080/140] Fixing tests. --- ...EndpointManagerForCircuitBreakerTests.java | 227 ++++++++++++++++-- .../PartitionLevelCircuitBreakerTests.java | 11 +- ...itionEndpointManagerForCircuitBreaker.java | 8 +- 3 files changed, 223 insertions(+), 23 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index d50473d04aa3..41518039541f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -11,14 +11,22 @@ import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import org.apache.commons.lang3.tuple.Pair; import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.net.URI; +import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; import static com.azure.cosmos.implementation.TestUtils.mockDiagnosticsClientContext; @@ -26,6 +34,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreakerTests { + private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreakerTests.class); private final static Pair LocationEastUsEndpointToLocationPair = Pair.of(createUrl("https://contoso-east-us.documents.azure.com"), "eastus"); private final static Pair LocationEastUs2EndpointToLocationPair = Pair.of(createUrl("https://contoso-east-us-2.documents.azure.com"), "eastus2"); private final static Pair LocationCentralUsEndpointToLocationPair = Pair.of(createUrl("https://contoso-central-us.documents.azure.com"), "centralus"); @@ -101,7 +110,7 @@ public Object[][] partitionLevelCircuitBreakerConfigs() { } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -125,8 +134,12 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS globalPartitionEndpointManagerForCircuitBreaker .handleLocationSuccessForPartitionKeyRange(request); + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -139,7 +152,7 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -174,8 +187,12 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -188,7 +205,7 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -228,8 +245,12 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -242,7 +263,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -284,8 +305,12 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -309,7 +334,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -351,8 +376,12 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -383,7 +412,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -424,9 +453,12 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); LocationSpecificContext locationSpecificContext @@ -457,7 +489,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void allRegionsUnhealthyHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker @@ -504,8 +536,12 @@ public void allRegionsUnhealthyHandling(String partitionLevelCircuitBreakerConfi .handleLocationExceptionForPartitionKeyRange(request, LocationCentralUsEndpointToLocationPair.getKey()); } + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Map locationToLocationSpecificContextMappings - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); assertThat(locationToLocationSpecificContextMappings).isNull(); @@ -514,7 +550,7 @@ public void allRegionsUnhealthyHandling(String partitionLevelCircuitBreakerConfi } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker @@ -565,12 +601,16 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request2); + Method getLocationToLocationSpecificContextMappingsMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); + getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Map locationToLocationSpecificContextMappingsForColl1 - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId1)); Map locationToLocationSpecificContextMappingsForColl2 - = globalPartitionEndpointManagerForCircuitBreaker.getLocationToLocationSpecificContextMappings(new PartitionKeyRangeWrapper( + = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId2)); LocationSpecificContext locationSpecificContext1 @@ -588,6 +628,159 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } + @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") + public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) { + + System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); + + int threadPoolSizeForExecutors = 4; + + ScheduledThreadPoolExecutor executorForEastUs = new ScheduledThreadPoolExecutor(threadPoolSizeForExecutors); + executorForEastUs.setRemoveOnCancelPolicy(true); + executorForEastUs.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + + ScheduledThreadPoolExecutor executorForCentralUs = new ScheduledThreadPoolExecutor(threadPoolSizeForExecutors); + executorForCentralUs.setRemoveOnCancelPolicy(true); + executorForCentralUs.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + + ScheduledThreadPoolExecutor executorForEastUs2 = new ScheduledThreadPoolExecutor(threadPoolSizeForExecutors); + executorForEastUs2.setRemoveOnCancelPolicy(true); + executorForEastUs2.setExecuteExistingDelayedTasksAfterShutdownPolicy(false); + + List> scheduledFutures = new ArrayList<>(); + + String pkRangeId = "0"; + String minInclusive = "AA"; + String maxExclusive = "BB"; + String collectionResourceId = "dbs/db1/colls/coll1"; + PartitionKeyRange partitionKeyRange = new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive); + + List applicableReadWriteEndpoints = ImmutableList.of( + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) + .stream() + .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .collect(Collectors.toList()); + + Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + Mockito.when(this.globalEndpointManagerMock.getApplicableReadEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); + + RxDocumentServiceRequest requestCentralUs = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationCentralUsEndpointToLocationPair.getKey()); + + RxDocumentServiceRequest requestEastUs = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUsEndpointToLocationPair.getKey()); + + RxDocumentServiceRequest requestEastUs2 = constructRxDocumentServiceRequestInstance( + readOperationTrue ? OperationType.Read : OperationType.Create, + ResourceType.Document, + collectionResourceId, + pkRangeId, + minInclusive, + maxExclusive, + LocationEastUs2EndpointToLocationPair.getKey()); + + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker + = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + + int exceptionCountToHandle = globalPartitionEndpointManagerForCircuitBreaker + .getConsecutiveExceptionBasedCircuitBreaker() + .getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, readOperationTrue); + + for (int i = 1; i <= exceptionCountToHandle * 10; i++) { + + ScheduledFuture scheduledFutureForEastUs = executorForEastUs.schedule( + () -> validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + globalPartitionEndpointManagerForCircuitBreaker, + requestEastUs, + LocationEastUsEndpointToLocationPair.getKey(), + collectionResourceId, + partitionKeyRange, + applicableReadWriteEndpoints), + 1, + TimeUnit.MILLISECONDS); + + ScheduledFuture scheduledFutureForCentralUs = executorForCentralUs.schedule( + () -> validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + globalPartitionEndpointManagerForCircuitBreaker, + requestCentralUs, + LocationCentralUsEndpointToLocationPair.getKey(), + collectionResourceId, + partitionKeyRange, + applicableReadWriteEndpoints), + 1, + TimeUnit.MILLISECONDS); + + ScheduledFuture scheduledFutureForEastUs2 = executorForEastUs2.schedule( + () -> validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + globalPartitionEndpointManagerForCircuitBreaker, + requestEastUs2, + LocationEastUs2EndpointToLocationPair.getKey(), + collectionResourceId, + partitionKeyRange, + applicableReadWriteEndpoints), + 1, + TimeUnit.MILLISECONDS); + + scheduledFutures.add(scheduledFutureForEastUs); + scheduledFutures.add(scheduledFutureForCentralUs); + scheduledFutures.add(scheduledFutureForEastUs2); + } + + while (true) { + + boolean areTasksStillRunning = false; + + for (ScheduledFuture scheduledFuture : scheduledFutures) { + if (!scheduledFuture.isDone()) { + areTasksStillRunning = true; + break; + } + } + + if (!areTasksStillRunning) { + break; + } + } + + executorForEastUs.shutdown(); + executorForCentralUs.shutdown(); + executorForEastUs2.shutdown(); + + System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); + } + + private static void validateAllRegionsAreNotUnavailableAfterExceptionInLocation( + GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker, + RxDocumentServiceRequest request, + URI locationWithFailure, + String collectionResourceId, + PartitionKeyRange partitionKeyRange, + List applicableReadWriteLocations) { + + logger.warn("Handling exception for {}", locationWithFailure.getPath()); + globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(request, locationWithFailure); + + List unavailableLocations + = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(collectionResourceId, partitionKeyRange); + + logger.info("Assert for all regions are not Unavailable!"); + assertThat(unavailableLocations.size()).isLessThan(applicableReadWriteLocations.size()); + } + private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( OperationType operationType, ResourceType resourceType, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 0bbb634563e6..53955c7663df 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -63,6 +63,8 @@ import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import java.lang.reflect.Field; +import java.lang.reflect.Method; import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; @@ -1536,6 +1538,10 @@ private void execute( GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); + Method getAverageExceptionCountByPartitionKeyRangeByRegionMethod + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getAverageExceptionCountByPartitionKeyRangeByRegion", PartitionKeyRangeWrapper.class); + getAverageExceptionCountByPartitionKeyRangeByRegionMethod.setAccessible(true); + faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(operationInvocationParamsWrapper.faultyFeedRange); faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); @@ -1595,6 +1601,8 @@ private void execute( int executionCountAfterCircuitBreakingThresholdBreached = 0; List testObjects = operationInvocationParamsWrapper.testObjectsForDataPlaneOperationToWorkWith; + PartitionKeyRangeWrapper partitionKeyRangeWrapper + = new PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId()); for (int i = 1; i <= operationIterationCountInFailureFlow; i++) { @@ -1615,8 +1623,7 @@ private void execute( consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, true); if (!hasReachedCircuitBreakingThreshold) { - hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == globalPartitionEndpointManagerForCircuitBreaker.getExceptionCountByPartitionKeyRange( - new PartitionKeyRangeWrapper(faultyPartitionKeyRanges.v.get(0), faultyDocumentCollection.v.getResourceId())); + hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == (int) getAverageExceptionCountByPartitionKeyRangeByRegionMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, partitionKeyRangeWrapper); validateResponseInPresenceOfFailures.accept(response); } else { executionCountAfterCircuitBreakingThresholdBreached++; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 5e23072844c9..e74046d9e524 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -342,8 +342,8 @@ public ConcurrentHashMap getRegionToHealthStatus() { } } - // todo: keep private and access through reflection - public int getExceptionCountByPartitionKeyRange(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { + // note: used by reflection in PartitionLevelCircuitBreakerTests + private int getAverageExceptionCountByPartitionKeyRangeByRegion(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); @@ -369,8 +369,8 @@ public int getExceptionCountByPartitionKeyRange(PartitionKeyRangeWrapper partiti return 0; } - // todo: keep private and access through reflection - public Map getLocationToLocationSpecificContextMappings(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { + // note: used by reflection in GlobalPartitionEndpointManagerForCircuitBreakerTests + private Map getLocationToLocationSpecificContextMappings(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); From 0e4fdbe85f8c1a42547516a091516d60ee6dba14 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 19 Jun 2024 14:23:01 -0400 Subject: [PATCH 081/140] Refactoring. --- ...EndpointManagerForCircuitBreakerTests.java | 265 +++++++++++++----- .../PartitionLevelCircuitBreakerTests.java | 206 +++++++++----- ...itionEndpointManagerForCircuitBreaker.java | 53 +--- 3 files changed, 337 insertions(+), 187 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 41518039541f..8e3b0fe71181 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -17,6 +17,7 @@ import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.lang.reflect.Field; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; import java.net.URI; @@ -24,6 +25,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; @@ -110,7 +112,7 @@ public Object[][] partitionLevelCircuitBreakerConfigs() { } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -134,16 +136,30 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS globalPartitionEndpointManagerForCircuitBreaker .handleLocationSuccessForPartitionKeyRange(request); - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); - Map locationToLocationSpecificContextMappings - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( - new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); LocationSpecificContext locationSpecificContext - = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); @@ -152,7 +168,7 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -187,16 +203,30 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); - Map locationToLocationSpecificContextMappings - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( - new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); LocationSpecificContext locationSpecificContext - = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); @@ -205,7 +235,7 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void recordHealthyWithFailuresToUnavailableStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -245,16 +275,30 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); - Map locationToLocationSpecificContextMappings - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( - new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); LocationSpecificContext locationSpecificContext - = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); @@ -263,7 +307,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void recordUnavailableToHealthyTentativeStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -305,16 +349,30 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); - Map locationToLocationSpecificContextMappings - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( - new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); LocationSpecificContext locationSpecificContext - = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); @@ -325,7 +383,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition throw new RuntimeException(ex); } - locationSpecificContext = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + locationSpecificContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); @@ -334,7 +392,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void recordHealthyTentativeToHealthyStatusTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -376,16 +434,30 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); - Map locationToLocationSpecificContextMappings - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( - new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); LocationSpecificContext locationSpecificContext - = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); @@ -396,7 +468,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve throw new RuntimeException(ex); } - locationSpecificContext = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + locationSpecificContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); int successCountToUpgradeStatus = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus.HealthyTentative, readOperationTrue); @@ -412,7 +484,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void recordHealthyTentativeToUnavailableTransition(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); @@ -453,16 +525,31 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC globalPartitionEndpointManagerForCircuitBreaker .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); - Map locationToLocationSpecificContextMappings - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( - new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); LocationSpecificContext locationSpecificContext - = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); @@ -480,7 +567,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } - locationSpecificContext = locationToLocationSpecificContextMappings.get(LocationEastUs2EndpointToLocationPair.getKey()); + locationSpecificContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); @@ -489,7 +576,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws IllegalAccessException, NoSuchFieldException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker @@ -536,21 +623,32 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon .handleLocationExceptionForPartitionKeyRange(request, LocationCentralUsEndpointToLocationPair.getKey()); } - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); - Map locationToLocationSpecificContextMappings - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( - new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId)); + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); - assertThat(locationToLocationSpecificContextMappings).isNull(); + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); + + assertThat(partitionAndLocationSpecificUnavailabilityInfo).isNull(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @Test(groups = {"unit"}, dataProvider = "partitionLevelCircuitBreakerConfigs") - public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchMethodException, InvocationTargetException, IllegalAccessException { + public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(String partitionLevelCircuitBreakerConfigAsJsonString, boolean readOperationTrue) throws NoSuchFieldException, IllegalAccessException { System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", partitionLevelCircuitBreakerConfigAsJsonString); GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker @@ -601,23 +699,41 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St globalPartitionEndpointManagerForCircuitBreaker.handleLocationSuccessForPartitionKeyRange(request2); - Method getLocationToLocationSpecificContextMappingsMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getLocationToLocationSpecificContextMappings", PartitionKeyRangeWrapper.class); - getLocationToLocationSpecificContextMappingsMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); - Map locationToLocationSpecificContextMappingsForColl1 - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); + + Object partitionLevelLocationUnavailabilityInfoSnapshotForColl1 + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId1)); - Map locationToLocationSpecificContextMappingsForColl2 - = (Map) getLocationToLocationSpecificContextMappingsMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, new PartitionKeyRangeWrapper( + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl1 + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl1); + + Object partitionLevelLocationUnavailabilityInfoSnapshotForColl2 + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId2)); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl2 + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl2); + LocationSpecificContext locationSpecificContext1 - = locationToLocationSpecificContextMappingsForColl1.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartitionForColl1.get(LocationEastUs2EndpointToLocationPair.getKey()); LocationSpecificContext locationSpecificContext2 - = locationToLocationSpecificContextMappingsForColl2.get(LocationEastUs2EndpointToLocationPair.getKey()); + = locationEndpointToLocationSpecificContextForPartitionForColl2.get(LocationEastUs2EndpointToLocationPair.getKey()); assertThat(locationSpecificContext1.isRegionAvailableToProcessRequests()).isFalse(); assertThat(locationSpecificContext1.isExceptionThresholdBreached()).isTrue(); @@ -811,4 +927,15 @@ private static URI createUrl(String url) { throw new IllegalArgumentException(e); } } + + private static Class getClassBySimpleName(Class[] classes, String classSimpleName) { + for (Class clazz : classes) { + if (clazz.getSimpleName().equals(classSimpleName)) { + return clazz; + } + } + + logger.warn("Class with simple name {} does not exist!", classSimpleName); + return null; + } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 53955c7663df..48c2e49aa87e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -23,6 +23,7 @@ import com.azure.cosmos.implementation.circuitBreaker.ConsecutiveExceptionBasedCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificContext; import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; @@ -65,6 +66,7 @@ import java.lang.reflect.Field; import java.lang.reflect.Method; +import java.net.URI; import java.time.Duration; import java.util.ArrayList; import java.util.Arrays; @@ -1100,74 +1102,74 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, - { - "Test read all operation injected with server-generated GONE in first preferred region.", - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionDuration(Duration.ofSeconds(60)) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildServerGeneratedGoneError, - executeReadManyOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - "Test read all operation injected with too many requests error in first preferred region.", - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionDuration(Duration.ofSeconds(60)) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildTooManyRequestsError, - executeReadManyOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - "Test read all operation injected with read/write session not available error in first preferred region.", - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionDuration(Duration.ofSeconds(60)) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildReadWriteSessionNotAvailableRules, - executeReadManyOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - "Test read all operation injected with service unavailable error in all regions.", - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withHitLimit(11) - .withFaultInjectionApplicableRegions(this.writeRegions), - this.buildServiceUnavailableError, - executeReadManyOperation, - noEndToEndTimeout, - noRegionSwitchHint, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - } +// { +// "Test read all operation injected with server-generated GONE in first preferred region.", +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), +// this.buildServerGeneratedGoneError, +// executeReadManyOperation, +// twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, +// noRegionSwitchHint, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ConnectionMode.DIRECT +// }, +// { +// "Test read all operation injected with too many requests error in first preferred region.", +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), +// this.buildTooManyRequestsError, +// executeReadManyOperation, +// twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, +// noRegionSwitchHint, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ConnectionMode.DIRECT +// }, +// { +// "Test read all operation injected with read/write session not available error in first preferred region.", +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionDuration(Duration.ofSeconds(60)) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), +// this.buildReadWriteSessionNotAvailableRules, +// executeReadManyOperation, +// twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, +// noRegionSwitchHint, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ConnectionMode.DIRECT +// }, +// { +// "Test read all operation injected with service unavailable error in all regions.", +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withHitLimit(11) +// .withFaultInjectionApplicableRegions(this.writeRegions), +// this.buildServiceUnavailableError, +// executeReadManyOperation, +// noEndToEndTimeout, +// noRegionSwitchHint, +// this.validateResponseHasServiceUnavailableError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasAllRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ConnectionMode.DIRECT +// } }; } @@ -1538,9 +1540,21 @@ private void execute( GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = documentClient.getGlobalPartitionEndpointManagerForCircuitBreaker(); - Method getAverageExceptionCountByPartitionKeyRangeByRegionMethod - = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredMethod("getAverageExceptionCountByPartitionKeyRangeByRegion", PartitionKeyRangeWrapper.class); - getAverageExceptionCountByPartitionKeyRangeByRegionMethod.setAccessible(true); + Class[] enclosedClasses = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredClasses(); + Class partitionLevelUnavailabilityInfoClass + = getClassBySimpleName(enclosedClasses, "PartitionLevelLocationUnavailabilityInfo"); + assertThat(partitionLevelUnavailabilityInfoClass).isNotNull(); + + Field partitionKeyRangeToLocationSpecificUnavailabilityInfoField + = GlobalPartitionEndpointManagerForCircuitBreaker.class.getDeclaredField("partitionKeyRangeToLocationSpecificUnavailabilityInfo"); + partitionKeyRangeToLocationSpecificUnavailabilityInfoField.setAccessible(true); + + Field locationEndpointToLocationSpecificContextForPartitionField + = partitionLevelUnavailabilityInfoClass.getDeclaredField("locationEndpointToLocationSpecificContextForPartition"); + locationEndpointToLocationSpecificContextForPartitionField.setAccessible(true); + + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo + = (ConcurrentHashMap) partitionKeyRangeToLocationSpecificUnavailabilityInfoField.get(globalPartitionEndpointManagerForCircuitBreaker); faultInjectionRuleParamsWrapper.withFaultInjectionApplicableFeedRange(operationInvocationParamsWrapper.faultyFeedRange); faultInjectionRuleParamsWrapper.withFaultInjectionApplicableAsyncContainer(container); @@ -1623,7 +1637,12 @@ private void execute( consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, true); if (!hasReachedCircuitBreakingThreshold) { - hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == (int) getAverageExceptionCountByPartitionKeyRangeByRegionMethod.invoke(globalPartitionEndpointManagerForCircuitBreaker, partitionKeyRangeWrapper); + hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == + getAverageExceptionCountByPartitionKeyRangeByRegion( + partitionKeyRangeWrapper, + partitionKeyRangeToLocationSpecificUnavailabilityInfo, + locationEndpointToLocationSpecificContextForPartitionField + ); validateResponseInPresenceOfFailures.accept(response); } else { executionCountAfterCircuitBreakingThresholdBreached++; @@ -2407,6 +2426,49 @@ private static void deleteAllDocuments(CosmosAsyncContainer asyncContainer) { .blockLast(); } + private static Class getClassBySimpleName(Class[] classes, String classSimpleName) { + for (Class clazz : classes) { + if (clazz.getSimpleName().equals(classSimpleName)) { + return clazz; + } + } + + logger.warn("Class with simple name {} does not exist!", classSimpleName); + return null; + } + + private static int getAverageExceptionCountByPartitionKeyRangeByRegion( + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo, + Field locationEndpointToLocationSpecificContextForPartitionField) throws IllegalAccessException { + + Object partitionAndLocationSpecificUnavailabilityInfo + = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); + + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + + int count = 0; + int regionCountWithFailures = 0; + boolean failuresExist = false; + + for (LocationSpecificContext locationSpecificContext : locationEndpointToLocationSpecificContextForPartition.values()) { + count += locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite(); + + if (locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite() > 0) { + failuresExist = true; + regionCountWithFailures++; + } + } + + if (failuresExist) { + return count / regionCountWithFailures; + } + + return 0; + + } + private enum QueryType { READ_MANY, READ_ALL } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index e74046d9e524..d3439b412c07 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -34,13 +34,13 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final GlobalEndpointManager globalEndpointManager; private final ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo; - private final ConcurrentHashMap partitionsWithPossibleUnavailableRegions; + private final ConcurrentHashMap partitionKeyRangesWithPossibleUnavailableRegions; private final LocationContextTransitionHandler locationContextTransitionHandler; private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); - this.partitionsWithPossibleUnavailableRegions = new ConcurrentHashMap<>(); + this.partitionKeyRangesWithPossibleUnavailableRegions = new ConcurrentHashMap<>(); this.globalEndpointManager = globalEndpointManager; PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); @@ -175,7 +175,7 @@ private Flux updateStaleLocationInfo() { return Mono.just(1) .delayElement(Duration.ofSeconds(60)) .repeat() - .flatMap(ignore -> Flux.fromIterable(this.partitionsWithPossibleUnavailableRegions.entrySet())) + .flatMap(ignore -> Flux.fromIterable(this.partitionKeyRangesWithPossibleUnavailableRegions.entrySet())) .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> { @@ -206,7 +206,7 @@ private Flux updateStaleLocationInfo() { }); } } else { - this.partitionsWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper); + this.partitionKeyRangesWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper); } return Mono.empty(); @@ -250,7 +250,7 @@ private PartitionLevelLocationUnavailabilityInfo() { this.locationContextTransitionHandler = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationContextTransitionHandler; } - public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException, boolean isReadOnlyRequest) { + private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException, boolean isReadOnlyRequest) { AtomicBoolean isExceptionThresholdBreached = new AtomicBoolean(false); @@ -270,7 +270,7 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper LocationSpecificContext locationSpecificContextAfterTransition = this.locationContextTransitionHandler.handleException( locationSpecificContextAsVal, partitionKeyRangeWrapper, - GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionsWithPossibleUnavailableRegions, + GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionKeyRangesWithPossibleUnavailableRegions, locationWithException, isReadOnlyRequest); @@ -287,7 +287,7 @@ public boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper return isExceptionThresholdBreached.get(); } - public void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation, boolean isReadOnlyRequest) { + private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation, boolean isReadOnlyRequest) { this.locationEndpointToLocationSpecificContextForPartition.compute(succeededLocation, (locationAsKey, locationSpecificContextAsVal) -> { LocationSpecificContext locationSpecificContextAfterTransition; @@ -342,45 +342,6 @@ public ConcurrentHashMap getRegionToHealthStatus() { } } - // note: used by reflection in PartitionLevelCircuitBreakerTests - private int getAverageExceptionCountByPartitionKeyRangeByRegion(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { - - PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = - this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); - - int count = 0; - int regionCountWithFailures = 0; - boolean failuresExist = false; - - for (LocationSpecificContext locationSpecificContext - : partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition.values()) { - count += locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite(); - - if (locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite() > 0) { - failuresExist = true; - regionCountWithFailures++; - } - } - - if (failuresExist) { - return count / regionCountWithFailures; - } - - return 0; - } - - // note: used by reflection in GlobalPartitionEndpointManagerForCircuitBreakerTests - private Map getLocationToLocationSpecificContextMappings(PartitionKeyRangeWrapper partitionKeyRangeWrapper) { - PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = - this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); - - if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { - return partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition; - } - - return null; - } - public ConsecutiveExceptionBasedCircuitBreaker getConsecutiveExceptionBasedCircuitBreaker() { return this.consecutiveExceptionBasedCircuitBreaker; } From 58a2bde25a84913c2a7112b5c397a9752d84786d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 19 Jun 2024 14:24:41 -0400 Subject: [PATCH 082/140] Refactoring. --- .../PartitionLevelCircuitBreakerTests.java | 136 +++++++++--------- 1 file changed, 68 insertions(+), 68 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 48c2e49aa87e..e4a5255ecb59 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -1102,74 +1102,74 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, -// { -// "Test read all operation injected with server-generated GONE in first preferred region.", -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), -// this.buildServerGeneratedGoneError, -// executeReadManyOperation, -// twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, -// noRegionSwitchHint, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ConnectionMode.DIRECT -// }, -// { -// "Test read all operation injected with too many requests error in first preferred region.", -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), -// this.buildTooManyRequestsError, -// executeReadManyOperation, -// twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, -// noRegionSwitchHint, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ConnectionMode.DIRECT -// }, -// { -// "Test read all operation injected with read/write session not available error in first preferred region.", -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionDuration(Duration.ofSeconds(60)) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), -// this.buildReadWriteSessionNotAvailableRules, -// executeReadManyOperation, -// twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, -// noRegionSwitchHint, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ConnectionMode.DIRECT -// }, -// { -// "Test read all operation injected with service unavailable error in all regions.", -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withHitLimit(11) -// .withFaultInjectionApplicableRegions(this.writeRegions), -// this.buildServiceUnavailableError, -// executeReadManyOperation, -// noEndToEndTimeout, -// noRegionSwitchHint, -// this.validateResponseHasServiceUnavailableError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasAllRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ConnectionMode.DIRECT -// } + { + "Test read all operation injected with server-generated GONE in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildServerGeneratedGoneError, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read all operation injected with too many requests error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildTooManyRequestsError, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read all operation injected with read/write session not available error in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildReadWriteSessionNotAvailableRules, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + noRegionSwitchHint, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + { + "Test read all operation injected with service unavailable error in all regions.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions), + this.buildServiceUnavailableError, + executeReadManyOperation, + noEndToEndTimeout, + noRegionSwitchHint, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + } }; } From d5372fde48d18d4b4cc38a291be446d1026e6433 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 19 Jun 2024 17:17:58 -0400 Subject: [PATCH 083/140] Refactoring. --- ...nEndpointManagerForCircuitBreakerTests.java | 2 +- .../azure/cosmos/implementation/Configs.java | 18 +++++++++++++++++- ...titionEndpointManagerForCircuitBreaker.java | 6 ++++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 8e3b0fe71181..e997ac968c3e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -893,7 +893,7 @@ private static void validateAllRegionsAreNotUnavailableAfterExceptionInLocation( List unavailableLocations = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(collectionResourceId, partitionKeyRange); - logger.info("Assert for all regions are not Unavailable!"); + logger.info("Assert that all regions are not Unavailable!"); assertThat(unavailableLocations.size()).isLessThan(applicableReadWriteLocations.size()); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 9810ed245e84..8b425794cd26 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -209,7 +209,8 @@ public class Configs { private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = 2; private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS"; private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; - + private static final String STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = "COSMOS.STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS"; + private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 60; public Configs() { this.sslContext = sslContextInit(); @@ -643,4 +644,19 @@ public static int getStaleCollectionCacheRefreshRetryIntervalInSeconds() { return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS; } + + public static int getStalePartitionUnavailabilityRefreshIntervalInSeconds() { + String valueFromSystemProperty = System.getProperty(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); + + if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { + return Integer.valueOf(valueFromSystemProperty); + } + + String valueFromEnvVariable = System.getenv(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); + if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { + return Integer.valueOf(valueFromEnvVariable); + } + + return DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index d3439b412c07..9d0453b35756 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -49,7 +49,9 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo } public void init() { - this.updateStaleLocationInfo().subscribeOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE).subscribe(); + if (this.consecutiveExceptionBasedCircuitBreaker.isPartitionLevelCircuitBreakerEnabled()) { + this.updateStaleLocationInfo().subscribeOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE).subscribe(); + } } public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest request, URI failedLocation) { @@ -173,7 +175,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String coll private Flux updateStaleLocationInfo() { return Mono.just(1) - .delayElement(Duration.ofSeconds(60)) + .delayElement(Duration.ofSeconds(Configs.getStalePartitionUnavailabilityRefreshIntervalInSeconds())) .repeat() .flatMap(ignore -> Flux.fromIterable(this.partitionKeyRangesWithPossibleUnavailableRegions.entrySet())) .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) From b06aca10f2a3861515d8d46c613739c7463ac67c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 19 Jun 2024 18:11:34 -0400 Subject: [PATCH 084/140] Refactoring. --- .../cosmos/RetryContextOnDiagnosticTest.java | 1 - .../PartitionLevelCircuitBreakerTests.java | 4 ---- .../directconnectivity/AddressResolverTest.java | 5 +---- .../GlobalAddressResolverTest.java | 6 ++---- .../implementation/GlobalEndpointManager.java | 15 +++------------ .../implementation/RxDocumentClientImpl.java | 3 +-- .../directconnectivity/AddressResolver.java | 10 +--------- .../directconnectivity/GlobalAddressResolver.java | 13 ++----------- .../GoneAndRetryWithRetryPolicy.java | 2 -- .../directconnectivity/IAddressResolver.java | 3 --- .../rntbd/RntbdRequestRecord.java | 1 - .../implementation/query/DocumentProducer.java | 5 ++--- 12 files changed, 12 insertions(+), 56 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java index 3e2b3074f178..a1f823196000 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java @@ -25,7 +25,6 @@ import com.azure.cosmos.implementation.ShouldRetryResult; import com.azure.cosmos.implementation.StoreResponseBuilder; import com.azure.cosmos.implementation.TestConfigurations; -import com.azure.cosmos.implementation.TestUtils; import com.azure.cosmos.implementation.directconnectivity.AddressSelector; import com.azure.cosmos.implementation.directconnectivity.ConsistencyReader; import com.azure.cosmos.implementation.directconnectivity.ConsistencyWriter; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index e4a5255ecb59..0ba5bb581cc4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -32,7 +32,6 @@ import com.azure.cosmos.models.CosmosBatch; import com.azure.cosmos.models.CosmosBatchResponse; import com.azure.cosmos.models.CosmosChangeFeedRequestOptions; -import com.azure.cosmos.models.CosmosContainerProperties; import com.azure.cosmos.models.CosmosItemIdentity; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; @@ -43,7 +42,6 @@ import com.azure.cosmos.models.FeedRange; import com.azure.cosmos.models.FeedResponse; import com.azure.cosmos.models.PartitionKey; -import com.azure.cosmos.models.ThroughputProperties; import com.azure.cosmos.test.faultinjection.CosmosFaultInjectionHelper; import com.azure.cosmos.test.faultinjection.FaultInjectionCondition; import com.azure.cosmos.test.faultinjection.FaultInjectionConditionBuilder; @@ -65,11 +63,9 @@ import reactor.core.publisher.Mono; import java.lang.reflect.Field; -import java.lang.reflect.Method; import java.net.URI; import java.time.Duration; import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java index 4c408634e857..4cd7f3e153f5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/AddressResolverTest.java @@ -6,7 +6,6 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.implementation.DocumentCollection; -import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.InvalidPartitionException; @@ -64,15 +63,13 @@ public class AddressResolverTest { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache fabricAddressCache; - private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private int collectionCacheRefreshedCount; private Map routingMapRefreshCount; private Map addressesRefreshCount; @BeforeClass(groups = "unit") public void before_AddressResolverTest() throws Exception { - this.globalPartitionEndpointManager = Mockito.mock(GlobalPartitionEndpointManagerForCircuitBreaker.class); - this.addressResolver = new AddressResolver(this.globalPartitionEndpointManager); + this.addressResolver = new AddressResolver(); this.collectionCache = Mockito.mock(RxCollectionCache.class); this.collectionRoutingMapCache = Mockito.mock(ICollectionRoutingMapCache.class); this.fabricAddressCache = Mockito.mock(IAddressCache.class); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java index c41e07cdf9d1..73c4110cea1c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java @@ -50,7 +50,6 @@ public class GlobalAddressResolverTest { private HttpClient httpClient; private GlobalEndpointManager endpointManager; - private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private IAuthorizationTokenProvider authorizationTokenProvider; private UserAgentContainer userAgentContainer; private RxCollectionCache collectionCache; @@ -111,7 +110,7 @@ public void resolveAsync() throws Exception { GlobalAddressResolver globalAddressResolver = new GlobalAddressResolver(mockDiagnosticsClientContext(), httpClient, endpointManager, Protocol.HTTPS, authorizationTokenProvider, collectionCache, routingMapProvider, userAgentContainer, - serviceConfigReader, connectionPolicy, null, globalPartitionEndpointManager); + serviceConfigReader, connectionPolicy, null); RxDocumentServiceRequest request; request = RxDocumentServiceRequest.createFromName(mockDiagnosticsClientContext(), OperationType.Read, @@ -146,8 +145,7 @@ public void submitOpenConnectionTasksAndInitCaches() { userAgentContainer, serviceConfigReader, connectionPolicy, - null, - globalPartitionEndpointManager); + null); GlobalAddressResolver.EndpointCache endpointCache = new GlobalAddressResolver.EndpointCache(); GatewayAddressCache gatewayAddressCache = Mockito.mock(GatewayAddressCache.class); endpointCache.addressCache = gatewayAddressCache; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index aefd03265b91..f36e5ae973ca 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -18,6 +18,7 @@ import java.time.Duration; import java.time.LocalDateTime; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.function.Function; @@ -46,8 +47,6 @@ public class GlobalEndpointManager implements AutoCloseable { private volatile Throwable latestDatabaseRefreshError; - private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; - public void setLatestDatabaseRefreshError(Throwable latestDatabaseRefreshError) { this.latestDatabaseRefreshError = latestDatabaseRefreshError; } @@ -105,12 +104,12 @@ public UnmodifiableList getApplicableWriteEndpoints(RxDocumentServiceReques public UnmodifiableList getApplicableReadEndpoints(List excludedRegions) { // readonly - return this.locationCache.getApplicableReadEndpoints(excludedRegions, new ArrayList<>()); + return this.locationCache.getApplicableReadEndpoints(excludedRegions, Collections.emptyList()); } public UnmodifiableList getApplicableWriteEndpoints(List excludedRegions) { //readonly - return this.locationCache.getApplicableWriteEndpoints(excludedRegions, new ArrayList<>()); + return this.locationCache.getApplicableWriteEndpoints(excludedRegions, Collections.emptyList()); } public List getAvailableReadEndpoints() { @@ -338,14 +337,6 @@ public String getRegionName(URI locationEndpoint, OperationType operationType) { return this.locationCache.getRegionName(locationEndpoint, operationType); } - public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { - return globalPartitionEndpointManagerForCircuitBreaker; - } - - public void setGlobalPartitionEndpointManagerForCircuitBreaker(GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { - this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; - } - public ConnectionPolicy getConnectionPolicy() { return this.connectionPolicy; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 2351f597b700..91dee8880b3a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -759,8 +759,7 @@ private void initializeDirectConnectivity() { // this.gatewayConfigurationReader, null, this.connectionPolicy, - this.apiType, - this.globalPartitionEndpointManagerForCircuitBreaker); + this.apiType); this.storeClientFactory = new StoreClientFactory( this.addressResolver, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index d237f408c62b..a4f066c21fd9 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -9,7 +9,6 @@ import com.azure.cosmos.implementation.BadRequestException; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.Exceptions; -import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ICollectionRoutingMapCache; import com.azure.cosmos.implementation.InternalServerErrorException; @@ -55,10 +54,8 @@ public class AddressResolver implements IAddressResolver { private RxCollectionCache collectionCache; private ICollectionRoutingMapCache collectionRoutingMapCache; private IAddressCache addressCache; - private GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; - public AddressResolver(GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { - this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; + public AddressResolver() { } public void initializeCaches( @@ -99,11 +96,6 @@ public void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proact throw new NotImplementedException("setOpenConnectionsProcessor is not supported on AddressResolver"); } - @Override - public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { - return this.globalPartitionEndpointManagerForCircuitBreaker; - } - private static boolean isSameCollection(PartitionKeyRange initiallyResolved, PartitionKeyRange newlyResolved) { if (initiallyResolved == null) { throw new IllegalArgumentException("parent"); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index f280a2581b58..fed1cbde61a8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -11,7 +11,6 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.OpenConnectionResponse; @@ -48,7 +47,6 @@ public class GlobalAddressResolver implements IAddressResolver { private final static int MaxBackupReadRegions = 3; private final DiagnosticsClientContext diagnosticsClientContext; private final GlobalEndpointManager endpointManager; - private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker; private final Protocol protocol; private final IAuthorizationTokenProvider tokenProvider; private final UserAgentContainer userAgentContainer; @@ -75,8 +73,7 @@ public GlobalAddressResolver( UserAgentContainer userAgentContainer, GatewayServiceConfigurationReader serviceConfigReader, ConnectionPolicy connectionPolicy, - ApiType apiType, - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker) { + ApiType apiType) { this.diagnosticsClientContext = diagnosticsClientContext; this.httpClient = httpClient; this.endpointManager = endpointManager; @@ -93,7 +90,6 @@ public GlobalAddressResolver( this.maxEndpoints = maxBackupReadEndpoints + 2; // for write and alternate write getEndpoint (during failover) this.addressCacheByEndpoint = new ConcurrentHashMap<>(); this.apiType = apiType; - this.globalPartitionEndpointManagerForCircuitBreaker = globalPartitionEndpointManagerForCircuitBreaker; for (URI endpoint : endpointManager.getWriteEndpoints()) { this.getOrAddEndpoint(endpoint); @@ -249,11 +245,6 @@ public void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proact } } - @Override - public GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker() { - return this.globalPartitionEndpointManagerForCircuitBreaker; - } - @Override public Mono resolveAsync(RxDocumentServiceRequest request, boolean forceRefresh) { IAddressResolver resolver = this.getAddressResolver(request); @@ -299,7 +290,7 @@ private EndpointCache getOrAddEndpoint(URI endpoint) { this.connectionPolicy, this.proactiveOpenConnectionsProcessor, this.gatewayServerErrorInjector); - AddressResolver addressResolver = new AddressResolver(this.globalPartitionEndpointManagerForCircuitBreaker); + AddressResolver addressResolver = new AddressResolver(); addressResolver.initializeCaches(this.collectionCache, this.routingMapProvider, gatewayAddressCache); EndpointCache cache = new EndpointCache(); cache.addressCache = gatewayAddressCache; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java index ceb1ddb4357f..8248ef1664c5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GoneAndRetryWithRetryPolicy.java @@ -17,7 +17,6 @@ import com.azure.cosmos.implementation.RetryWithException; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.ShouldRetryResult; -import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -193,7 +192,6 @@ public Mono shouldRetry(Exception exception) { Duration backoffTime = Duration.ofSeconds(0); Duration timeout; boolean forceRefreshAddressCache; - if (isNonRetryableException(exception)) { logger.debug("Operation will NOT be retried. Current attempt {}, Exception: ", this.attemptCount, exception); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java index 26286b3a0b1f..965c38517199 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/IAddressResolver.java @@ -4,7 +4,6 @@ package com.azure.cosmos.implementation.directconnectivity; import com.azure.cosmos.CosmosContainerProactiveInitConfig; -import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.OpenConnectionResponse; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.directconnectivity.rntbd.ProactiveOpenConnectionsProcessor; @@ -33,6 +32,4 @@ Mono resolveAsync( * @param proactiveOpenConnectionsProcessor the {@link ProactiveOpenConnectionsProcessor}. */ void setOpenConnectionsProcessor(ProactiveOpenConnectionsProcessor proactiveOpenConnectionsProcessor); - - GlobalPartitionEndpointManagerForCircuitBreaker getGlobalPartitionEndpointManagerForCircuitBreaker(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java index 09970c17277c..1e9d57bdcd35 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java @@ -9,7 +9,6 @@ import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.RequestTimeline; import com.azure.cosmos.implementation.RequestTimeoutException; -import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.fasterxml.jackson.core.JsonGenerator; import com.fasterxml.jackson.databind.SerializerProvider; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index cc5ef9b7f5cf..1bbcb8a3020e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -154,9 +154,8 @@ public DocumentProducer( finalRetryPolicy.onBeforeSendRequest(req); } ++retries; - return Mono.just(req); - }) - .flatMap(req -> executeRequestFunc.apply(req)), finalRetryPolicy); + return executeRequestFunc.apply(req); + }), finalRetryPolicy); }; this.correlatedActivityId = correlatedActivityId; From c30090d39b8e0e1a948ed73ee5f06e2262ea8dd1 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 20 Jun 2024 12:17:39 -0400 Subject: [PATCH 085/140] Adding code comments. --- .../PartitionLevelCircuitBreakerTests.java | 417 ++++++++++++++---- 1 file changed, 319 insertions(+), 98 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 0ba5bb581cc4..2efd5f3f186f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -173,6 +173,12 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE); }; + Consumer> validateResponseHasRequestTimeoutException = (responseWrapper) -> { + assertThat(responseWrapper.cosmosException).isNotNull(); + assertThat(responseWrapper.cosmosException.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.REQUEST_TIMEOUT); + assertThat(responseWrapper.cosmosException.getSubStatusCode()).isNotEqualTo(HttpConstants.SubStatusCodes.CLIENT_OPERATION_TIMEOUT); + }; + private final Function> buildServiceUnavailableError = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; @@ -261,7 +267,18 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { Function> generateRetryWithRules = PartitionLevelCircuitBreakerTests::buildRetryWithFaultInjectionRules; + // General testing flow: + // Below tests choose a fault type to inject, regions to inject the fault in + // and the operation type for which the fault is injected. The idea is to assert + // what happens when faults are being injected - should an exception bubble up + // in the process [or] should the operation succeed, region contacted when circuit + // breaking has kicked in and region contacted when region + partition combination is + // being marked back as UnhealthyTentative (eligible to accept requests) return new Object[][]{ + // Server-generated 503 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -279,6 +296,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), new FaultInjectionRuleParamsWrapper() @@ -296,6 +318,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -313,6 +340,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for DELETE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -331,6 +363,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for PATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), new FaultInjectionRuleParamsWrapper() @@ -348,6 +385,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -365,6 +407,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but include + // the second preferred region when the first preferred region has been short-circuited. + // For queries which require a QueryPlan, the first preferred region is contacted (not a data plane request + // which will hit a data partition so is not eligible for circuit breaking). new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -382,6 +430,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for BATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), new FaultInjectionRuleParamsWrapper() @@ -399,6 +452,10 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() @@ -416,6 +473,10 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -433,6 +494,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), new FaultInjectionRuleParamsWrapper() @@ -450,6 +516,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -467,6 +538,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for DELETE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -484,6 +560,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for PATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), new FaultInjectionRuleParamsWrapper() @@ -501,6 +582,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -518,6 +604,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. Even + // when short-circuiting of first preferred region has kicked in, the first preferred region is contacted + // to fetch the QueryPlan. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -535,6 +627,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Response-delay injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) + // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -553,6 +650,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Response-delay injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) + // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -571,6 +673,58 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Response-delay injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit RequestTimeoutException (due to network request timeout of 5s kicking in) + // and because NonIdempotentWriteRetryPolicy isn't enabled + // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region and with no end-to-end operation timeout configured.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + generateTransitTimeoutRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasRequestTimeoutException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + // Response-delay injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit RequestTimeoutException (due to network request timeout of 5s kicking in) + // and because NonIdempotentWriteRetryPolicy isn't enabled + // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with response delay in first preferred region and with no end-to-end operation timeout configured.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)) + .withResponseDelay(Duration.ofSeconds(6)), + generateTransitTimeoutRules, + noEndToEndTimeout, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasRequestTimeoutException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT + }, + // 500 (internal server error) injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -588,6 +742,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -605,6 +764,10 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() @@ -622,6 +785,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. Although, after short-circuiting, a query operation + // will see request for QueryPlan from the short-circuited region. { String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -639,6 +808,10 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -656,6 +829,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -673,6 +851,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. + // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -690,6 +873,10 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 404/1002 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -707,6 +894,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 404/1002 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -724,6 +916,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 449 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -741,6 +938,10 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 503 injected into all regions for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. new Object[]{ String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -758,6 +959,11 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 503 injected into all regions for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. new Object[]{ String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), new FaultInjectionRuleParamsWrapper() @@ -775,6 +981,10 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 503 injected into all regions for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. new Object[] { String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -792,95 +1002,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. new Object[]{ - String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)) - .withResponseDelay(Duration.ofSeconds(6)), - generateTransitTimeoutRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)) - .withResponseDelay(Duration.ofSeconds(6)), - generateTransitTimeoutRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), - generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateResponseHasInternalServerError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), - generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateResponseHasInternalServerError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT - }, - new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region.", FaultInjectionOperationType.READ_ITEM), + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) @@ -896,8 +1023,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) + // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) @@ -913,8 +1044,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) + // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) @@ -962,6 +1097,10 @@ public Object[][] readManyTestConfigs() { }; return new Object[][]{ + // Server-generated 503 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. { "Test read many operation injected with service unavailable exception in first preferred region.", new FaultInjectionRuleParamsWrapper() @@ -979,8 +1118,12 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // Server-generated 410 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { - "Test read many operation injected with server-generated GONE in first preferred region.", + "Test read many operation injected with server-generated gone in first preferred region.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) @@ -996,6 +1139,10 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read many operation injected with too many requests error in first preferred region.", new FaultInjectionRuleParamsWrapper() @@ -1013,8 +1160,12 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 404/1002 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { - "Test read many operation injected with read/write session not available error in first preferred region.", + "Test read many operation injected with read session not available error in first preferred region.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) @@ -1030,6 +1181,11 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 503 injected into all region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 503 and only to succeed once + // fault injection has hit its injection limits. Also, the success is + // from the first preferred region. { "Test read many operation injected with service unavailable error in all regions.", new FaultInjectionRuleParamsWrapper() @@ -1046,6 +1202,28 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT + }, + // 429 injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsError, + executeReadManyOperation, + twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT } }; } @@ -1053,7 +1231,7 @@ public Object[][] readManyTestConfigs() { @DataProvider(name = "readAllTestConfigs") public Object[][] readAllTestConfigs() { - Function> executeReadManyOperation = (paramsWrapper) -> { + Function> executeReadAllOperation = (paramsWrapper) -> { CosmosAsyncContainer asyncContainer = paramsWrapper.asyncContainer; PartitionKey partitionKey = paramsWrapper.partitionKeyForReadAllOperation; CosmosQueryRequestOptions queryRequestOptions = paramsWrapper.queryRequestOptions; @@ -1081,6 +1259,10 @@ public Object[][] readAllTestConfigs() { }; return new Object[][]{ + // Server-generated 503 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. { "Test read all operation injected with service unavailable exception in first preferred region.", new FaultInjectionRuleParamsWrapper() @@ -1088,7 +1270,7 @@ public Object[][] readAllTestConfigs() { .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServiceUnavailableError, - executeReadManyOperation, + executeReadAllOperation, noEndToEndTimeout, noRegionSwitchHint, this.validateResponseHasSuccess, @@ -1098,6 +1280,10 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read all operation injected with server-generated GONE in first preferred region.", new FaultInjectionRuleParamsWrapper() @@ -1105,7 +1291,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServerGeneratedGoneError, - executeReadManyOperation, + executeReadAllOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, this.validateResponseHasOperationCancelledException, @@ -1115,6 +1301,10 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 429 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read all operation injected with too many requests error in first preferred region.", new FaultInjectionRuleParamsWrapper() @@ -1122,7 +1312,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildTooManyRequestsError, - executeReadManyOperation, + executeReadAllOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, this.validateResponseHasOperationCancelledException, @@ -1132,6 +1322,10 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 404/1002 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read all operation injected with read/write session not available error in first preferred region.", new FaultInjectionRuleParamsWrapper() @@ -1139,7 +1333,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildReadWriteSessionNotAvailableRules, - executeReadManyOperation, + executeReadAllOperation, twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, noRegionSwitchHint, this.validateResponseHasOperationCancelledException, @@ -1149,6 +1343,11 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT }, + // 503 injected into all region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 503 and only to succeed once + // fault injection has hit its injection limits. Also, the success is + // from the first preferred region. { "Test read all operation injected with service unavailable error in all regions.", new FaultInjectionRuleParamsWrapper() @@ -1156,7 +1355,7 @@ public Object[][] readAllTestConfigs() { .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions), this.buildServiceUnavailableError, - executeReadManyOperation, + executeReadAllOperation, noEndToEndTimeout, noRegionSwitchHint, this.validateResponseHasServiceUnavailableError, @@ -1165,6 +1364,28 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ConnectionMode.DIRECT + }, + // 429 injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsError, + executeReadAllOperation, + twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, + noRegionSwitchHint, + !nonIdempotentWriteRetriesEnabled, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ConnectionMode.DIRECT } }; } From ef2775eae2ce0e8725ad780fa7de1add9cf596dc Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 20 Jun 2024 12:20:00 -0400 Subject: [PATCH 086/140] Adding code comments. --- .../implementation/PartitionLevelCircuitBreakerTests.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 2efd5f3f186f..ec3cfa3624df 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -1208,7 +1208,7 @@ public Object[][] readManyTestConfigs() { // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), + "Test faulty read many operation with too many requests error in first preferred region with threshold-based availability strategy enabled.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) @@ -1370,7 +1370,7 @@ public Object[][] readAllTestConfigs() { // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), + "Test faulty read all operation with too many requests error in first preferred region with threshold-based availability strategy enabled.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) From 4e31fbe8a18463a365f21a4ba0dc5b16d73b1dbc Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 21 Jun 2024 14:28:19 -0400 Subject: [PATCH 087/140] Modifying way to configure circuit breaker thresholds. --- .../cosmos/benchmark/AsyncBenchmark.java | 15 ++- .../azure/cosmos/benchmark/Configuration.java | 14 +++ .../azure/cosmos/benchmark/SyncBenchmark.java | 7 +- .../ClientConfigDiagnosticsTest.java | 8 +- ...EndpointManagerForCircuitBreakerTests.java | 48 +++------- .../PartitionLevelCircuitBreakerTests.java | 8 +- ...nsecutiveExceptionBasedCircuitBreaker.java | 94 ++----------------- .../PartitionLevelCircuitBreakerConfig.java | 22 ++++- 8 files changed, 81 insertions(+), 135 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java index df76564b8bdf..c7a349e28891 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java @@ -89,9 +89,13 @@ abstract class AsyncBenchmark { configuration = cfg; if (configuration.isPartitionLevelCircuitBreakerEnabled()) { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); } CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() @@ -144,8 +148,11 @@ abstract class AsyncBenchmark { cosmosClientBuilder = cosmosClientBuilder.gatewayMode(gatewayConnectionConfig); } - CosmosClient syncClient = cosmosClientBuilder.buildClient(); cosmosClient = cosmosClientBuilder.buildAsyncClient(); + CosmosClient syncClient = cosmosClientBuilder + .endpoint(configuration.getServiceEndpointForRunResultsUploadAccount()) + .key(configuration.getMasterKeyForRunResultsUploadAccount()) + .buildClient(); try { cosmosAsyncDatabase = cosmosClient.getDatabase(this.configuration.getDatabaseId()); diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java index 17d26a7f72c3..0de2957d4385 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/Configuration.java @@ -47,6 +47,12 @@ public class Configuration { @Parameter(names = "-masterKey", description = "Master Key") private String masterKey; + @Parameter(names = "-serviceEndpointForResultsUploadAccount", description = "Service Endpoint for run results upload account") + private String serviceEndpointForRunResultsUploadAccount; + + @Parameter(names = "-masterKeyForResultsUploadAccount", description = "Master Key for run results upload account") + private String masterKeyForRunResultsUploadAccount; + @Parameter(names = "-databaseId", description = "Database ID") private String databaseId; @@ -400,6 +406,14 @@ public String getMasterKey() { return masterKey; } + public String getServiceEndpointForRunResultsUploadAccount() { + return serviceEndpointForRunResultsUploadAccount; + } + + public String getMasterKeyForRunResultsUploadAccount() { + return masterKeyForRunResultsUploadAccount; + } + public String getApplicationName() { return applicationName; } diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java index e5666826c884..fdefad27851e 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java @@ -153,6 +153,11 @@ public T apply(T o, Throwable throwable) { } cosmosClient = cosmosClientBuilder.buildClient(); + CosmosClient syncClient = cosmosClientBuilder + .endpoint(configuration.getServiceEndpointForRunResultsUploadAccount()) + .key(configuration.getMasterKeyForRunResultsUploadAccount()) + .buildClient(); + try { cosmosDatabase = cosmosClient.getDatabase(this.configuration.getDatabaseId()); cosmosDatabase.read(); @@ -252,7 +257,7 @@ public T apply(T o, Throwable throwable) { resultReporter = CosmosTotalResultReporter .forRegistry( metricsRegistry, - cosmosClient.getDatabase(configuration.getResultUploadDatabase()).getContainer(configuration.getResultUploadContainer()), + syncClient.getDatabase(configuration.getResultUploadDatabase()).getContainer(configuration.getResultUploadContainer()), configuration) .convertRatesTo(TimeUnit.SECONDS) .convertDurationsTo(TimeUnit.MILLISECONDS).build(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java index 03790701c060..c3e720a90d95 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ClientConfigDiagnosticsTest.java @@ -286,8 +286,10 @@ public void full( System.setProperty( "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); diagnosticsClientConfig.withPartitionLevelCircuitBreakerConfig(partitionLevelCircuitBreakerConfig); @@ -318,7 +320,7 @@ public void full( } if (isPartitionLevelCircuitBreakerEnabled) { - assertThat(objectNode.get("partitionLevelCircuitBreakerCfg").asText()).isEqualTo("(cb: true, type: COUNT_BASED, tl: LOW)"); + assertThat(objectNode.get("partitionLevelCircuitBreakerCfg").asText()).isEqualTo("(cb: true, type: CONSECUTIVE_EXCEPTION_COUNT_BASED, rexcntt: 10, wexcntt: 5)"); } else { assertThat(objectNode.get("partitionLevelCircuitBreakerCfg")).isNull(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index e997ac968c3e..70c94d04b0f2 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -71,41 +71,21 @@ public void beforeClass() { @DataProvider(name = "partitionLevelCircuitBreakerConfigs") public Object[][] partitionLevelCircuitBreakerConfigs() { - return new Object[][] { - new Object[] { + return new Object[][]{ + new Object[]{ "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"LOW\"}", + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}", READ_OPERATION_TRUE }, - new Object[] { + new Object[]{ "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"MEDIUM\"}", - READ_OPERATION_TRUE - }, - new Object[] { - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"HIGH\"}", - READ_OPERATION_TRUE - }, - new Object[] { - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"LOW\"}", - !READ_OPERATION_TRUE - }, - new Object[] { - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"MEDIUM\"}", - !READ_OPERATION_TRUE - }, - new Object[] { - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"HIGH\"}", + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}", !READ_OPERATION_TRUE } }; @@ -181,9 +161,9 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL String collectionResourceId = "dbs/db1/colls/coll1"; List applicableReadWriteEndpoints = ImmutableList.of( - LocationEastUs2EndpointToLocationPair, - LocationEastUsEndpointToLocationPair, - LocationCentralUsEndpointToLocationPair) + LocationEastUs2EndpointToLocationPair, + LocationEastUsEndpointToLocationPair, + LocationCentralUsEndpointToLocationPair) .stream() .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) .collect(Collectors.toList()); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index ec3cfa3624df..0d7d96b8527b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -1217,7 +1217,6 @@ public Object[][] readManyTestConfigs() { executeReadManyOperation, twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, @@ -1379,7 +1378,6 @@ public Object[][] readAllTestConfigs() { executeReadAllOperation, twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, @@ -1729,8 +1727,10 @@ private void execute( System.setProperty( "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); if (regionSwitchHint != null) { clientBuilder = clientBuilder diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 2fbe2821f721..867fcef2d871 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -29,19 +29,20 @@ public LocationSpecificContext handleException(LocationSpecificContext locationS case HealthyTentative: exceptionCountAfterHandling++; + int successCountAfterHandling = 0; if (isReadOnlyRequest) { return new LocationSpecificContext( locationSpecificContext.getSuccessCountForWrite(), locationSpecificContext.getExceptionCountForWrite(), - locationSpecificContext.getSuccessCountForRead(), + successCountAfterHandling, exceptionCountAfterHandling, locationSpecificContext.getUnavailableSince(), locationSpecificContext.getLocationHealthStatus(), locationSpecificContext.isExceptionThresholdBreached()); } else { return new LocationSpecificContext( - locationSpecificContext.getSuccessCountForWrite(), + successCountAfterHandling, exceptionCountAfterHandling, locationSpecificContext.getSuccessCountForRead(), locationSpecificContext.getExceptionCountForRead(), @@ -134,36 +135,9 @@ public boolean canHealthStatusBeUpgraded(LocationSpecificContext locationSpecifi int successCountActual = isReadOnlyRequest ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForWrite(); - int exceptionCountActual - = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); - LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); - return successCountActual >= getMinimumSuccessCountForStatusUpgrade(locationHealthStatus, isReadOnlyRequest) && - (double) exceptionCountActual / (double) successCountActual < getAllowedExceptionToSuccessRatio(locationHealthStatus, isReadOnlyRequest); - } - - private static double getAllowedExceptionToSuccessRatio(LocationHealthStatus status, boolean isReadOnlyRequest) { - - if (isReadOnlyRequest) { - switch (status) { - case HealthyWithFailures: - return 0.3d; - case HealthyTentative: - return 0.1d; - default: - return 0d; - } - } else { - switch (status) { - case HealthyWithFailures: - return 0.2d; - case HealthyTentative: - return 0.05d; - default: - return 0d; - } - } + return successCountActual >= getMinimumSuccessCountForStatusUpgrade(locationHealthStatus, isReadOnlyRequest); } public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, boolean isReadOnlyRequest) { @@ -171,25 +145,9 @@ public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, if (isReadOnlyRequest) { switch (status) { case HealthyWithFailures: - if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { - return 10; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { - return 20; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { - return 40; - } else { - throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); - } + return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads(); case HealthyTentative: - if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { - return 5; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { - return 10; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { - return 20; - } else { - throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); - } + return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads() / 2; case Healthy: case Unavailable: return 0; @@ -199,25 +157,9 @@ public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, } else { switch (status) { case HealthyWithFailures: - if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { - return 5; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { - return 10; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { - return 20; - } else { - throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); - } + return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites(); case HealthyTentative: - if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { - return 10; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { - return 5; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { - return 3; - } else { - throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); - } + return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites() / 2; case Healthy: return 0; default: @@ -230,15 +172,7 @@ public int getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus status, b if (isReadOnlyRequest) { switch (status) { case HealthyTentative: - if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { - return 10; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { - return 5; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { - return 3; - } else { - throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); - } + return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads(); case Unavailable: case HealthyWithFailures: case Healthy: @@ -249,15 +183,7 @@ public int getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus status, b } else { switch (status) { case HealthyTentative: - if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("LOW")) { - return 20; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("MEDIUM")) { - return 10; - } else if (this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance().equals("HIGH")) { - return 5; - } else { - throw new IllegalArgumentException("Unsupported tolerance setting " + this.partitionLevelCircuitBreakerConfig.getCircuitBreakerFailureTolerance()); - } + return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites(); case Unavailable: case HealthyWithFailures: case Healthy: diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java index 0d37e4ab7b94..a3c70f2a1b13 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionLevelCircuitBreakerConfig.java @@ -20,11 +20,15 @@ public class PartitionLevelCircuitBreakerConfig { @JsonSetter(nulls = Nulls.SKIP) @JsonProperty - private String circuitBreakerType = "COUNT_BASED"; + private String circuitBreakerType = "CONSECUTIVE_EXCEPTION_COUNT_BASED"; @JsonSetter(nulls = Nulls.SKIP) @JsonProperty - private String circuitBreakerFailureTolerance = "LOW"; + private int consecutiveExceptionCountToleratedForReads = 10; + + @JsonSetter(nulls = Nulls.SKIP) + @JsonProperty + private int consecutiveExceptionCountToleratedForWrites = 5; private String cachedConfigAsString = ""; @@ -32,12 +36,17 @@ public Boolean isPartitionLevelCircuitBreakerEnabled() { return isPartitionLevelCircuitBreakerEnabled; } + // todo (abhmohanty): keep this method around for future-proofing (adding more circuit breaker types) public String getCircuitBreakerType() { return circuitBreakerType; } - public String getCircuitBreakerFailureTolerance() { - return circuitBreakerFailureTolerance; + public int getConsecutiveExceptionCountToleratedForReads() { + return consecutiveExceptionCountToleratedForReads; + } + + public int getConsecutiveExceptionCountToleratedForWrites() { + return consecutiveExceptionCountToleratedForWrites; } public String toJson() { @@ -49,11 +58,14 @@ public String toJson() { } public String getConfigAsString() { + if (StringUtils.isEmpty(this.cachedConfigAsString)) { this.cachedConfigAsString = "(" + "cb: " + this.isPartitionLevelCircuitBreakerEnabled + ", " + "type: " + this.circuitBreakerType + ", " + - "tl: " + this.circuitBreakerFailureTolerance + ")"; + "rexcntt: " + this.consecutiveExceptionCountToleratedForReads + ", " + + "wexcntt: " + this.consecutiveExceptionCountToleratedForWrites + ")"; } + return this.cachedConfigAsString; } From 007a1c22e5823f06cc1c572b4f00b89ef688a2ad Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 24 Jun 2024 10:44:27 -0400 Subject: [PATCH 088/140] Adding capability to specify alternate account to upload benchmark results. --- ...EndpointManagerForCircuitBreakerTests.java | 103 +++++++++-------- .../PartitionLevelCircuitBreakerTests.java | 12 +- ...nsecutiveExceptionBasedCircuitBreaker.java | 106 +++++++++--------- ...itionEndpointManagerForCircuitBreaker.java | 46 ++++---- ...ava => LocationSpecificHealthContext.java} | 4 +- ...ecificHealthContextTransitionHandler.java} | 59 +++++----- 6 files changed, 163 insertions(+), 167 deletions(-) rename sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/{LocationSpecificContext.java => LocationSpecificHealthContext.java} (96%) rename sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/{LocationContextTransitionHandler.java => LocationSpecificHealthContextTransitionHandler.java} (82%) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 70c94d04b0f2..fc235134f540 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -6,7 +6,7 @@ import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; -import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificContext; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import org.apache.commons.lang3.tuple.Pair; @@ -18,13 +18,10 @@ import org.testng.annotations.Test; import java.lang.reflect.Field; -import java.lang.reflect.InvocationTargetException; -import java.lang.reflect.Method; import java.net.URI; import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; @@ -135,14 +132,14 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); - LocationSpecificContext locationSpecificContext + LocationSpecificHealthContext locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @@ -202,14 +199,14 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); - LocationSpecificContext locationSpecificContext + LocationSpecificHealthContext locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @@ -274,14 +271,14 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); - LocationSpecificContext locationSpecificContext + LocationSpecificHealthContext locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @@ -348,14 +345,14 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); - LocationSpecificContext locationSpecificContext + LocationSpecificHealthContext locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); try { Thread.sleep(65_000); @@ -363,10 +360,10 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition throw new RuntimeException(ex); } - locationSpecificContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @@ -433,14 +430,14 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); - LocationSpecificContext locationSpecificContext + LocationSpecificHealthContext locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); try { Thread.sleep(65_000); @@ -448,7 +445,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve throw new RuntimeException(ex); } - locationSpecificContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); int successCountToUpgradeStatus = globalPartitionEndpointManagerForCircuitBreaker.getConsecutiveExceptionBasedCircuitBreaker().getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus.HealthyTentative, readOperationTrue); @@ -457,8 +454,8 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve .handleLocationSuccessForPartitionKeyRange(request); } - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isTrue(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isFalse(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isFalse(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @@ -525,14 +522,14 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper(request.requestContext.resolvedPartitionKeyRange, collectionResourceId)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); - LocationSpecificContext locationSpecificContext + LocationSpecificHealthContext locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); try { Thread.sleep(65_000); @@ -547,10 +544,10 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC .handleLocationExceptionForPartitionKeyRange(request, LocationEastUs2EndpointToLocationPair.getKey()); } - locationSpecificContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); + locationSpecificHealthContext = locationEndpointToLocationSpecificContextForPartition.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext.isRegionAvailableToProcessRequests()).isFalse(); - assertThat(locationSpecificContext.isExceptionThresholdBreached()).isTrue(); + assertThat(locationSpecificHealthContext.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } @@ -699,27 +696,27 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId1)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl1 - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl1); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl1 + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl1); Object partitionLevelLocationUnavailabilityInfoSnapshotForColl2 = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(new PartitionKeyRangeWrapper( new PartitionKeyRange(pkRangeId, minInclusive, maxExclusive), collectionResourceId2)); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl2 - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl2); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartitionForColl2 + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionLevelLocationUnavailabilityInfoSnapshotForColl2); - LocationSpecificContext locationSpecificContext1 + LocationSpecificHealthContext locationSpecificHealthContext1 = locationEndpointToLocationSpecificContextForPartitionForColl1.get(LocationEastUs2EndpointToLocationPair.getKey()); - LocationSpecificContext locationSpecificContext2 + LocationSpecificHealthContext locationSpecificHealthContext2 = locationEndpointToLocationSpecificContextForPartitionForColl2.get(LocationEastUs2EndpointToLocationPair.getKey()); - assertThat(locationSpecificContext1.isRegionAvailableToProcessRequests()).isFalse(); - assertThat(locationSpecificContext1.isExceptionThresholdBreached()).isTrue(); + assertThat(locationSpecificHealthContext1.isRegionAvailableToProcessRequests()).isFalse(); + assertThat(locationSpecificHealthContext1.isExceptionThresholdBreached()).isTrue(); - assertThat(locationSpecificContext2.isRegionAvailableToProcessRequests()).isTrue(); - assertThat(locationSpecificContext2.isExceptionThresholdBreached()).isFalse(); + assertThat(locationSpecificHealthContext2.isRegionAvailableToProcessRequests()).isTrue(); + assertThat(locationSpecificHealthContext2.isExceptionThresholdBreached()).isFalse(); System.clearProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 0d7d96b8527b..be9f5ab37a44 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -23,7 +23,7 @@ import com.azure.cosmos.implementation.circuitBreaker.ConsecutiveExceptionBasedCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; -import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificContext; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; @@ -2662,17 +2662,17 @@ private static int getAverageExceptionCountByPartitionKeyRangeByRegion( Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); - ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition - = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition + = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); int count = 0; int regionCountWithFailures = 0; boolean failuresExist = false; - for (LocationSpecificContext locationSpecificContext : locationEndpointToLocationSpecificContextForPartition.values()) { - count += locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite(); + for (LocationSpecificHealthContext locationSpecificHealthContext : locationEndpointToLocationSpecificContextForPartition.values()) { + count += locationSpecificHealthContext.getExceptionCountForRead() + locationSpecificHealthContext.getExceptionCountForWrite(); - if (locationSpecificContext.getExceptionCountForRead() + locationSpecificContext.getExceptionCountForWrite() > 0) { + if (locationSpecificHealthContext.getExceptionCountForRead() + locationSpecificHealthContext.getExceptionCountForWrite() > 0) { failuresExist = true; regionCountWithFailures++; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 867fcef2d871..0fc88faa4803 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -15,16 +15,16 @@ public ConsecutiveExceptionBasedCircuitBreaker(PartitionLevelCircuitBreakerConfi this.partitionLevelCircuitBreakerConfig = partitionLevelCircuitBreakerConfig; } - public LocationSpecificContext handleException(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + public LocationSpecificHealthContext handleException(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int exceptionCountAfterHandling - = (isReadOnlyRequest) ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); - LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus(); switch (locationHealthStatus) { case Healthy: - return locationSpecificContext; + return locationSpecificHealthContext; case HealthyWithFailures: case HealthyTentative: @@ -32,23 +32,23 @@ public LocationSpecificContext handleException(LocationSpecificContext locationS int successCountAfterHandling = 0; if (isReadOnlyRequest) { - return new LocationSpecificContext( - locationSpecificContext.getSuccessCountForWrite(), - locationSpecificContext.getExceptionCountForWrite(), + return new LocationSpecificHealthContext( + locationSpecificHealthContext.getSuccessCountForWrite(), + locationSpecificHealthContext.getExceptionCountForWrite(), successCountAfterHandling, exceptionCountAfterHandling, - locationSpecificContext.getUnavailableSince(), - locationSpecificContext.getLocationHealthStatus(), - locationSpecificContext.isExceptionThresholdBreached()); + locationSpecificHealthContext.getUnavailableSince(), + locationSpecificHealthContext.getLocationHealthStatus(), + locationSpecificHealthContext.isExceptionThresholdBreached()); } else { - return new LocationSpecificContext( + return new LocationSpecificHealthContext( successCountAfterHandling, exceptionCountAfterHandling, - locationSpecificContext.getSuccessCountForRead(), - locationSpecificContext.getExceptionCountForRead(), - locationSpecificContext.getUnavailableSince(), - locationSpecificContext.getLocationHealthStatus(), - locationSpecificContext.isExceptionThresholdBreached()); + locationSpecificHealthContext.getSuccessCountForRead(), + locationSpecificHealthContext.getExceptionCountForRead(), + locationSpecificHealthContext.getUnavailableSince(), + locationSpecificHealthContext.getLocationHealthStatus(), + locationSpecificHealthContext.isExceptionThresholdBreached()); } case Unavailable: throw new IllegalStateException(); @@ -57,63 +57,63 @@ public LocationSpecificContext handleException(LocationSpecificContext locationS } } - public LocationSpecificContext handleSuccess(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int exceptionCountAfterHandling - = (isReadOnlyRequest) ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); int successCountAfterHandling - = (isReadOnlyRequest) ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForWrite(); + = (isReadOnlyRequest) ? locationSpecificHealthContext.getSuccessCountForRead() : locationSpecificHealthContext.getSuccessCountForWrite(); - LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus(); switch (locationHealthStatus) { case Healthy: - return locationSpecificContext; + return locationSpecificHealthContext; case HealthyWithFailures: exceptionCountAfterHandling = 0; if (isReadOnlyRequest) { - return new LocationSpecificContext( - locationSpecificContext.getSuccessCountForWrite(), - locationSpecificContext.getExceptionCountForWrite(), - locationSpecificContext.getSuccessCountForRead(), + return new LocationSpecificHealthContext( + locationSpecificHealthContext.getSuccessCountForWrite(), + locationSpecificHealthContext.getExceptionCountForWrite(), + locationSpecificHealthContext.getSuccessCountForRead(), exceptionCountAfterHandling, - locationSpecificContext.getUnavailableSince(), - locationSpecificContext.getLocationHealthStatus(), - locationSpecificContext.isExceptionThresholdBreached()); + locationSpecificHealthContext.getUnavailableSince(), + locationSpecificHealthContext.getLocationHealthStatus(), + locationSpecificHealthContext.isExceptionThresholdBreached()); } else { - return new LocationSpecificContext( - locationSpecificContext.getSuccessCountForWrite(), + return new LocationSpecificHealthContext( + locationSpecificHealthContext.getSuccessCountForWrite(), exceptionCountAfterHandling, - locationSpecificContext.getSuccessCountForRead(), - locationSpecificContext.getExceptionCountForRead(), - locationSpecificContext.getUnavailableSince(), - locationSpecificContext.getLocationHealthStatus(), - locationSpecificContext.isExceptionThresholdBreached()); + locationSpecificHealthContext.getSuccessCountForRead(), + locationSpecificHealthContext.getExceptionCountForRead(), + locationSpecificHealthContext.getUnavailableSince(), + locationSpecificHealthContext.getLocationHealthStatus(), + locationSpecificHealthContext.isExceptionThresholdBreached()); } case HealthyTentative: successCountAfterHandling++; if (isReadOnlyRequest) { - return new LocationSpecificContext( - locationSpecificContext.getSuccessCountForWrite(), - locationSpecificContext.getExceptionCountForWrite(), + return new LocationSpecificHealthContext( + locationSpecificHealthContext.getSuccessCountForWrite(), + locationSpecificHealthContext.getExceptionCountForWrite(), successCountAfterHandling, exceptionCountAfterHandling, - locationSpecificContext.getUnavailableSince(), - locationSpecificContext.getLocationHealthStatus(), - locationSpecificContext.isExceptionThresholdBreached()); + locationSpecificHealthContext.getUnavailableSince(), + locationSpecificHealthContext.getLocationHealthStatus(), + locationSpecificHealthContext.isExceptionThresholdBreached()); } else { - return new LocationSpecificContext( + return new LocationSpecificHealthContext( successCountAfterHandling, exceptionCountAfterHandling, - locationSpecificContext.getSuccessCountForRead(), - locationSpecificContext.getExceptionCountForRead(), - locationSpecificContext.getUnavailableSince(), - locationSpecificContext.getLocationHealthStatus(), - locationSpecificContext.isExceptionThresholdBreached()); + locationSpecificHealthContext.getSuccessCountForRead(), + locationSpecificHealthContext.getExceptionCountForRead(), + locationSpecificHealthContext.getUnavailableSince(), + locationSpecificHealthContext.getLocationHealthStatus(), + locationSpecificHealthContext.isExceptionThresholdBreached()); } case Unavailable: throw new IllegalStateException(); @@ -122,20 +122,20 @@ public LocationSpecificContext handleSuccess(LocationSpecificContext locationSpe } } - public boolean shouldHealthStatusBeDowngraded(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + public boolean shouldHealthStatusBeDowngraded(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int exceptionCountActual - = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); - return exceptionCountActual >= getAllowedExceptionCountToMaintainStatus(locationSpecificContext.getLocationHealthStatus(), isReadOnlyRequest); + return exceptionCountActual >= getAllowedExceptionCountToMaintainStatus(locationSpecificHealthContext.getLocationHealthStatus(), isReadOnlyRequest); } - public boolean canHealthStatusBeUpgraded(LocationSpecificContext locationSpecificContext, boolean isReadOnlyRequest) { + public boolean canHealthStatusBeUpgraded(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int successCountActual - = isReadOnlyRequest ? locationSpecificContext.getSuccessCountForRead() : locationSpecificContext.getSuccessCountForWrite(); + = isReadOnlyRequest ? locationSpecificHealthContext.getSuccessCountForRead() : locationSpecificHealthContext.getSuccessCountForWrite(); - LocationHealthStatus locationHealthStatus = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus(); return successCountActual >= getMinimumSuccessCountForStatusUpgrade(locationHealthStatus, isReadOnlyRequest); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 9d0453b35756..b76ed7c3a329 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -35,7 +35,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final GlobalEndpointManager globalEndpointManager; private final ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo; private final ConcurrentHashMap partitionKeyRangesWithPossibleUnavailableRegions; - private final LocationContextTransitionHandler locationContextTransitionHandler; + private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler; private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { @@ -45,7 +45,7 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); this.consecutiveExceptionBasedCircuitBreaker = new ConsecutiveExceptionBasedCircuitBreaker(partitionLevelCircuitBreakerConfig); - this.locationContextTransitionHandler = new LocationContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker); + this.locationSpecificHealthContextTransitionHandler = new LocationSpecificHealthContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker); } public void init() { @@ -157,14 +157,14 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String coll List unavailableLocations = new ArrayList<>(); if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { - Map locationEndpointToFailureMetricsForPartition = + Map locationEndpointToFailureMetricsForPartition = partitionLevelLocationUnavailabilityInfoSnapshot.locationEndpointToLocationSpecificContextForPartition; - for (Map.Entry pair : locationEndpointToFailureMetricsForPartition.entrySet()) { + for (Map.Entry pair : locationEndpointToFailureMetricsForPartition.entrySet()) { URI location = pair.getKey(); - LocationSpecificContext locationSpecificContext = pair.getValue(); + LocationSpecificHealthContext locationSpecificHealthContext = pair.getValue(); - if (locationSpecificContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) { + if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) { unavailableLocations.add(location); } } @@ -188,7 +188,7 @@ private Flux updateStaleLocationInfo() { PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); if (partitionLevelLocationUnavailabilityInfo != null) { - for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.entrySet()) { + for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.entrySet()) { URI locationWithStaleUnavailabilityInfo = locationToLocationLevelMetrics.getKey(); @@ -196,7 +196,7 @@ private Flux updateStaleLocationInfo() { if (locationSpecificContextAsVal != null) { locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationContextTransitionHandler.handleSuccess( + .this.locationSpecificHealthContextTransitionHandler.handleSuccess( locationSpecificContextAsVal, partitionKeyRangeWrapper, locationWithStaleUnavailabilityInfoAsKey, @@ -242,14 +242,14 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques private class PartitionLevelLocationUnavailabilityInfo { - private final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; + private final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; private final ConcurrentHashMap regionToHealthStatus; - private final LocationContextTransitionHandler locationContextTransitionHandler; + private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler; private PartitionLevelLocationUnavailabilityInfo() { this.locationEndpointToLocationSpecificContextForPartition = new ConcurrentHashMap<>(); this.regionToHealthStatus = new ConcurrentHashMap<>(); - this.locationContextTransitionHandler = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationContextTransitionHandler; + this.locationSpecificHealthContextTransitionHandler = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationSpecificHealthContextTransitionHandler; } private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException, boolean isReadOnlyRequest) { @@ -259,7 +259,7 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe this.locationEndpointToLocationSpecificContextForPartition.compute(locationWithException, (locationAsKey, locationSpecificContextAsVal) -> { if (locationSpecificContextAsVal == null) { - locationSpecificContextAsVal = new LocationSpecificContext( + locationSpecificContextAsVal = new LocationSpecificHealthContext( 0, 0, 0, @@ -269,7 +269,7 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe false); } - LocationSpecificContext locationSpecificContextAfterTransition = this.locationContextTransitionHandler.handleException( + LocationSpecificHealthContext locationSpecificHealthContextAfterTransition = this.locationSpecificHealthContextTransitionHandler.handleException( locationSpecificContextAsVal, partitionKeyRangeWrapper, GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionKeyRangesWithPossibleUnavailableRegions, @@ -280,10 +280,10 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe GlobalPartitionEndpointManagerForCircuitBreaker .this.globalEndpointManager .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), - locationSpecificContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); + locationSpecificHealthContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); - isExceptionThresholdBreached.set(locationSpecificContextAfterTransition.isExceptionThresholdBreached()); - return locationSpecificContextAfterTransition; + isExceptionThresholdBreached.set(locationSpecificHealthContextAfterTransition.isExceptionThresholdBreached()); + return locationSpecificHealthContextAfterTransition; }); return isExceptionThresholdBreached.get(); @@ -292,10 +292,10 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation, boolean isReadOnlyRequest) { this.locationEndpointToLocationSpecificContextForPartition.compute(succeededLocation, (locationAsKey, locationSpecificContextAsVal) -> { - LocationSpecificContext locationSpecificContextAfterTransition; + LocationSpecificHealthContext locationSpecificHealthContextAfterTransition; if (locationSpecificContextAsVal == null) { - locationSpecificContextAsVal = new LocationSpecificContext( + locationSpecificContextAsVal = new LocationSpecificHealthContext( 0, 0, 0, @@ -305,7 +305,7 @@ private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, UR false); } - locationSpecificContextAfterTransition = this.locationContextTransitionHandler.handleSuccess( + locationSpecificHealthContextAfterTransition = this.locationSpecificHealthContextTransitionHandler.handleSuccess( locationSpecificContextAsVal, partitionKeyRangeWrapper, succeededLocation, @@ -316,9 +316,9 @@ private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, UR GlobalPartitionEndpointManagerForCircuitBreaker .this.globalEndpointManager .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), - locationSpecificContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); + locationSpecificHealthContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); - return locationSpecificContextAfterTransition; + return locationSpecificHealthContextAfterTransition; }); } @@ -328,9 +328,9 @@ public boolean areLocationsAvailableForPartitionKeyRange(List availableLoca if (!this.locationEndpointToLocationSpecificContextForPartition.containsKey(availableLocation)) { return true; } else { - LocationSpecificContext locationSpecificContextSnapshot = this.locationEndpointToLocationSpecificContextForPartition.get(availableLocation); + LocationSpecificHealthContext locationSpecificHealthContextSnapshot = this.locationEndpointToLocationSpecificContextForPartition.get(availableLocation); - if (locationSpecificContextSnapshot.isRegionAvailableToProcessRequests()) { + if (locationSpecificHealthContextSnapshot.isRegionAvailableToProcessRequests()) { return true; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java similarity index 96% rename from sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java rename to sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java index 88999c04fde9..ecd4c92e8fdb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java @@ -5,7 +5,7 @@ import java.time.Instant; -public class LocationSpecificContext { +public class LocationSpecificHealthContext { private final int exceptionCountForWrite; private final int successCountForWrite; private final int exceptionCountForRead; @@ -14,7 +14,7 @@ public class LocationSpecificContext { private final LocationHealthStatus locationHealthStatus; private final boolean isExceptionThresholdBreached; - public LocationSpecificContext( + public LocationSpecificHealthContext( int successCountForWrite, int exceptionCountForWrite, int successCountForRead, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java similarity index 82% rename from sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationContextTransitionHandler.java rename to sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index 386a16420b55..59fa4b3b2400 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -11,17 +11,16 @@ import java.net.URI; import java.time.Duration; import java.time.Instant; -import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -public class LocationContextTransitionHandler { +public class LocationSpecificHealthContextTransitionHandler { - private static final Logger logger = LoggerFactory.getLogger(LocationContextTransitionHandler.class); + private static final Logger logger = LoggerFactory.getLogger(LocationSpecificHealthContextTransitionHandler.class); private final GlobalEndpointManager globalEndpointManager; private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; - public LocationContextTransitionHandler( + public LocationSpecificHealthContextTransitionHandler( GlobalEndpointManager globalEndpointManager, ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker) { @@ -29,17 +28,17 @@ public LocationContextTransitionHandler( this.consecutiveExceptionBasedCircuitBreaker = consecutiveExceptionBasedCircuitBreaker; } - public LocationSpecificContext handleSuccess( - LocationSpecificContext locationSpecificContext, + public LocationSpecificHealthContext handleSuccess( + LocationSpecificHealthContext locationSpecificHealthContext, PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithSuccess, boolean forceStatusChange, boolean isReadOnlyRequest) { - LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificHealthContext.getLocationHealthStatus(); int exceptionCountActual - = isReadOnlyRequest ? locationSpecificContext.getExceptionCountForRead() : locationSpecificContext.getExceptionCountForWrite(); + = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); switch (currentLocationHealthStatusSnapshot) { case Healthy: @@ -48,7 +47,7 @@ public LocationSpecificContext handleSuccess( if (!forceStatusChange) { if (exceptionCountActual > 0) { return this.consecutiveExceptionBasedCircuitBreaker - .handleSuccess(locationSpecificContext, isReadOnlyRequest); + .handleSuccess(locationSpecificHealthContext, isReadOnlyRequest); } } break; @@ -56,10 +55,10 @@ public LocationSpecificContext handleSuccess( case HealthyTentative: if (!forceStatusChange) { - LocationSpecificContext locationSpecificContextInner - = this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificContext, isReadOnlyRequest); + LocationSpecificHealthContext locationSpecificHealthContextInner + = this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificHealthContext, isReadOnlyRequest); - if (this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificContextInner, isReadOnlyRequest)) { + if (this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificHealthContextInner, isReadOnlyRequest)) { if (logger.isDebugEnabled()) { logger.debug("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", @@ -72,12 +71,12 @@ public LocationSpecificContext handleSuccess( return this.transitionHealthStatus(LocationHealthStatus.Healthy); } else { - return locationSpecificContextInner; + return locationSpecificHealthContextInner; } } break; case Unavailable: - Instant unavailableSinceActual = locationSpecificContext.getUnavailableSince(); + Instant unavailableSinceActual = locationSpecificHealthContext.getUnavailableSince(); if (!forceStatusChange) { if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { @@ -110,17 +109,17 @@ public LocationSpecificContext handleSuccess( throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); } - return locationSpecificContext; + return locationSpecificHealthContext; } - public LocationSpecificContext handleException( - LocationSpecificContext locationSpecificContext, + public LocationSpecificHealthContext handleException( + LocationSpecificHealthContext locationSpecificHealthContext, PartitionKeyRangeWrapper partitionKeyRangeWrapper, ConcurrentHashMap partitionKeyRangesWithPossibleUnavailableRegions, URI locationWithException, boolean isReadOnlyRequest) { - LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificContext.getLocationHealthStatus(); + LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificHealthContext.getLocationHealthStatus(); switch (currentLocationHealthStatusSnapshot) { case Healthy: @@ -136,22 +135,22 @@ public LocationSpecificContext handleException( return this.transitionHealthStatus(LocationHealthStatus.HealthyWithFailures); case HealthyWithFailures: - if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { + if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { - LocationSpecificContext locationSpecificContextInner - = this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); + LocationSpecificHealthContext locationSpecificHealthContextInner + = this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificHealthContext, isReadOnlyRequest); if (logger.isDebugEnabled()) { logger.debug("Partition {}-{} of collection : {} has exception count of {} for region : {}", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getResourceId(), - isReadOnlyRequest ? locationSpecificContextInner.getExceptionCountForRead() : locationSpecificContextInner.getExceptionCountForWrite(), + isReadOnlyRequest ? locationSpecificHealthContextInner.getExceptionCountForRead() : locationSpecificHealthContextInner.getExceptionCountForWrite(), this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } - return locationSpecificContextInner; + return locationSpecificHealthContextInner; } else { partitionKeyRangesWithPossibleUnavailableRegions.put(partitionKeyRangeWrapper, partitionKeyRangeWrapper); @@ -167,8 +166,8 @@ public LocationSpecificContext handleException( return this.transitionHealthStatus(LocationHealthStatus.Unavailable); } case HealthyTentative: - if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificContext, isReadOnlyRequest)) { - return this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificContext, isReadOnlyRequest); + if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { + return this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificHealthContext, isReadOnlyRequest); } else { if (logger.isDebugEnabled()) { @@ -187,11 +186,11 @@ public LocationSpecificContext handleException( } } - public LocationSpecificContext transitionHealthStatus(LocationHealthStatus newStatus) { + public LocationSpecificHealthContext transitionHealthStatus(LocationHealthStatus newStatus) { switch (newStatus) { case Healthy: - return new LocationSpecificContext( + return new LocationSpecificHealthContext( 0, 0, 0, @@ -200,7 +199,7 @@ public LocationSpecificContext transitionHealthStatus(LocationHealthStatus newSt LocationHealthStatus.Healthy, false); case HealthyWithFailures: - return new LocationSpecificContext( + return new LocationSpecificHealthContext( 0, 0, 0, @@ -209,7 +208,7 @@ public LocationSpecificContext transitionHealthStatus(LocationHealthStatus newSt LocationHealthStatus.HealthyWithFailures, false); case Unavailable: - return new LocationSpecificContext( + return new LocationSpecificHealthContext( 0, 0, 0, @@ -218,7 +217,7 @@ public LocationSpecificContext transitionHealthStatus(LocationHealthStatus newSt LocationHealthStatus.Unavailable, true); case HealthyTentative: - return new LocationSpecificContext( + return new LocationSpecificHealthContext( 0, 0, 0, From 9ba09a4a8ff9e4d955452aaf7d197e76da7a8946 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 24 Jun 2024 10:56:50 -0400 Subject: [PATCH 089/140] Fixing merge. --- .../query/ChangeFeedFetcher.java | 25 +++++++++++++------ 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java index 9d8798edb8f0..6ca5868f772e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ChangeFeedFetcher.java @@ -59,6 +59,22 @@ public ChangeFeedFetcher( checkNotNull(changeFeedState, "Argument 'changeFeedState' must not be null."); this.changeFeedState = changeFeedState; + // constructing retry policies for changeFeed requests + DocumentClientRetryPolicy retryPolicyInstance = + client.getResetSessionTokenRetryPolicy().getRequestPolicy(null); + + // For changeFeedProcessor with pkRange version, ChangeFeedState.containerRid will be name based rather than resouceId, + // due to the inconsistency of the ChangeFeedState.containerRid format, so in order to generate the correct path, + // we use a RxDocumentServiceRequest here + RxDocumentServiceRequest documentServiceRequest = createRequestFunc.get(); + String collectionLink = PathsHelper.generatePath( + ResourceType.DocumentCollection, documentServiceRequest, false); + retryPolicyInstance = new InvalidPartitionExceptionRetryPolicy( + client.getCollectionCache(), + retryPolicyInstance, + collectionLink, + requestOptionProperties); + if (isSplitHandlingDisabled) { // True for ChangeFeedProcessor - where all retry-logic is handled this.feedRangeContinuationRetryPolicy = retryPolicyInstance; @@ -87,17 +103,10 @@ public ChangeFeedFetcher( this.createRequestFunc = () -> { RxDocumentServiceRequest request = createRequestFunc.get(); + request.requestContext.setClientRetryPolicySupplier(() -> this.feedRangeContinuationRetryPolicy); this.feedRangeContinuationRetryPolicy.onBeforeSendRequest(request); return request; }; - - this.createRequestFunc = () -> { - RxDocumentServiceRequest request = createRequestFunc.get(); - request.requestContext.setClientRetryPolicySupplier(() -> this.feedRangeContinuationFeedRangeGoneRetryPolicy); - this.feedRangeContinuationFeedRangeGoneRetryPolicy.onBeforeSendRequest(request); - return request; - }; - } } @Override From 0f0455afb49267be19c9660a6f63e906c2d63da6 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 24 Jun 2024 11:29:20 -0400 Subject: [PATCH 090/140] Reacting to review comments. --- .../GlobalPartitionEndpointManagerForCircuitBreaker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index b76ed7c3a329..631f3d4218c0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -181,7 +181,7 @@ private Flux updateStaleLocationInfo() { .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> { - logger.info("Background updateStaleLocationInfo kicking in..."); + logger.debug("Background updateStaleLocationInfo kicking in..."); PartitionKeyRangeWrapper partitionKeyRangeWrapper = partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair.getKey(); From 652eb17a530d7ef8b24a4b0edf6127a6f14ca881 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 24 Jun 2024 12:14:07 -0400 Subject: [PATCH 091/140] Refactoring tests. --- .../PartitionLevelCircuitBreakerTests.java | 386 +++++++++--------- 1 file changed, 199 insertions(+), 187 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index be9f5ab37a44..b6ca07c464dd 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -67,10 +67,12 @@ import java.time.Duration; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; import java.util.UUID; import java.util.concurrent.ConcurrentHashMap; import java.util.function.Consumer; @@ -85,15 +87,15 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { = ImplementationBridgeHelpers.CosmosAsyncContainerHelper.getCosmosAsyncContainerAccessor(); private List writeRegions; - private static final CosmosEndToEndOperationLatencyPolicyConfig noEndToEndTimeout + private static final CosmosEndToEndOperationLatencyPolicyConfig NO_END_TO_END_TIMEOUT = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofDays(1)).build(); - private static final CosmosEndToEndOperationLatencyPolicyConfig twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy + private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) .build(); - private static final CosmosEndToEndOperationLatencyPolicyConfig twoSecondEndToEndTimeoutWithoutAvailabilityStrategy + private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) .build(); @@ -191,9 +193,15 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { private final Function> buildReadWriteSessionNotAvailableRules = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableRules; - private static final CosmosRegionSwitchHint noRegionSwitchHint = null; + private static final CosmosRegionSwitchHint NO_REGION_SWITCH_HINT = null; - private static final Boolean nonIdempotentWriteRetriesEnabled = true; + private static final Boolean NON_IDEMPOTENT_WRITE_RETRIES_ENABLED = true; + + private static final Set ALL_CONNECTION_MODES_INCLUDED = new HashSet<>(); + + private static final Set ONLY_DIRECT_MODE = new HashSet<>(); + + private static final Set ONLY_GATEWAY_MODE = new HashSet<>(); private String firstPreferredRegion = null; @@ -232,6 +240,11 @@ public void beforeClass() { this.singlePartitionAsyncContainerId = UUID.randomUUID().toString(); sharedAsyncDatabase.createContainerIfNotExists(this.singlePartitionAsyncContainerId, "/id").block(); + ALL_CONNECTION_MODES_INCLUDED.add(ConnectionMode.DIRECT); + ALL_CONNECTION_MODES_INCLUDED.add(ConnectionMode.GATEWAY); + ONLY_DIRECT_MODE.add(ConnectionMode.DIRECT); + ONLY_GATEWAY_MODE.add(ConnectionMode.GATEWAY); + try { Thread.sleep(3000); } catch (Exception ex) { @@ -286,15 +299,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, validateResponseHasSuccess, validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -308,15 +321,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -330,15 +343,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for DELETE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -352,16 +365,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateServiceUnavailableRules, - - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for PATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -375,15 +387,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -397,15 +409,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -420,15 +432,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for BATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -442,15 +454,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -463,15 +475,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 410 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -484,15 +496,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Server-generated 410 injected into first preferred region for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -506,15 +518,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Server-generated 410 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -528,15 +540,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Server-generated 410 injected into first preferred region for DELETE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -550,15 +562,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Server-generated 410 injected into first preferred region for PATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -572,15 +584,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Server-generated 410 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -594,15 +606,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Server-generated 410 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -617,15 +629,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateServerGeneratedGoneRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Response-delay injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -640,15 +652,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), generateTransitTimeoutRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Response-delay injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -663,15 +675,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), generateTransitTimeoutRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Response-delay injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -687,15 +699,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), generateTransitTimeoutRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasRequestTimeoutException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // Response-delay injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -711,15 +723,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), generateTransitTimeoutRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasRequestTimeoutException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 500 (internal server error) injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -732,15 +744,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -754,15 +766,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -775,15 +787,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -798,15 +810,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), generateInternalServerErrorRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 429 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -819,15 +831,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 429 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -841,15 +853,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 429 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -863,15 +875,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 404/1002 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -884,15 +896,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateReadOrWriteSessionNotAvailableRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - !nonIdempotentWriteRetriesEnabled, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 404/1002 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -906,15 +918,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateReadOrWriteSessionNotAvailableRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - !nonIdempotentWriteRetriesEnabled, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 449 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -928,15 +940,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateRetryWithRules, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 503 injected into all regions for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -949,15 +961,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions) .withHitLimit(11), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasServiceUnavailableError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 503 injected into all regions for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -971,15 +983,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions) .withHitLimit(6), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasServiceUnavailableError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 503 injected into all regions for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -992,15 +1004,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions) .withHitLimit(11), generateServiceUnavailableRules, - noEndToEndTimeout, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasServiceUnavailableError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 429 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1013,15 +1025,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 429 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1034,15 +1046,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, - noRegionSwitchHint, - nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 429 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1055,15 +1067,15 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), generateTooManyRequestsRules, - twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, - noRegionSwitchHint, - !nonIdempotentWriteRetriesEnabled, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE } }; } @@ -1109,14 +1121,14 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServiceUnavailableError, executeReadManyOperation, - noEndToEndTimeout, - noRegionSwitchHint, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // Server-generated 410 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1130,14 +1142,14 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServerGeneratedGoneError, executeReadManyOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 429 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1151,14 +1163,14 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildTooManyRequestsError, executeReadManyOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 404/1002 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1172,14 +1184,14 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildReadWriteSessionNotAvailableRules, executeReadManyOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 503 injected into all region for read many operation // injected into all replicas of the faulty EPK range. @@ -1194,14 +1206,14 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions), this.buildServiceUnavailableError, executeReadManyOperation, - noEndToEndTimeout, - noRegionSwitchHint, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, this.validateResponseHasServiceUnavailableError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 429 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1215,14 +1227,14 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsError, executeReadManyOperation, - twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED } }; } @@ -1270,14 +1282,14 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServiceUnavailableError, executeReadAllOperation, - noEndToEndTimeout, - noRegionSwitchHint, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1291,14 +1303,14 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServerGeneratedGoneError, executeReadAllOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1312,14 +1324,14 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildTooManyRequestsError, executeReadAllOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 404/1002 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1333,14 +1345,14 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildReadWriteSessionNotAvailableRules, executeReadAllOperation, - twoSecondEndToEndTimeoutWithoutAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ONLY_DIRECT_MODE }, // 503 injected into all region for read all operation // injected into all replicas of the faulty EPK range. @@ -1355,14 +1367,14 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions), this.buildServiceUnavailableError, executeReadAllOperation, - noEndToEndTimeout, - noRegionSwitchHint, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, this.validateResponseHasServiceUnavailableError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED }, // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1376,14 +1388,14 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsError, executeReadAllOperation, - twoSecondEndToEndTimeoutWithThresholdBasedAvailabilityStrategy, - noRegionSwitchHint, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, this.validateResponseHasSuccess, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ConnectionMode.DIRECT + ALL_CONNECTION_MODES_INCLUDED } }; } @@ -1401,7 +1413,7 @@ public void operationHitsTerminalExceptionAcrossKRegions( Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - ConnectionMode allowedConnectionMode) { + Set allowedConnectionModes) { List preferredRegions = this.writeRegions; @@ -1413,8 +1425,8 @@ public void operationHitsTerminalExceptionAcrossKRegions( ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (connectionPolicy.getConnectionMode() != allowedConnectionMode) { - throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", allowedConnectionMode)); + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); } CosmosAsyncClient asyncClient = null; From 8b3b2585d62e25707ce8b54ef3fc76024c92cb16 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 26 Jun 2024 09:47:04 -0400 Subject: [PATCH 092/140] Fixing tests. --- .../PartitionLevelCircuitBreakerTests.java | 51 ++++++++++++++----- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index b6ca07c464dd..07f82ac8202d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -1431,6 +1431,7 @@ public void operationHitsTerminalExceptionAcrossKRegions( CosmosAsyncClient asyncClient = null; FaultInjectionOperationType faultInjectionOperationType = faultInjectionRuleParamsWrapper.getFaultInjectionOperationType(); + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); try { @@ -1520,7 +1521,7 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - ConnectionMode allowedConnectionMode) { + Set allowedConnectionModes) { List preferredRegions = this.writeRegions; @@ -1536,10 +1537,12 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (connectionPolicy.getConnectionMode() != allowedConnectionMode) { - throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", allowedConnectionMode)); + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { + throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); } + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + try { asyncClient = clientBuilder.buildAsyncClient(); @@ -1622,7 +1625,7 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - ConnectionMode allowedConnectionMode) { + Set allowedConnectionModes) { CosmosAsyncClient asyncClient = null; @@ -1638,10 +1641,12 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( ConnectionPolicy connectionPolicy = ReflectionUtils.getConnectionPolicy(clientBuilder); - if (connectionPolicy.getConnectionMode() != allowedConnectionMode) { + if (!allowedConnectionModes.contains(connectionPolicy.getConnectionMode())) { throw new SkipException(String.format("Test is not applicable to %s connectivity mode!", connectionPolicy.getConnectionMode())); } + faultInjectionRuleParamsWrapper.withFaultInjectionConnectionType(evaluateFaultInjectionConnectionType(connectionPolicy.getConnectionMode())); + try { asyncClient = clientBuilder.buildAsyncClient(); @@ -2334,7 +2339,7 @@ private static class FaultInjectionRuleParamsWrapper { private List faultInjectionApplicableRegions; private FeedRange faultInjectionApplicableFeedRange; private FaultInjectionOperationType faultInjectionOperationType; - private List itemIdentitiesForReadMany; + private FaultInjectionConnectionType faultInjectionConnectionType; public CosmosAsyncContainer getFaultInjectionApplicableAsyncContainer() { return faultInjectionApplicableAsyncContainer; @@ -2398,6 +2403,15 @@ public FaultInjectionRuleParamsWrapper withFaultInjectionOperationType(FaultInje this.faultInjectionOperationType = faultInjectionOperationType; return this; } + + public FaultInjectionConnectionType getFaultInjectionConnectionType() { + return faultInjectionConnectionType; + } + + public FaultInjectionRuleParamsWrapper withFaultInjectionConnectionType(FaultInjectionConnectionType faultInjectionConnectionType) { + this.faultInjectionConnectionType = faultInjectionConnectionType; + return this; + } } private static Map getRegionMap(DatabaseAccount databaseAccount, boolean writeOnly) { @@ -2421,7 +2435,7 @@ private static List buildServiceUnavailableRules(FaultInject FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() .operationType(paramsWrapper.getFaultInjectionOperationType()) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) .region(applicableRegion) .build(); @@ -2454,7 +2468,7 @@ private static List buildServerGeneratedGoneRules(FaultInjec FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() .operationType(paramsWrapper.getFaultInjectionOperationType()) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) .region(applicableRegion) .build(); @@ -2485,7 +2499,7 @@ private static List buildTransitTimeoutRules(FaultInjectionR FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() .operationType(paramsWrapper.getFaultInjectionOperationType()) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) .region(applicableRegion) .build(); @@ -2514,7 +2528,7 @@ private static List buildReadWriteSessionNotAvailableRules(F FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() .operationType(paramsWrapper.getFaultInjectionOperationType()) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) .region(applicableRegion) .build(); @@ -2543,7 +2557,7 @@ private static List buildTooManyRequestsRules(FaultInjection FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() .operationType(paramsWrapper.getFaultInjectionOperationType()) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) .region(applicableRegion) .build(); @@ -2572,7 +2586,7 @@ private static List buildInternalServerErrorRules(FaultInjec FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() .operationType(paramsWrapper.getFaultInjectionOperationType()) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) .region(applicableRegion) .build(); @@ -2600,7 +2614,7 @@ private static List buildRetryWithFaultInjectionRules(FaultI FaultInjectionCondition faultInjectionCondition = new FaultInjectionConditionBuilder() .operationType(paramsWrapper.getFaultInjectionOperationType()) - .connectionType(FaultInjectionConnectionType.DIRECT) + .connectionType(paramsWrapper.getFaultInjectionConnectionType()) .endpoints(new FaultInjectionEndpointBuilder(paramsWrapper.getFaultInjectionApplicableFeedRange()).build()) .region(applicableRegion) .build(); @@ -2698,6 +2712,17 @@ private static int getAverageExceptionCountByPartitionKeyRangeByRegion( } + private static FaultInjectionConnectionType evaluateFaultInjectionConnectionType(ConnectionMode connectionMode) { + + if (connectionMode == ConnectionMode.DIRECT) { + return FaultInjectionConnectionType.DIRECT; + } else if (connectionMode == ConnectionMode.GATEWAY) { + return FaultInjectionConnectionType.GATEWAY; + } + + throw new IllegalArgumentException("Unsupported connection mode : " + connectionMode); + } + private enum QueryType { READ_MANY, READ_ALL } From 89901655da5cea33e81d4c099a5d5234685d4e65 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 26 Jun 2024 10:39:19 -0400 Subject: [PATCH 093/140] Fixing tests. --- .../PartitionLevelCircuitBreakerTests.java | 256 ++++++++++-------- 1 file changed, 143 insertions(+), 113 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 07f82ac8202d..3b4a24ed0ab9 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -181,17 +181,26 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { assertThat(responseWrapper.cosmosException.getSubStatusCode()).isNotEqualTo(HttpConstants.SubStatusCodes.CLIENT_OPERATION_TIMEOUT); }; - private final Function> buildServiceUnavailableError - = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; + private final Function> buildServiceUnavailableFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildServiceUnavailableFaultInjectionRules; - private final Function> buildServerGeneratedGoneError - = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneRules; + private final Function> buildServerGeneratedGoneErrorFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneErrorFaultInjectionRules; - private final Function> buildTooManyRequestsError - = PartitionLevelCircuitBreakerTests::buildTooManyRequestsRules; + private final Function> buildTooManyRequestsErrorFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildTooManyRequestsErrorFaultInjectionRules; - private final Function> buildReadWriteSessionNotAvailableRules - = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableRules; + private final Function> buildReadWriteSessionNotAvailableFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableFaultInjectionRules; + + private final Function> buildTransitTimeoutFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildTransitTimeoutFaultInjectionRules; + + private final Function> buildInternalServerErrorFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildInternalServerErrorFaultInjectionRules; + + private final Function> buildRetryWithFaultInjectionRules + = PartitionLevelCircuitBreakerTests::buildRetryWithFaultInjectionRules; private static final CosmosRegionSwitchHint NO_REGION_SWITCH_HINT = null; @@ -259,26 +268,26 @@ public void beforeClass() { @DataProvider(name = "partitionLevelCircuitBreakerTestConfigs") public Object[][] partitionLevelCircuitBreakerTestConfigs() { - Function> generateServiceUnavailableRules - = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; - - Function> generateServerGeneratedGoneRules - = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneRules; - - Function> generateTransitTimeoutRules - = PartitionLevelCircuitBreakerTests::buildTransitTimeoutRules; - - Function> generateInternalServerErrorRules - = PartitionLevelCircuitBreakerTests::buildInternalServerErrorRules; - - Function> generateTooManyRequestsRules - = PartitionLevelCircuitBreakerTests::buildTooManyRequestsRules; - - Function> generateReadOrWriteSessionNotAvailableRules - = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableRules; - - Function> generateRetryWithRules - = PartitionLevelCircuitBreakerTests::buildRetryWithFaultInjectionRules; +// Function> generateServiceUnavailableRules +// = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; +// +// Function> generateServerGeneratedGoneRules +// = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneRules; +// +// Function> generateTransitTimeoutRules +// = PartitionLevelCircuitBreakerTests::buildTransitTimeoutRules; +// +// Function> generateInternalServerErrorRules +// = PartitionLevelCircuitBreakerTests::buildInternalServerErrorRules; +// +// Function> generateTooManyRequestsRules +// = PartitionLevelCircuitBreakerTests::buildTooManyRequestsRules; +// +// Function> generateReadOrWriteSessionNotAvailableRules +// = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableRules; +// +// Function> generateRetryWithRules +// = PartitionLevelCircuitBreakerTests::buildRetryWithFaultInjectionRules; // General testing flow: // Below tests choose a fault type to inject, regions to inject the fault in @@ -298,7 +307,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -320,7 +329,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -342,7 +351,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -364,7 +373,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -386,7 +395,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -408,7 +417,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -431,7 +440,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -453,7 +462,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -474,7 +483,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -487,7 +496,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { }, // Server-generated 410 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.READ_ITEM), @@ -495,7 +504,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, + this.buildServerGeneratedGoneErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -509,7 +518,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // Server-generated 410 injected into first preferred region for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), @@ -517,7 +526,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, + this.buildServerGeneratedGoneErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -531,7 +540,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // Server-generated 410 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), @@ -539,7 +548,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, + this.buildServerGeneratedGoneErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -553,7 +562,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // Server-generated 410 injected into first preferred region for DELETE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), @@ -561,7 +570,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, + this.buildServerGeneratedGoneErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -575,7 +584,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // Server-generated 410 injected into first preferred region for PATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), @@ -583,7 +592,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, + this.buildServerGeneratedGoneErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -597,7 +606,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // Server-generated 410 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with server-generated gone in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), @@ -605,7 +614,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, + this.buildServerGeneratedGoneErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -618,7 +627,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { }, // Server-generated 410 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. Even // when short-circuiting of first preferred region has kicked in, the first preferred region is contacted // to fetch the QueryPlan. @@ -628,7 +637,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateServerGeneratedGoneRules, + this.buildServerGeneratedGoneErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -643,7 +652,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) - // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -651,7 +660,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), - generateTransitTimeoutRules, + this.buildTransitTimeoutFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -666,7 +675,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) - // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -674,7 +683,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), - generateTransitTimeoutRules, + this.buildTransitTimeoutFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -690,7 +699,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // is ever involved - effectively doesn't impact the assertions for this test). // Expectation is for the operation to hit RequestTimeoutException (due to network request timeout of 5s kicking in) // and because NonIdempotentWriteRetryPolicy isn't enabled - // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region and with no end-to-end operation timeout configured.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -698,7 +707,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), - generateTransitTimeoutRules, + this.buildTransitTimeoutFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -714,7 +723,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // is ever involved - effectively doesn't impact the assertions for this test). // Expectation is for the operation to hit RequestTimeoutException (due to network request timeout of 5s kicking in) // and because NonIdempotentWriteRetryPolicy isn't enabled - // and only to succeed once moved over to the second preferred region when the first preferred region has been short-circuited. + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region and with no end-to-end operation timeout configured.", FaultInjectionOperationType.REPLACE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -722,7 +731,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), - generateTransitTimeoutRules, + this.buildTransitTimeoutFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -743,7 +752,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), - generateInternalServerErrorRules, + this.buildTransitTimeoutFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -765,7 +774,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), - generateInternalServerErrorRules, + this.buildTransitTimeoutFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -781,12 +790,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation // should see a success from the second preferred region. { - String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), - generateInternalServerErrorRules, + this.buildInternalServerErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -804,12 +813,12 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // should see a success from the second preferred region. Although, after short-circuiting, a query operation // will see request for QueryPlan from the short-circuited region. { - String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), - generateInternalServerErrorRules, + this.buildInternalServerErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -822,7 +831,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { }, // 429 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), @@ -830,7 +839,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, + this.buildTooManyRequestsErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -844,7 +853,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // 429 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), @@ -852,7 +861,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, + this.buildTooManyRequestsErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -865,7 +874,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { }, // 429 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. { @@ -874,7 +883,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, + this.buildTooManyRequestsErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -887,7 +896,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { }, // 404/1002 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), @@ -895,7 +904,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateReadOrWriteSessionNotAvailableRules, + this.buildReadWriteSessionNotAvailableFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -909,7 +918,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // 404/1002 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), @@ -917,7 +926,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateReadOrWriteSessionNotAvailableRules, + this.buildReadWriteSessionNotAvailableFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -931,7 +940,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // 449 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), @@ -939,7 +948,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateRetryWithRules, + this.buildRetryWithFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -960,7 +969,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) .withHitLimit(11), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -982,7 +991,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) .withHitLimit(6), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -1003,7 +1012,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions) .withHitLimit(11), - generateServiceUnavailableRules, + this.buildServiceUnavailableFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -1017,14 +1026,14 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // 429 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) - // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. new Object[]{ String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, + this.buildTooManyRequestsErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -1038,14 +1047,14 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { // 429 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) - // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. new Object[]{ String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, + this.buildTooManyRequestsErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -1066,7 +1075,7 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - generateTooManyRequestsRules, + this.buildTooManyRequestsErrorFaultInjectionRules, TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, @@ -1119,7 +1128,28 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildServiceUnavailableError, + this.buildServiceUnavailableFaultInjectionRules, + executeReadManyOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // Internal server error injected into first preferred region for read many operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit InternalServerError and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + "Test read many operation injected with internal server error injected in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildInternalServerErrorFaultInjectionRules, executeReadManyOperation, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1132,7 +1162,7 @@ public Object[][] readManyTestConfigs() { }, // Server-generated 410 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read many operation injected with server-generated gone in first preferred region.", @@ -1140,7 +1170,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildServerGeneratedGoneError, + this.buildServerGeneratedGoneErrorFaultInjectionRules, executeReadManyOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1153,7 +1183,7 @@ public Object[][] readManyTestConfigs() { }, // 429 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read many operation injected with too many requests error in first preferred region.", @@ -1161,7 +1191,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildTooManyRequestsError, + this.buildTooManyRequestsErrorFaultInjectionRules, executeReadManyOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1174,7 +1204,7 @@ public Object[][] readManyTestConfigs() { }, // 404/1002 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read many operation injected with read session not available error in first preferred region.", @@ -1182,7 +1212,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildReadWriteSessionNotAvailableRules, + this.buildReadWriteSessionNotAvailableFaultInjectionRules, executeReadManyOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1195,7 +1225,7 @@ public Object[][] readManyTestConfigs() { }, // 503 injected into all region for read many operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit 503 and only to succeed once + // Expectation is for the operation to hit 503 and only to succeed when // fault injection has hit its injection limits. Also, the success is // from the first preferred region. { @@ -1204,7 +1234,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions), - this.buildServiceUnavailableError, + this.buildServiceUnavailableFaultInjectionRules, executeReadManyOperation, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1218,14 +1248,14 @@ public Object[][] readManyTestConfigs() { // 429 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) - // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. new Object[]{ "Test faulty read many operation with too many requests error in first preferred region with threshold-based availability strategy enabled.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsError, + this.buildTooManyRequestsErrorFaultInjectionRules, executeReadManyOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1280,7 +1310,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildServiceUnavailableError, + this.buildServiceUnavailableFaultInjectionRules, executeReadAllOperation, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1293,7 +1323,7 @@ public Object[][] readAllTestConfigs() { }, // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read all operation injected with server-generated GONE in first preferred region.", @@ -1301,7 +1331,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildServerGeneratedGoneError, + this.buildServerGeneratedGoneErrorFaultInjectionRules, executeReadAllOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1314,7 +1344,7 @@ public Object[][] readAllTestConfigs() { }, // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read all operation injected with too many requests error in first preferred region.", @@ -1322,7 +1352,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildTooManyRequestsError, + this.buildTooManyRequestsErrorFaultInjectionRules, executeReadAllOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1335,7 +1365,7 @@ public Object[][] readAllTestConfigs() { }, // 404/1002 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed once + // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. { "Test read all operation injected with read/write session not available error in first preferred region.", @@ -1343,7 +1373,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionDuration(Duration.ofSeconds(60)) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), - this.buildReadWriteSessionNotAvailableRules, + this.buildReadWriteSessionNotAvailableFaultInjectionRules, executeReadAllOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -1356,7 +1386,7 @@ public Object[][] readAllTestConfigs() { }, // 503 injected into all region for read all operation // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit 503 and only to succeed once + // Expectation is for the operation to hit 503 and only to succeed when // fault injection has hit its injection limits. Also, the success is // from the first preferred region. { @@ -1365,7 +1395,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions), - this.buildServiceUnavailableError, + this.buildServiceUnavailableFaultInjectionRules, executeReadAllOperation, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -1379,14 +1409,14 @@ public Object[][] readAllTestConfigs() { // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) - // and only from the second preferred region once short-circuiting has kicked in for the first preferred region. + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. new Object[]{ "Test faulty read all operation with too many requests error in first preferred region with threshold-based availability strategy enabled.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsError, + this.buildTooManyRequestsErrorFaultInjectionRules, executeReadAllOperation, TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, @@ -2427,7 +2457,7 @@ private static Map getRegionMap(DatabaseAccount databaseAccount, return regionMap; } - private static List buildServiceUnavailableRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + private static List buildServiceUnavailableFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { List faultInjectionRules = new ArrayList<>(); @@ -2456,7 +2486,7 @@ private static List buildServiceUnavailableRules(FaultInject return faultInjectionRules; } - private static List buildServerGeneratedGoneRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + private static List buildServerGeneratedGoneErrorFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders .getResultBuilder(FaultInjectionServerErrorType.GONE) @@ -2485,7 +2515,7 @@ private static List buildServerGeneratedGoneRules(FaultInjec return faultInjectionRules; } - private static List buildTransitTimeoutRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + private static List buildTransitTimeoutFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders .getResultBuilder(FaultInjectionServerErrorType.RESPONSE_DELAY) @@ -2516,7 +2546,7 @@ private static List buildTransitTimeoutRules(FaultInjectionR return faultInjectionRules; } - private static List buildReadWriteSessionNotAvailableRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + private static List buildReadWriteSessionNotAvailableFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders .getResultBuilder(FaultInjectionServerErrorType.READ_SESSION_NOT_AVAILABLE) @@ -2545,7 +2575,7 @@ private static List buildReadWriteSessionNotAvailableRules(F return faultInjectionRules; } - private static List buildTooManyRequestsRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + private static List buildTooManyRequestsErrorFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders .getResultBuilder(FaultInjectionServerErrorType.TOO_MANY_REQUEST) @@ -2574,7 +2604,7 @@ private static List buildTooManyRequestsRules(FaultInjection return faultInjectionRules; } - private static List buildInternalServerErrorRules(FaultInjectionRuleParamsWrapper paramsWrapper) { + private static List buildInternalServerErrorFaultInjectionRules(FaultInjectionRuleParamsWrapper paramsWrapper) { FaultInjectionServerErrorResult faultInjectionServerErrorResult = FaultInjectionResultBuilders .getResultBuilder(FaultInjectionServerErrorType.INTERNAL_SERVER_ERROR) From f98f087163f24099bd0d412017194a98b2bd6a3a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 26 Jun 2024 12:17:18 -0400 Subject: [PATCH 094/140] Fixing tests. --- .../PartitionLevelCircuitBreakerTests.java | 85 ++++++++++++------- 1 file changed, 53 insertions(+), 32 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 3b4a24ed0ab9..4485618f813e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -265,29 +265,8 @@ public void beforeClass() { } } - @DataProvider(name = "partitionLevelCircuitBreakerTestConfigs") - public Object[][] partitionLevelCircuitBreakerTestConfigs() { - -// Function> generateServiceUnavailableRules -// = PartitionLevelCircuitBreakerTests::buildServiceUnavailableRules; -// -// Function> generateServerGeneratedGoneRules -// = PartitionLevelCircuitBreakerTests::buildServerGeneratedGoneRules; -// -// Function> generateTransitTimeoutRules -// = PartitionLevelCircuitBreakerTests::buildTransitTimeoutRules; -// -// Function> generateInternalServerErrorRules -// = PartitionLevelCircuitBreakerTests::buildInternalServerErrorRules; -// -// Function> generateTooManyRequestsRules -// = PartitionLevelCircuitBreakerTests::buildTooManyRequestsRules; -// -// Function> generateReadOrWriteSessionNotAvailableRules -// = PartitionLevelCircuitBreakerTests::buildReadWriteSessionNotAvailableRules; -// -// Function> generateRetryWithRules -// = PartitionLevelCircuitBreakerTests::buildRetryWithFaultInjectionRules; + @DataProvider(name = "miscellaneousOpTestConfigs") + public Object[][] miscellaneousOpTestConfigs() { // General testing flow: // Below tests choose a fault type to inject, regions to inject the fault in @@ -957,7 +936,29 @@ public Object[][] partitionLevelCircuitBreakerTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ALL_CONNECTION_MODES_INCLUDED + }, + // 449 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildRetryWithFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED }, // 503 injected into all regions for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1153,10 +1154,10 @@ public Object[][] readManyTestConfigs() { executeReadManyOperation, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, - this.validateResponseHasSuccess, + this.validateResponseHasInternalServerError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ALL_CONNECTION_MODES_INCLUDED }, @@ -1321,7 +1322,28 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ALL_CONNECTION_MODES_INCLUDED }, - // 429 injected into first preferred region for read all operation + // Internal server error injected into first preferred region for read all operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit InternalServerError and bubble it from the first preferred region + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. + { + "Test read all operation injected with internal server error injected in first preferred region.", + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withHitLimit(11) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), + this.buildInternalServerErrorFaultInjectionRules, + executeReadAllOperation, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 410 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. @@ -1430,8 +1452,8 @@ public Object[][] readAllTestConfigs() { }; } - @Test(groups = {"multi-master"}, dataProvider = "partitionLevelCircuitBreakerTestConfigs", timeOut = 80 * TIMEOUT) - public void operationHitsTerminalExceptionAcrossKRegions( + @Test(groups = {"multi-master"}, dataProvider = "miscellaneousOpTestConfigs", timeOut = 80 * TIMEOUT) + public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegions( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, Function> generateFaultInjectionRules, @@ -1949,8 +1971,8 @@ private void execute( } } - logger.info("Sleep for 90 seconds"); - Thread.sleep(90_000); + logger.info("Sleep for 70 seconds to allow Unavailable partitions to be HealthyTentative"); + Thread.sleep(70_000); for (int i = operationIterationCountInFailureFlow + 1; i <= operationIterationCountInFailureFlow + operationIterationCountInRecoveryFlow; i++) { @@ -2305,7 +2327,6 @@ public void afterClass() { } } - private static class ResponseWrapper { private final CosmosItemResponse cosmosItemResponse; From 471586db5dd73e8d1369a54818b57f9faad74fe0 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 27 Jun 2024 18:04:45 -0400 Subject: [PATCH 095/140] Reacting to review comments. --- .../java/com/azure/cosmos/benchmark/SyncBenchmark.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java index fdefad27851e..dea8dd6449d6 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java @@ -115,9 +115,13 @@ public T apply(T o, Throwable throwable) { logger = LoggerFactory.getLogger(this.getClass()); if (configuration.isPartitionLevelCircuitBreakerEnabled()) { - System.setProperty("COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"COUNT_BASED\"," - + "\"circuitBreakerFailureTolerance\": \"LOW\"}"); + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); } CosmosClientBuilder cosmosClientBuilder = new CosmosClientBuilder() From 2e748cb9a21a35e52832abff5c762be9485133db Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 29 Jun 2024 17:45:14 -0400 Subject: [PATCH 096/140] Reacting to review comments. --- .../cosmos/RetryContextOnDiagnosticTest.java | 6 +-- .../implementation/ConsistencyTests1.java | 4 +- .../implementation/ConsistencyTestsBase.java | 44 ++++++++-------- .../RequestHeadersSpyWireTest.java | 8 +-- .../cosmos/implementation/SessionTest.java | 18 +++---- .../cosmos/implementation/TestSuiteBase.java | 4 +- .../DCDocumentCrudTest.java | 2 +- .../com/azure/cosmos/rx/ChangeFeedTest.java | 2 +- .../azure/cosmos/rx/ResourceTokenTest.java | 10 ++-- .../azure/cosmos/CosmosAsyncContainer.java | 16 +++--- .../implementation/AsyncDocumentClient.java | 12 ++--- .../ClientSideRequestStatistics.java | 50 +++++++++---------- .../implementation/RxDocumentClientImpl.java | 44 ++++++++++------ 13 files changed, 116 insertions(+), 104 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java index a1f823196000..29734e89d372 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/RetryContextOnDiagnosticTest.java @@ -230,7 +230,7 @@ public void retryContextMockTestOnCRUDOperation() throws NoSuchFieldException, I Mockito.when(retryPolicy.getRetryContext()).thenReturn(retryContext); Mockito.when(retryContext.getRetryCount()).thenReturn(1); Mockito.when(mockRetryFactory.getRequestPolicy(ArgumentMatchers.any(DiagnosticsClientContext.class))).thenReturn(retryPolicy); - responseFlux = rxDocumentClient.readDocument(itemSelfLink, requestOptions, cosmosAsyncContainer.getLinkWithoutTrailingSlash()); + responseFlux = rxDocumentClient.readDocument(itemSelfLink, requestOptions); validateServiceResponseSuccess(responseFlux); Mockito.verify(retryContext, Mockito.times(1)).getRetryCount(); @@ -240,7 +240,7 @@ public void retryContextMockTestOnCRUDOperation() throws NoSuchFieldException, I Mockito.when(retryPolicy.getRetryContext()).thenReturn(retryContext); Mockito.when(retryContext.getRetryCount()).thenReturn(1); Mockito.when(mockRetryFactory.getRequestPolicy(ArgumentMatchers.any(DiagnosticsClientContext.class))).thenReturn(retryPolicy); - responseFlux = rxDocumentClient.deleteDocument(itemSelfLink, requestOptions, cosmosAsyncContainer.getLinkWithoutTrailingSlash()); + responseFlux = rxDocumentClient.deleteDocument(itemSelfLink, requestOptions); validateServiceResponseSuccess(responseFlux); Mockito.verify(retryContext, Mockito.times(1)).getRetryCount(); @@ -250,7 +250,7 @@ public void retryContextMockTestOnCRUDOperation() throws NoSuchFieldException, I Mockito.when(retryPolicy.getRetryContext()).thenReturn(retryContext); Mockito.when(retryContext.getRetryCount()).thenReturn(1); Mockito.when(mockRetryFactory.getRequestPolicy(ArgumentMatchers.any(DiagnosticsClientContext.class))).thenReturn(retryPolicy); - responseFlux = rxDocumentClient.replaceDocument(itemSelfLink, new Document(), requestOptions, cosmosAsyncContainer.getLinkWithoutTrailingSlash()); + responseFlux = rxDocumentClient.replaceDocument(itemSelfLink, new Document(), requestOptions); validateServiceResponseSuccess(responseFlux); Mockito.verify(retryContext, Mockito.times(1)).getRetryCount(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java index 2189f85d9c9a..22cba6842472 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTests1.java @@ -348,11 +348,11 @@ private void validateSubstatusCodeOnNotFoundExceptionInSessionReadAsync(boolean documentDefinition.setId("1"); Document document = client.createDocument(collection.getSelfLink(), documentDefinition, requestOptions, false).block().getResource(); - Mono> deleteObservable = client.deleteDocument(document.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + Mono> deleteObservable = client.deleteDocument(document.getSelfLink(), requestOptions); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .nullResource().build(); validateSuccess(deleteObservable, validator); - Mono> readObservable = client.readDocument(document.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + Mono> readObservable = client.readDocument(document.getSelfLink(), requestOptions); FailureValidator notFoundValidator = new FailureValidator.Builder().resourceNotFound().unknownSubStatusCode().build(); validateFailure(readObservable, notFoundValidator); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java index 034b0a9fa6b0..7dc4619fee59 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ConsistencyTestsBase.java @@ -77,13 +77,13 @@ void validateConsistentLSN() { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(documentDefinition.get("mypk"))); Document document = createDocument(this.writeClient, createdDatabase.getId(), createdCollection.getId(), documentDefinition); - ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); + ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options).block(); assertThat(response.getStatusCode()).isEqualTo(204); long quorumAckedLSN = Long.parseLong(response.getResponseHeaders().get(WFConstants.BackendHeaders.QUORUM_ACKED_LSN)); assertThat(quorumAckedLSN > 0).isTrue(); FailureValidator validator = new FailureValidator.Builder().statusCode(404).lsnGreaterThan(quorumAckedLSN).build(); - Mono> readObservable = this.readClient.readDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + Mono> readObservable = this.readClient.readDocument(document.getSelfLink(), options); validateFailure(readObservable, validator); } @@ -92,14 +92,14 @@ void validateConsistentLSNAndQuorumAckedLSN() { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(documentDefinition.get("mypk"))); Document document = createDocument(this.writeClient, createdDatabase.getId(), createdCollection.getId(), documentDefinition); - ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); + ResourceResponse response = this.writeClient.deleteDocument(document.getSelfLink(), options).block(); assertThat(response.getStatusCode()).isEqualTo(204); long quorumAckedLSN = Long.parseLong(response.getResponseHeaders().get(WFConstants.BackendHeaders.QUORUM_ACKED_LSN)); assertThat(quorumAckedLSN > 0).isTrue(); FailureValidator validator = new FailureValidator.Builder().statusCode(404).lsnGreaterThanEqualsTo(quorumAckedLSN).exceptionQuorumAckedLSNInNotNull().build(); - Mono> readObservable = this.readClient.deleteDocument(document.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + Mono> readObservable = this.readClient.deleteDocument(document.getSelfLink(), options); validateFailure(readObservable, validator); } @@ -157,10 +157,10 @@ void validateStrongConsistency(Document documentToWorkWith, String collectionLin Thread.sleep(1000);//Timestamp is in granularity of seconds. RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(documentToWorkWith.get("mypk"))); - Document updatedDocument = this.writeClient.replaceDocument(writeDocument, options, collectionLink).block().getResource(); + Document updatedDocument = this.writeClient.replaceDocument(writeDocument, options).block().getResource(); assertThat(updatedDocument.getTimestamp().isAfter(sourceTimestamp)).isTrue(); - Document readDocument = this.readClient.readDocument(documentToWorkWith.getSelfLink(), options, collectionLink).block().getResource(); + Document readDocument = this.readClient.readDocument(documentToWorkWith.getSelfLink(), options).block().getResource(); assertThat(updatedDocument.getTimestamp().equals(readDocument.getTimestamp())); } } @@ -284,7 +284,7 @@ boolean validateConsistentPrefix(Resource resourceToWorkWith) throws Interrupted } else if (resourceToWorkWith instanceof Document) { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(resourceToWorkWith.get("mypk"))); - readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())) + readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), options) .block() .getResource(); } @@ -320,7 +320,7 @@ boolean validateReadSession(Resource resourceToWorkWith) throws InterruptedExcep RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(resourceToWorkWith.get("mypk"))); if (resourceToWorkWith instanceof Document) { - readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block().getResource(); + readResource = this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions).block().getResource(); } assertThat(readResource.getTimestamp().compareTo(lastReadDateTime) >= 0).isTrue(); lastReadDateTime = readResource.getTimestamp(); @@ -353,7 +353,7 @@ boolean validateWriteSession(Resource resourceToWorkWith) throws InterruptedExce requestOptions.setPartitionKey(new PartitionKey(resourceToWorkWith.get("mypk"))); if (resourceToWorkWith instanceof Document) { readResource = - this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())) + this.readClient.readDocument(resourceToWorkWith.getSelfLink(), requestOptions) .block() .getResource(); } @@ -430,8 +430,8 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway, boolean Document documentCreated = client2.createDocument(collection.getSelfLink(), documentDefinition, null, true).block().getResource(); RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(documentCreated.get("mypk"))); - client2.readDocument(BridgeInternal.getAltLink(documentCreated), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); - client2.readDocument(documentCreated.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); + client2.readDocument(BridgeInternal.getAltLink(documentCreated), requestOptions).block(); + client2.readDocument(documentCreated.getSelfLink(), requestOptions).block(); } { @@ -466,7 +466,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway, boolean ResourceResponseValidator successValidator = new ResourceResponseValidator.Builder() .withId(createdDocument.getId()) .build(); - Mono> readObservable = client1.readDocument(createdDocument.getSelfLink(), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + Mono> readObservable = client1.readDocument(createdDocument.getSelfLink(), requestOptions); validateSuccess(readObservable, successValidator); { String token1 = getGlobalSessionToken(client1, collectionSameName, true, isRegionScopedSessionContainerEnabled); @@ -482,7 +482,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway, boolean RequestOptions requestOptions1 = new RequestOptions(); requestOptions1.setSessionToken(higherLsnToken); requestOptions1.setPartitionKey(new PartitionKey(createdDocument.get("mypk"))); - readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions1, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions1); FailureValidator failureValidator = new FailureValidator.Builder().subStatusCode(1002).build(); validateFailure(readObservable, failureValidator); } @@ -496,7 +496,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway, boolean } { // second read should succeed! - readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + readObservable = client2.readDocument(BridgeInternal.getAltLink(createdDocument), requestOptions); validateSuccess(readObservable, successValidator); } // verify deleting indeed delete the collection session token @@ -508,7 +508,7 @@ void validateSessionContainerAfterCollectionDeletion(boolean useGateway, boolean successValidator = new ResourceResponseValidator.Builder() .withId(documentTest.getId()) .build(); - readObservable = client1.readDocument(documentTest.getSelfLink(), options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + readObservable = client1.readDocument(documentTest.getSelfLink(), options); validateSuccess(readObservable, successValidator); client1.deleteCollection(collectionSameName.getSelfLink(), null).block(); @@ -621,7 +621,7 @@ void validateSessionTokenWithDocumentNotFoundExceptionBase(boolean useGateway, b RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(documentResponse.getResource().get("mypk"))); // try to read a non existent document in the same partition that we previously wrote to - Mono> readObservable = validationClient.readDocument(BridgeInternal.getAltLink(documentResponse.getResource()) + "dummy", requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + Mono> readObservable = validationClient.readDocument(BridgeInternal.getAltLink(documentResponse.getResource()) + "dummy", requestOptions); validateFailure(readObservable, failureValidator); assertThat(isSessionEqual(validationClient.getSession(), writeClient.getSession())).isTrue(); } finally { @@ -662,7 +662,7 @@ void validateSessionTokenWithExpectedExceptionBase(boolean useGateway, boolean i requestOptions.setSessionToken(higherLsnToken); // try to read a non existent document in the same partition that we previously wrote to Mono> readObservable = writeClient.readDocument(BridgeInternal.getAltLink(documentResponse.getResource()), - requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + requestOptions); validateFailure(readObservable, failureValidator); } finally { safeClose(writeClient); @@ -774,22 +774,22 @@ void validateSessionTokenMultiPartitionCollectionBase(boolean useGateway, boolea RequestOptions option = new RequestOptions(); option.setSessionToken(sessionToken); option.setPartitionKey(new PartitionKey(2)); - writeClient.readDocument(childResource2.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); + writeClient.readDocument(childResource2.getResource().getSelfLink(), option).block(); option = new RequestOptions(); option.setSessionToken(StringUtils.EMPTY); option.setPartitionKey(new PartitionKey(1)); - writeClient.readDocument(childResource1.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); + writeClient.readDocument(childResource1.getResource().getSelfLink(), option).block(); option = new RequestOptions(); option.setSessionToken(sessionToken); option.setPartitionKey(new PartitionKey(1)); - Mono> readObservable = writeClient.readDocument(childResource1.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + Mono> readObservable = writeClient.readDocument(childResource1.getResource().getSelfLink(), option); FailureValidator failureValidator = new FailureValidator.Builder().statusCode(HttpConstants.StatusCodes.NOTFOUND).subStatusCode(HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE).build(); validateFailure(readObservable, failureValidator); - readObservable = writeClient.readDocument(childResource2.getResource().getSelfLink(), option, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())); + readObservable = writeClient.readDocument(childResource2.getResource().getSelfLink(), option); failureValidator = new FailureValidator.Builder().statusCode(HttpConstants.StatusCodes.NOTFOUND).subStatusCode(HttpConstants.SubStatusCodes.READ_SESSION_NOT_AVAILABLE).build(); validateFailure(readObservable, failureValidator); @@ -828,7 +828,7 @@ void validateSessionTokenFromCollectionReplaceIsServerTokenBase(boolean useGatew Document doc = client1.createDocument(createdCollection.getSelfLink(), getDocumentDefinition(), null, true).block().getResource(); RequestOptions requestOptions = new RequestOptions(); requestOptions.setPartitionKey(new PartitionKey(doc.get("mypk"))); - Document doc1 = client1.readDocument(BridgeInternal.getAltLink(doc), requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block().getResource(); + Document doc1 = client1.readDocument(BridgeInternal.getAltLink(doc), requestOptions).block().getResource(); String token1 = client1.getSession().getSessionToken(createdCollection.getSelfLink()); client2 = (RxDocumentClientImpl) new AsyncDocumentClient.Builder() diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java index 512fb4c5c192..f304657d325a 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RequestHeadersSpyWireTest.java @@ -208,7 +208,7 @@ public void readItemWithMaxIntegratedCacheStaleness(CosmosItemRequestOptions cos cosmosItemRequestOptions, CosmosItemSerializer.DEFAULT_SERIALIZER); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - client.readDocument(documentLink, requestOptions, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())).block(); + client.readDocument(documentLink, requestOptions).block(); List requests = client.getCapturedRequests(); for (HttpRequest httpRequest : requests) { @@ -232,7 +232,7 @@ public void readItemWithMaxIntegratedCacheStalenessInNanoseconds() { CosmosItemSerializer.DEFAULT_SERIALIZER); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions, getDocumentCollectionLink()).block()) + assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions).block()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("MaxIntegratedCacheStaleness granularity is milliseconds"); } @@ -252,7 +252,7 @@ public void readItemWithMaxIntegratedCacheStalenessInNegative() { CosmosItemSerializer.DEFAULT_SERIALIZER); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions, getDocumentCollectionLink()).block()) + assertThatThrownBy(() -> client.readDocument(documentLink, requestOptions).block()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("MaxIntegratedCacheStaleness duration cannot be negative"); } @@ -274,7 +274,7 @@ public void readItemWithCacheBypass(boolean cacheBypass) { cosmosItemRequestOptions, CosmosItemSerializer.DEFAULT_SERIALIZER); requestOptions.setPartitionKey(new PartitionKey(DOCUMENT_ID)); - ResourceResponse response = client.readDocument(documentLink, requestOptions, getDocumentCollectionLink()).block(); + ResourceResponse response = client.readDocument(documentLink, requestOptions).block(); if (cacheBypass) { String responseHeader = response.getResponseHeaders().get("x-ms-cosmos-cache-bypass"); assertThat(responseHeader).isNotNull(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java index a060f20f9403..b95958874add 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SessionTest.java @@ -129,12 +129,12 @@ public void sessionConsistency_ReadYourWrites(boolean isNameBased) { spyClient.clearCapturedRequests(); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options).block(); assertThat(getSessionTokensInRequests()).hasSize(1); assertThat(getSessionTokensInRequests().get(0)).isNotEmpty(); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), options).block(); // same session token expected - because we collect // distinct session tokens only one of them should be kept @@ -171,14 +171,14 @@ public void partitionedSessionToken(boolean isNameBased) throws NoSuchMethodExce spyClient.clearCapturedRequests(); // Session token set for default session consistency - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); assertThat(getSessionTokensInRequests().get(0)).isNotEmpty(); assertThat(getSessionTokensInRequests().get(0)).doesNotContain(","); // making sure we have only one scope session token // Session token set for request session consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.SESSION); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); assertThat(getSessionTokensInRequests()).hasSize(1); assertThat(getSessionTokensInRequests().get(0)).isNotEmpty(); assertThat(getSessionTokensInRequests().get(0)).doesNotContain(","); // making sure we have only one scope session token @@ -337,13 +337,13 @@ public void sessionTokenNotRequired(boolean isNameBased) { // No session token set for EVENTUAL consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.EVENTUAL); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); assertThat(getSessionTokensInRequests()).hasSize(0); // No session token set for CONSISTENT_PREFIX consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.CONSISTENT_PREFIX); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); assertThat(getSessionTokensInRequests()).hasSize(0); if (globalEndpointManager.getLatestDatabaseAccount().getConsistencyPolicy().getDefaultConsistencyLevel().equals(ConsistencyLevel.STRONG) || @@ -351,7 +351,7 @@ public void sessionTokenNotRequired(boolean isNameBased) { // No session token set for BOUNDED_STALENESS consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.BOUNDED_STALENESS); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); assertThat(getSessionTokensInRequests()).hasSize(0); } @@ -359,7 +359,7 @@ public void sessionTokenNotRequired(boolean isNameBased) { // No session token set for STRONG consistency spyClient.clearCapturedRequests(); requestOptions.setConsistencyLevel(ConsistencyLevel.STRONG); - spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions, getCollectionLink(true)).block(); + spyClient.readDocument(getDocumentLink(documentCreated, isNameBased), requestOptions).block(); assertThat(getSessionTokensInRequests()).hasSize(0); } } @@ -394,7 +394,7 @@ public void sessionTokenInDocumentRead(boolean isNameBased) throws UnsupportedEn .getResource(); final String documentLink = getDocumentLink(document, isNameBased); - spyClient.readDocument(documentLink, options, getCollectionLink(true)).block() + spyClient.readDocument(documentLink, options).block() .getResource(); List documentReadHttpRequests = spyClient.getCapturedRequests().stream() diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java index 73bc905e211f..c0ca4e3da20d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/TestSuiteBase.java @@ -216,7 +216,7 @@ protected static void truncateCollection(DocumentCollection collection) { requestOptions.setPartitionKey(new PartitionKey(propertyValue)); } - return houseKeepingClient.deleteDocument(doc.getSelfLink(), requestOptions, collection.getSelfLink()); + return houseKeepingClient.deleteDocument(doc.getSelfLink(), requestOptions); }).then().block(); logger.info("Truncating DocumentCollection {} triggers ...", collection.getId()); @@ -585,7 +585,7 @@ public static void deleteDocumentIfExists(AsyncDocumentClient client, String dat public static void deleteDocument(AsyncDocumentClient client, String documentLink, PartitionKey pk, String collectionLink) { RequestOptions options = new RequestOptions(); options.setPartitionKey(pk); - client.deleteDocument(documentLink, options, collectionLink).block(); + client.deleteDocument(documentLink, options).block(); } public static void deleteUserIfExists(AsyncDocumentClient client, String databaseId, String userId) { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java index a888b6474581..d3383227620e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/DCDocumentCrudTest.java @@ -171,7 +171,7 @@ public void read() throws Exception { .withId(docDefinition.getId()) .build(); - validateSuccess(client.readDocument(docLink, options, TestUtils.getCollectionNameLink(createdDatabase.getId(), createdCollection.getId())), validator, TIMEOUT); + validateSuccess(client.readDocument(docLink, options), validator, TIMEOUT); validateNoDocumentOperationThroughGateway(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java index 70a35ff27830..75d670da5c91 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ChangeFeedTest.java @@ -455,7 +455,7 @@ public Document updateDocument(AsyncDocumentClient client, Document originalDocu originalDocument.set("prop", uuid, CosmosItemSerializer.DEFAULT_SERIALIZER); return client - .replaceDocument(originalDocument.getSelfLink(), originalDocument, null, getCollectionLink()) + .replaceDocument(originalDocument.getSelfLink(), originalDocument, null) .block() .getResource(); } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java index df6c5b9acae1..a94dfea5b85c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/ResourceTokenTest.java @@ -315,7 +315,7 @@ public void readDocumentFromPermissionFeed(String documentUrl, Permission permis } Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options, documentCollection.getSelfLink()); + .readDocument(documentUrl, options); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(documentId).build(); validateSuccess(readObservable, validator); @@ -346,7 +346,7 @@ public void readDocumentFromResouceToken(String resourceToken) throws Exception RequestOptions options = new RequestOptions(); options.setPartitionKey(PartitionKey.NONE); Mono> readObservable = asyncClientResourceToken - .readDocument(createdDocument.getSelfLink(), options, createdCollection.getSelfLink()); + .readDocument(createdDocument.getSelfLink(), options); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(createdDocument.getId()).build(); validateSuccess(readObservable, validator); @@ -382,7 +382,7 @@ public void readDocumentOfParKeyFromTwoCollPermissionWithDiffPartitionKeys(Strin RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(partitionKey)); Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options, documentCollection.getSelfLink()); + .readDocument(documentUrl, options); ResourceResponseValidator validator = new ResourceResponseValidator.Builder() .withId(documentId).build(); validateSuccess(readObservable, validator); @@ -419,7 +419,7 @@ public void readDocumentFromCollPermissionWithDiffPartitionKey_ResourceNotFound( options.setPartitionKey(new PartitionKey(partitionKey)); Mono> readObservable = asyncClientResourceToken - .readDocument(documentUrl, options, documentCollection.getSelfLink()); + .readDocument(documentUrl, options); FailureValidator validator = new FailureValidator.Builder().resourceNotFound().build(); validateFailure(readObservable, validator); } finally { @@ -451,7 +451,7 @@ public void readDocumentFromCollPermissionWithDiffPartitionKey_WithException() t RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(PARTITION_KEY_VALUE_2)); Mono> readObservable = asyncClientResourceToken - .readDocument(createdDocumentWithPartitionKey.getSelfLink(), options, createdCollection.getSelfLink()); + .readDocument(createdDocumentWithPartitionKey.getSelfLink(), options); FailureValidator validator = new FailureValidator.Builder().resourceTokenNotFound().build(); validateFailure(readObservable, validator); } finally { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java index a85087eb9333..3075d44f1375 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java @@ -395,7 +395,7 @@ private Mono> replaceItemWithTrackingId(Class itemT Mono> readMono = this.getDatabase().getDocClientWrapper() - .readDocument(getItemLink(itemId), requestOptions, this.getLinkWithoutTrailingSlash()) + .readDocument(getItemLink(itemId), requestOptions) .map(response -> { mergeDiagnostics(response, cosmosException); return itemResponseAccessor @@ -477,7 +477,7 @@ private Mono> createItemWithTrackingId( .toPartitionKey(partitionKeyInternal); readRequestOptions.setPartitionKey(partitionKey); - return clientWrapper.readDocument(getItemLink(itemId), readRequestOptions, this.getLinkWithoutTrailingSlash()) + return clientWrapper.readDocument(getItemLink(itemId), readRequestOptions) .map(response -> { mergeDiagnostics(response, cosmosException); return itemResponseAccessor @@ -959,10 +959,10 @@ Function>> queryItemsInternalFu CosmosAsyncClient client = this.getDatabase().getClient(); CosmosQueryRequestOptions options = cosmosQueryRequestOptions != null ? cosmosQueryRequestOptions : new CosmosQueryRequestOptions(); - + Function>> pagedFluxOptionsFluxFunction = (pagedFluxOptions -> { String spanName = this.queryItemsSpanName; - + ShowQueryMode showQueryMode = clientTelemetryConfigAccessor.showQueryMode(client.getClientTelemetryConfig()); if(ShowQueryMode.PARAMETERIZED_ONLY.equals(showQueryMode) && isParameterized) { @@ -2111,7 +2111,7 @@ private Mono> deleteItemInternalCore( Context context) { Mono> responseMono = this.getDatabase() .getDocClientWrapper() - .deleteDocument(getItemLink(itemId), internalObjectNode, requestOptions, this.getLinkWithoutTrailingSlash()) + .deleteDocument(getItemLink(itemId), internalObjectNode, requestOptions) .map(response -> itemResponseAccessor.createCosmosItemResponse(response, Object.class, CosmosItemSerializer.DEFAULT_SERIALIZER)) .single(); CosmosAsyncClient client = database.getClient(); @@ -2168,7 +2168,7 @@ private Mono> replaceItemInternalCore( return this.getDatabase() .getDocClientWrapper() - .replaceDocument(getItemLink(itemId), doc, requestOptions, getLinkWithoutTrailingSlash()) + .replaceDocument(getItemLink(itemId), doc, requestOptions) .map(response -> itemResponseAccessor.createCosmosItemResponse(response, itemType, requestOptions.getEffectiveItemSerializer())) .single(); } @@ -2265,7 +2265,7 @@ private Mono> patchItemInternal( Mono> responseMono = this.getDatabase() .getDocClientWrapper() - .patchDocument(getItemLink(itemId), cosmosPatchOperations, requestOptions, this.getLinkWithoutTrailingSlash()) + .patchDocument(getItemLink(itemId), cosmosPatchOperations, requestOptions) .map(response -> itemResponseAccessor.createCosmosItemResponse(response, itemType, requestOptions.getEffectiveItemSerializer())); CosmosAsyncClient client = database @@ -2332,7 +2332,7 @@ private Mono> readItemInternal( RequestOptions requestOptions, Class itemType, Context context) { Mono> responseMono = this.getDatabase().getDocClientWrapper() - .readDocument(getItemLink(itemId), requestOptions, this.getLinkWithoutTrailingSlash()) + .readDocument(getItemLink(itemId), requestOptions) .map(response -> itemResponseAccessor.createCosmosItemResponse(response, itemType, requestOptions.getEffectiveItemSerializer())) .single(); CosmosAsyncClient client = database diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index 5d3e7088f1d7..efd7f4ca26e3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -610,7 +610,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response with the replaced document or an error. */ - Mono> replaceDocument(String documentLink, Object document, RequestOptions options, String collectionLink); + Mono> replaceDocument(String documentLink, Object document, RequestOptions options); /** * Apply patch on an item. @@ -625,7 +625,7 @@ Mono> upsertDocument(String collectionLink, Object do * * @return a {@link Mono} containing the single resource response with the patched document or an error. */ - Mono> patchDocument(String documentLink, CosmosPatchOperations cosmosPatchOperations, RequestOptions options, String collectionLink); + Mono> patchDocument(String documentLink, CosmosPatchOperations cosmosPatchOperations, RequestOptions options); /** * Replaces a document with the passed in document. @@ -638,7 +638,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response with the replaced document or an error. */ - Mono> replaceDocument(Document document, RequestOptions options, String collectionLink); + Mono> replaceDocument(Document document, RequestOptions options); /** * Deletes a document @@ -651,7 +651,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response for the deleted document or an error. */ - Mono> deleteDocument(String documentLink, RequestOptions options, String collectionLink); + Mono> deleteDocument(String documentLink, RequestOptions options); /** * Deletes a document @@ -664,7 +664,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response for the deleted document or an error. */ - Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options, String collectionLink); + Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options); Mono> deleteAllDocumentsByPartitionKey(String collectionLink, PartitionKey partitionKey, RequestOptions options); /** @@ -678,7 +678,7 @@ Mono> upsertDocument(String collectionLink, Object do * @param options the request options. * @return a {@link Mono} containing the single resource response with the read document or an error. */ - Mono> readDocument(String documentLink, RequestOptions options, String collectionLink); + Mono> readDocument(String documentLink, RequestOptions options); /** * Reads all documents in a document collection. diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index f6b40fa40b63..c3fc8c151f3d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -47,7 +47,7 @@ public class ClientSideRequestStatistics { private Set failedReplicas; private Instant requestStartTimeUTC; private Instant requestEndTimeUTC; - private Set regionsContacted; +// private Set regionsContacted; private NavigableSet regionsContactedWithContext; private Set locationEndpointsContacted; private RetryContext retryContext; @@ -71,7 +71,7 @@ public ClientSideRequestStatistics(DiagnosticsClientContext diagnosticsClientCon this.addressResolutionStatistics = new HashMap<>(); this.contactedReplicas = Collections.synchronizedList(new ArrayList<>()); this.failedReplicas = Collections.synchronizedSet(new HashSet<>()); - this.regionsContacted = Collections.synchronizedSet(new HashSet<>()); +// this.regionsContacted = Collections.synchronizedSet(new HashSet<>()); this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>()); this.locationEndpointsContacted = Collections.synchronizedSet(new HashSet<>()); this.metadataDiagnosticsContext = new MetadataDiagnosticsContext(); @@ -93,7 +93,7 @@ public ClientSideRequestStatistics(ClientSideRequestStatistics toBeCloned) { this.addressResolutionStatistics = new HashMap<>(toBeCloned.addressResolutionStatistics); this.contactedReplicas = Collections.synchronizedList(new ArrayList<>(toBeCloned.contactedReplicas)); this.failedReplicas = Collections.synchronizedSet(new HashSet<>(toBeCloned.failedReplicas)); - this.regionsContacted = Collections.synchronizedSet(new HashSet<>(toBeCloned.regionsContacted)); +// this.regionsContacted = Collections.synchronizedSet(new HashSet<>(toBeCloned.regionsContacted)); this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>(toBeCloned.regionsContactedWithContext)); this.locationEndpointsContacted = Collections.synchronizedSet( new HashSet<>(toBeCloned.locationEndpointsContacted)); @@ -187,7 +187,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost if (locationEndPoint != null) { storeResponseStatistics.regionName = globalEndpointManager.getRegionName(locationEndPoint, request.getOperationType()); - this.regionsContacted.add(storeResponseStatistics.regionName); +// this.regionsContacted.add(storeResponseStatistics.regionName); this.locationEndpointsContacted.add(locationEndPoint); this.regionsContactedWithContext.add(new RegionWithContext(storeResponseStatistics.regionName, locationEndPoint)); } @@ -224,7 +224,7 @@ public void recordGatewayResponse( String regionName = globalEndpointManager.getRegionName(locationEndPoint, rxDocumentServiceRequest.getOperationType()); - this.regionsContacted.add(regionName); +// this.regionsContacted.add(regionName); this.locationEndpointsContacted.add(locationEndPoint); this.regionsContactedWithContext.add(new RegionWithContext(regionName, locationEndPoint)); @@ -437,20 +437,20 @@ private void mergeRegionWithContextSet(NavigableSet other) { } } - private void mergeRegionsContacted(Set other) { - if (other == null) { - return; - } - - if (this.regionsContacted == null || this.regionsContacted.isEmpty()) { - this.regionsContacted = other; - return; - } - - for (String region : other) { - this.regionsContacted.add(region); - } - } +// private void mergeRegionsContacted(Set other) { +// if (other == null) { +// return; +// } +// +// if (this.regionsContacted == null || this.regionsContacted.isEmpty()) { +// this.regionsContacted = other; +// return; +// } +// +// for (String region : other) { +// this.regionsContacted.add(region); +// } +// } private void mergeStartTime(Instant other) { if (other == null) { @@ -497,7 +497,7 @@ public void mergeClientSideRequestStatistics(ClientSideRequestStatistics other) this.mergeContactedReplicas(other.contactedReplicas); this.mergeFailedReplica(other.failedReplicas); this.mergeLocationEndpointsContacted(other.locationEndpointsContacted); - this.mergeRegionsContacted(other.regionsContacted); +// this.mergeRegionsContacted(other.regionsContacted); this.mergeRegionWithContextSet(other.regionsContactedWithContext); this.mergeStartTime(other.requestStartTimeUTC); this.mergeEndTime(other.requestEndTimeUTC); @@ -529,12 +529,12 @@ public void setFailedReplicas(Set failedReplicas) { } public Set getContactedRegionNames() { - return regionsContacted; + return this.regionsContactedWithContext.stream().map(regionWithContext -> regionWithContext.regionContacted).collect(Collectors.toSet()); } - public void setRegionsContacted(Set regionsContacted) { - this.regionsContacted = Collections.synchronizedSet(regionsContacted); - } +// public void setRegionsContacted(Set regionsContacted) { +// this.regionsContacted = Collections.synchronizedSet(regionsContacted); +// } public Set getLocationEndpointsContacted() { return locationEndpointsContacted; @@ -740,7 +740,7 @@ public void serialize( generator.writeObjectField("responseStatisticsList", statistics.responseStatisticsList); generator.writeObjectField("supplementalResponseStatisticsList", getCappedSupplementalResponseStatisticsList(statistics.supplementalResponseStatisticsList)); generator.writeObjectField("addressResolutionStatistics", statistics.addressResolutionStatistics); - generator.writeObjectField("regionsContacted", statistics.regionsContacted); + generator.writeObjectField("regionsContacted", statistics.getContactedRegionNames()); generator.writeObjectField("retryContext", statistics.retryContext); generator.writeObjectField("metadataDiagnosticsContext", statistics.getMetadataDiagnosticsContext()); generator.writeObjectField("serializationDiagnosticsContext", statistics.getSerializationDiagnosticsContext()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 91dee8880b3a..c67b6b21599e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2577,7 +2577,9 @@ private Mono> upsertDocumentInternal( @Override public Mono> replaceDocument(String documentLink, Object document, - RequestOptions options, String collectionLink) { + RequestOptions options) { + + String collectionLink = Utils.getCollectionName(documentLink); return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, @@ -2673,7 +2675,10 @@ private Mono> replaceDocumentInternal( } @Override - public Mono> replaceDocument(Document document, RequestOptions options, String collectionLink) { + public Mono> replaceDocument(Document document, RequestOptions options) { + + String collectionLink = Utils.getCollectionName(document.getSelfLink()); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Replace, @@ -2864,8 +2869,10 @@ private CosmosEndToEndOperationLatencyPolicyConfig getEffectiveEndToEndOperation @Override public Mono> patchDocument(String documentLink, CosmosPatchOperations cosmosPatchOperations, - RequestOptions options, - String collectionLink) { + RequestOptions options) { + + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Patch, @@ -3000,7 +3007,10 @@ private Mono> patchDocumentInternal( } @Override - public Mono> deleteDocument(String documentLink, RequestOptions options, String collectionLink) { + public Mono> deleteDocument(String documentLink, RequestOptions options) { + + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, @@ -3018,7 +3028,10 @@ public Mono> deleteDocument(String documentLink, Requ } @Override - public Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options, String collectionLink) { + public Mono> deleteDocument(String documentLink, InternalObjectNode internalObjectNode, RequestOptions options) { + + String collectionLink = Utils.getCollectionName(documentLink); + return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, OperationType.Delete, @@ -3168,15 +3181,16 @@ private Mono> deleteAllDocumentsByPartitionKeyInterna } @Override - public Mono> readDocument(String documentLink, RequestOptions options, String collectionLink) { - return readDocument(documentLink, options, this, collectionLink); + public Mono> readDocument(String documentLink, RequestOptions options) { + return readDocument(documentLink, options, this); } private Mono> readDocument( String documentLink, RequestOptions options, - DiagnosticsClientContext innerDiagnosticsFactory, - String collectionLink) { + DiagnosticsClientContext innerDiagnosticsFactory) { + + String collectionLinkDuplicate = Utils.getCollectionName(documentLink); return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, @@ -3185,7 +3199,7 @@ private Mono> readDocument( options, false, innerDiagnosticsFactory, - collectionLink); + collectionLinkDuplicate); } private Mono> readDocumentCore( @@ -3369,8 +3383,7 @@ public Mono> readMany( partitionRangeItemKeyMap, resourceLink, state.getQueryOptions(), - klass, - collectionLink); + klass); // create the executable query Flux> queries = queryForReadMany( @@ -3653,8 +3666,7 @@ private Flux> pointReadsForReadMany( List> singleItemPartitionRequestMap, String resourceLink, CosmosQueryRequestOptions queryRequestOptions, - Class klass, - String collectionLink) { + Class klass) { // if there is any factory method being passed in, use the factory method to deserializ the object // else fallback to use the original way @@ -3670,7 +3682,7 @@ private Flux> pointReadsForReadMany( .getCosmosQueryRequestOptionsAccessor() .toRequestOptions(queryRequestOptions); requestOptions.setPartitionKey(firstIdentity.getPartitionKey()); - return this.readDocument((resourceLink + firstIdentity.getId()), requestOptions, diagnosticsFactory, collectionLink) + return this.readDocument((resourceLink + firstIdentity.getId()), requestOptions, diagnosticsFactory) .flatMap(resourceResponse -> Mono.just( new ImmutablePair, CosmosException>(resourceResponse, null) )) From d60adda9159daec68d676544b67c93fdfd65e5e5 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 29 Jun 2024 17:54:52 -0400 Subject: [PATCH 097/140] Reacting to review comments. --- .../azure/cosmos/implementation/Configs.java | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 8cb3e156e60e..9178f5c5e856 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -622,45 +622,51 @@ public static PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreaker } public static int getStaleCollectionCacheRefreshRetryCount() { + String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); - if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { - return Integer.valueOf(valueFromSystemProperty); + if (!StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Integer.parseInt(valueFromSystemProperty); } String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); - if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { - return Integer.valueOf(valueFromEnvVariable); + + if (!StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Integer.parseInt(valueFromEnvVariable); } return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT; } public static int getStaleCollectionCacheRefreshRetryIntervalInSeconds() { + String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); - if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { - return Integer.valueOf(valueFromSystemProperty); + if (!StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Integer.parseInt(valueFromSystemProperty); } String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); - if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { - return Integer.valueOf(valueFromEnvVariable); + + if (!StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Integer.parseInt(valueFromEnvVariable); } return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS; } public static int getStalePartitionUnavailabilityRefreshIntervalInSeconds() { + String valueFromSystemProperty = System.getProperty(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); - if (valueFromSystemProperty != null && !valueFromSystemProperty.isEmpty()) { - return Integer.valueOf(valueFromSystemProperty); + if (!StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Integer.parseInt(valueFromSystemProperty); } String valueFromEnvVariable = System.getenv(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); - if (valueFromEnvVariable != null && !valueFromEnvVariable.isEmpty()) { - return Integer.valueOf(valueFromEnvVariable); + + if (!StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Integer.parseInt(valueFromEnvVariable); } return DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS; From 6db23c274e062b0b00856d6909201eb75fa23c2c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 29 Jun 2024 18:17:08 -0400 Subject: [PATCH 098/140] Reacting to review comments. --- .../java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java index 8689e14b1696..9e6416bbe398 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/ReadMyWriteWorkflow.java @@ -229,7 +229,7 @@ private Flux writeDocument(Integer i) { return false; }, - (conflictException) -> client.readDocument(getDocumentLink(document), null, getCollectionLink()) + (conflictException) -> client.readDocument(getDocumentLink(document), null) ) .doOnNext(r -> cache.put(key, r.getResource())) .map(ResourceResponse::getResource).flux(); @@ -245,7 +245,7 @@ private Flux readDocument(Document d) { RequestOptions options = new RequestOptions(); options.setPartitionKey(new PartitionKey(d.getString(partitionKey))); - return client.readDocument(getDocumentLink(d), options, getCollectionLink()) + return client.readDocument(getDocumentLink(d), options) .map(ResourceResponse::getResource).flux(); } From 5295c88c3718c7d2f6dfc39c3cec15ab3b084a64 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 30 Jun 2024 10:36:48 -0400 Subject: [PATCH 099/140] Fixing tests. --- .../com/azure/cosmos/implementation/Configs.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 9178f5c5e856..eb24d159e88d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -625,13 +625,13 @@ public static int getStaleCollectionCacheRefreshRetryCount() { String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); - if (!StringUtils.isNotEmpty(valueFromSystemProperty)) { + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { return Integer.parseInt(valueFromSystemProperty); } String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); - if (!StringUtils.isNotEmpty(valueFromEnvVariable)) { + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { return Integer.parseInt(valueFromEnvVariable); } @@ -642,13 +642,13 @@ public static int getStaleCollectionCacheRefreshRetryIntervalInSeconds() { String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); - if (!StringUtils.isNotEmpty(valueFromSystemProperty)) { + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { return Integer.parseInt(valueFromSystemProperty); } String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); - if (!StringUtils.isNotEmpty(valueFromEnvVariable)) { + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { return Integer.parseInt(valueFromEnvVariable); } @@ -659,13 +659,13 @@ public static int getStalePartitionUnavailabilityRefreshIntervalInSeconds() { String valueFromSystemProperty = System.getProperty(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); - if (!StringUtils.isNotEmpty(valueFromSystemProperty)) { + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { return Integer.parseInt(valueFromSystemProperty); } String valueFromEnvVariable = System.getenv(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); - if (!StringUtils.isNotEmpty(valueFromEnvVariable)) { + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { return Integer.parseInt(valueFromEnvVariable); } From e282d61e8c2f88359b1ab2c725b4c5d60e8a8daa Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 30 Jun 2024 16:21:09 -0400 Subject: [PATCH 100/140] Fixing merge conflicts. --- .../CosmosChangeFeedRequestOptionsImpl.java | 55 +++++++++---- .../cosmos/implementation/RequestOptions.java | 77 +++++++++++++++++-- .../implementation/RxDocumentClientImpl.java | 28 ++++--- .../CosmosChangeFeedRequestOptions.java | 8 +- .../models/CosmosQueryRequestOptions.java | 10 +-- 5 files changed, 137 insertions(+), 41 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java index 7f2959a0a51f..d3bd99832fad 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java @@ -15,6 +15,7 @@ import com.azure.cosmos.implementation.spark.OperationContextAndListenerTuple; import com.azure.cosmos.models.DedicatedGatewayRequestOptions; import com.azure.cosmos.models.FeedRange; +import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.ReadOnlyRequestOptions; import com.azure.cosmos.util.Beta; @@ -42,23 +43,27 @@ public final class CosmosChangeFeedRequestOptionsImpl implements OverridableRequ private CosmosDiagnosticsThresholds thresholds; private List excludeRegions; private CosmosItemSerializer customSerializer; - - public CosmosChangeFeedRequestOptionsImpl(CosmosChangeFeedRequestOptionsImpl topBeCloned) { - this.continuationState = topBeCloned.continuationState; - this.feedRangeInternal = topBeCloned.feedRangeInternal; - this.properties = topBeCloned.properties; - this.maxItemCount = topBeCloned.maxItemCount; - this.maxPrefetchPageCount = topBeCloned.maxPrefetchPageCount; - this.mode = topBeCloned.mode; - this.startFromInternal = topBeCloned.startFromInternal; - this.isSplitHandlingDisabled = topBeCloned.isSplitHandlingDisabled; - this.quotaInfoEnabled = topBeCloned.quotaInfoEnabled; - this.throughputControlGroupName = topBeCloned.throughputControlGroupName; - this.customOptions = topBeCloned.customOptions; - this.operationContextAndListenerTuple = topBeCloned.operationContextAndListenerTuple; - this.thresholds = topBeCloned.thresholds; - this.excludeRegions = topBeCloned.excludeRegions; - this.customSerializer = topBeCloned.customSerializer; + private PartitionKeyDefinition partitionKeyDefinition; + private String collectionRid; + + public CosmosChangeFeedRequestOptionsImpl(CosmosChangeFeedRequestOptionsImpl toBeCloned) { + this.continuationState = toBeCloned.continuationState; + this.feedRangeInternal = toBeCloned.feedRangeInternal; + this.properties = toBeCloned.properties; + this.maxItemCount = toBeCloned.maxItemCount; + this.maxPrefetchPageCount = toBeCloned.maxPrefetchPageCount; + this.mode = toBeCloned.mode; + this.startFromInternal = toBeCloned.startFromInternal; + this.isSplitHandlingDisabled = toBeCloned.isSplitHandlingDisabled; + this.quotaInfoEnabled = toBeCloned.quotaInfoEnabled; + this.throughputControlGroupName = toBeCloned.throughputControlGroupName; + this.customOptions = toBeCloned.customOptions; + this.operationContextAndListenerTuple = toBeCloned.operationContextAndListenerTuple; + this.thresholds = toBeCloned.thresholds; + this.excludeRegions = toBeCloned.excludeRegions; + this.customSerializer = toBeCloned.customSerializer; + this.collectionRid = toBeCloned.collectionRid; + this.partitionKeyDefinition = toBeCloned.partitionKeyDefinition; } public CosmosChangeFeedRequestOptionsImpl( @@ -326,6 +331,22 @@ private void addCustomOptionsForFullFidelityMode() { HttpConstants.ChangeFeedWireFormatVersions.SEPARATE_METADATA_WITH_CRTS); } + public PartitionKeyDefinition getPartitionKeyDefinition() { + return partitionKeyDefinition; + } + + public void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { + this.partitionKeyDefinition = partitionKeyDefinition; + } + + public String getCollectionRid() { + return collectionRid; + } + + public void setCollectionRid(String collectionRid) { + this.collectionRid = collectionRid; + } + @Override public void override(ReadOnlyRequestOptions readOnlyRequestOptions) { this.maxItemCount = overrideOption(readOnlyRequestOptions.getMaxItemCount(), this.maxItemCount); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java index 8ba006b76c0b..3f1462ba7170 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java @@ -26,7 +26,7 @@ /** * Encapsulates options that can be specified for a request issued to the Azure Cosmos DB database service. */ -public class RequestOptions { +public class RequestOptions implements OverridableRequestOptions { private Map customOptions; private List preTriggerInclude; private List postTriggerInclude; @@ -49,9 +49,9 @@ public class RequestOptions { private OperationContextAndListenerTuple operationContextAndListenerTuple; private DedicatedGatewayRequestOptions dedicatedGatewayRequestOptions; private CosmosDiagnosticsThresholds thresholds; - + private boolean useTrackingIds; private String trackingId; - private boolean nonIdempotentWriteRetriesEnabled = false; + private Boolean nonIdempotentWriteRetriesEnabled; private CosmosEndToEndOperationLatencyPolicyConfig endToEndOperationLatencyConfig; private List excludeRegions; @@ -151,7 +151,8 @@ public RequestOptions setNonIdempotentWriteRetriesEnabled(boolean enabled) { return this; } - public boolean getNonIdempotentWriteRetriesEnabled() { + @Override + public Boolean getNonIdempotentWriteRetriesEnabled() { return this.nonIdempotentWriteRetriesEnabled; } @@ -258,6 +259,7 @@ public String getTrackingId() { * * @return the consistency level. */ + @Override public ConsistencyLevel getConsistencyLevel() { return this.consistencyLevel; } @@ -463,6 +465,7 @@ public void setProperties(Map properties) { * * @return a boolean indicating whether payload will be included in the response or not for this request. */ + @Override public Boolean isContentResponseOnWriteEnabled() { return contentResponseOnWriteEnabled; } @@ -489,6 +492,7 @@ public void setContentResponseOnWriteEnabled(Boolean contentResponseOnWriteEnabl this.contentResponseOnWriteEnabled = contentResponseOnWriteEnabled; } + @Override public String getThroughputControlGroupName() { return this.throughputControlGroupName; } @@ -497,6 +501,7 @@ public void setThroughputControlGroupName(String throughputControlGroupName) { this.throughputControlGroupName = throughputControlGroupName; } + @Override public DedicatedGatewayRequestOptions getDedicatedGatewayRequestOptions() { return dedicatedGatewayRequestOptions; } @@ -505,10 +510,56 @@ public void setDedicatedGatewayRequestOptions(DedicatedGatewayRequestOptions ded this.dedicatedGatewayRequestOptions = dedicatedGatewayRequestOptions; } + @Override public CosmosDiagnosticsThresholds getDiagnosticsThresholds() { return this.thresholds; } + @Override + public Boolean isScanInQueryEnabled() { + return null; + } + + @Override + public Integer getMaxDegreeOfParallelism() { + return null; + } + + @Override + public Integer getMaxBufferedItemCount() { + return null; + } + + @Override + public Integer getResponseContinuationTokenLimitInKb() { + return null; + } + + @Override + public Integer getMaxItemCount() { + return null; + } + + @Override + public Boolean isQueryMetricsEnabled() { + return null; + } + + @Override + public Boolean isIndexMetricsEnabled() { + return null; + } + + @Override + public Integer getMaxPrefetchPageCount() { + return null; + } + + @Override + public String getQueryNameOrDefault(String defaultQueryName) { + return null; + } + public void setDiagnosticsThresholds(CosmosDiagnosticsThresholds thresholds) { this.thresholds = thresholds; } @@ -530,15 +581,17 @@ public void setCosmosEndToEndLatencyPolicyConfig(CosmosEndToEndOperationLatencyP this.endToEndOperationLatencyConfig = endToEndOperationLatencyPolicyConfig; } + @Override public CosmosEndToEndOperationLatencyPolicyConfig getCosmosEndToEndLatencyPolicyConfig(){ return this.endToEndOperationLatencyConfig; } - public List getExcludeRegions() { + @Override + public List getExcludedRegions() { return this.excludeRegions; } - public void setExcludeRegions(List excludeRegions) { + public void setExcludedRegions(List excludeRegions) { this.excludeRegions = excludeRegions; } @@ -546,6 +599,18 @@ public AtomicReference getMarkE2ETimeoutInRequestContextCallbackHook() return this.markE2ETimeoutInRequestContextCallbackHook; } + @Override + public void override(ReadOnlyRequestOptions cosmosCommonRequestOptions) { + this.consistencyLevel = overrideOption(cosmosCommonRequestOptions.getConsistencyLevel(), this.consistencyLevel); + this.contentResponseOnWriteEnabled = overrideOption(cosmosCommonRequestOptions.isContentResponseOnWriteEnabled(), this.contentResponseOnWriteEnabled); + this.nonIdempotentWriteRetriesEnabled = overrideOption(cosmosCommonRequestOptions.getNonIdempotentWriteRetriesEnabled(), this.nonIdempotentWriteRetriesEnabled); + this.dedicatedGatewayRequestOptions = overrideOption(cosmosCommonRequestOptions.getDedicatedGatewayRequestOptions(), this.dedicatedGatewayRequestOptions); + this.excludeRegions = overrideOption(cosmosCommonRequestOptions.getExcludedRegions(), this.excludeRegions); + this.throughputControlGroupName = overrideOption(cosmosCommonRequestOptions.getThroughputControlGroupName(), this.throughputControlGroupName); + this.thresholds = overrideOption(cosmosCommonRequestOptions.getDiagnosticsThresholds(), this.thresholds); + this.endToEndOperationLatencyConfig = overrideOption(cosmosCommonRequestOptions.getCosmosEndToEndLatencyPolicyConfig(), this.endToEndOperationLatencyConfig); + } + public CosmosItemSerializer getEffectiveItemSerializer() { return this.effectiveItemSerializer; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index d2d607081cf5..5c278881635f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -74,6 +74,7 @@ import com.azure.cosmos.models.CosmosItemIdentity; import com.azure.cosmos.models.CosmosItemRequestOptions; import com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.models.CosmosOperationDetails; import com.azure.cosmos.models.CosmosPatchOperations; import com.azure.cosmos.models.CosmosQueryRequestOptions; import com.azure.cosmos.models.FeedRange; @@ -170,6 +171,12 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization ImplementationBridgeHelpers.CosmosItemResponseHelper.CosmosItemResponseBuilderAccessor itemResponseAccessor = ImplementationBridgeHelpers.CosmosItemResponseHelper.getCosmosItemResponseBuilderAccessor(); + private static final ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.CosmosChangeFeedRequestOptionsAccessor changeFeedOptionsAccessor = + ImplementationBridgeHelpers.CosmosChangeFeedRequestOptionsHelper.getCosmosChangeFeedRequestOptionsAccessor(); + + private static final ImplementationBridgeHelpers.CosmosOperationDetailsHelper.CosmosOperationDetailsAccessor operationDetailsAccessor = + ImplementationBridgeHelpers.CosmosOperationDetailsHelper.getCosmosOperationDetailsAccessor(); + private static final String tempMachineId = "uuid:" + UUID.randomUUID(); private static final AtomicInteger activeClientsCnt = new AtomicInteger(0); private static final Map clientMap = new ConcurrentHashMap<>(); @@ -250,6 +257,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization private final boolean sessionCapturingOverrideEnabled; private final boolean sessionCapturingDisabled; private final boolean isRegionScopedSessionCapturingEnabledOnClientOrSystemConfig; + private List operationPolicies; public RxDocumentClientImpl(URI serviceEndpoint, String masterKeyOrResourceToken, @@ -315,7 +323,8 @@ public RxDocumentClientImpl(URI serviceEndpoint, SessionRetryOptions sessionRetryOptions, CosmosContainerProactiveInitConfig containerProactiveInitConfig, CosmosItemSerializer defaultCustomSerializer, - boolean isRegionScopedSessionCapturingEnabled) { + boolean isRegionScopedSessionCapturingEnabled, + List operationPolicies) { this( serviceEndpoint, masterKeyOrResourceToken, @@ -338,6 +347,7 @@ public RxDocumentClientImpl(URI serviceEndpoint, defaultCustomSerializer, isRegionScopedSessionCapturingEnabled); this.cosmosAuthorizationTokenResolver = cosmosAuthorizationTokenResolver; + this.operationPolicies = operationPolicies; } private RxDocumentClientImpl(URI serviceEndpoint, @@ -1838,7 +1848,7 @@ private Mono getCreateDocumentRequest(DocumentClientRe getEffectiveClientContext(clientContextOverride), operationType, ResourceType.Document, path, requestHeaders, options, content); - if (operationType.isWriteOperation() && options != null && options.getNonIdempotentWriteRetriesEnabled()) { + if (operationType.isWriteOperation() && options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled()) { request.setNonIdempotentWriteRetriesEnabled(true); } @@ -2797,7 +2807,7 @@ private Mono> replaceDocumentInternal( getEffectiveClientContext(clientContextOverride), OperationType.Replace, ResourceType.Document, path, requestHeaders, options, content); - if (options != null && options.getNonIdempotentWriteRetriesEnabled()) { + if (options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled()) { request.setNonIdempotentWriteRetriesEnabled(true); } @@ -2963,7 +2973,7 @@ private Mono> patchDocumentInternal( options, content); - if (options != null && options.getNonIdempotentWriteRetriesEnabled()) { + if (options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled()) { request.setNonIdempotentWriteRetriesEnabled(true); } if (options != null) { @@ -3191,7 +3201,7 @@ private Mono> readDocument( RequestOptions options, DiagnosticsClientContext innerDiagnosticsFactory) { - String collectionLinkDuplicate = Utils.getCollectionName(documentLink); + String collectionLink = Utils.getCollectionName(documentLink); return wrapPointOperationWithAvailabilityStrategy( ResourceType.Document, @@ -3200,7 +3210,7 @@ private Mono> readDocument( options, false, innerDiagnosticsFactory, - collectionLinkDuplicate); + collectionLink); } private Mono> readDocumentCore( @@ -5976,9 +5986,9 @@ private Mono> wrapPointOperationWithAvailabilityStrat monoList.add(initialMonoAcrossAllRegions); } } else { - clonedOptions.setExcludeRegions( + clonedOptions.setExcludedRegions( getEffectiveExcludedRegionsForHedging( - nonNullRequestOptions.getExcludeRegions(), + nonNullRequestOptions.getExcludedRegions(), orderedApplicableRegionsForSpeculation, region) ); @@ -6210,7 +6220,7 @@ private List getApplicableRegionsForSpeculation( resourceType, operationType, isIdempotentWriteRetriesEnabled, - options.getExcludeRegions()); + options.getExcludedRegions()); } private List getApplicableRegionsForSpeculation( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java index eb6ebff5002e..b12ae8905d1d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosChangeFeedRequestOptions.java @@ -537,19 +537,19 @@ CosmosChangeFeedRequestOptionsImpl getImpl() { } String getCollectionRid() { - return collectionRid; + return this.actualRequestOptions.getCollectionRid(); } void setCollectionRid(String collectionRid) { - this.collectionRid = collectionRid; + this.actualRequestOptions.setCollectionRid(collectionRid); } PartitionKeyDefinition getPartitionKeyDefinition() { - return partitionKeyDefinition; + return this.actualRequestOptions.getPartitionKeyDefinition(); } void setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { - this.partitionKeyDefinition = partitionKeyDefinition; + this.actualRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); } /////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index b1b7354b546d..a3f65e5db054 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -622,6 +622,11 @@ public String getRequestContinuation(CosmosQueryRequestOptions options) { return options.getRequestContinuation(); } + @Override + public Integer getMaxItemCountForVectorSearch(CosmosQueryRequestOptions options) { + return options.getMaxItemCountForVectorSearch(); + } + @Override public void setPartitionKeyDefinition(CosmosQueryRequestOptions options, PartitionKeyDefinition partitionKeyDefinition) { options.actualRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); @@ -641,11 +646,6 @@ public void setCollectionRid(CosmosQueryRequestOptions options, String collectio public String getCollectionRid(CosmosQueryRequestOptions options) { return options.actualRequestOptions.getCollectionRid(); } - - @Override - public Integer getMaxItemCountForVectorSearch(CosmosQueryRequestOptions options) { - return options.getMaxItemCountForVectorSearch(); - } }); } From e7b65a33ab7402c5d3f2e510ecba566ee29735c4 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 1 Jul 2024 12:17:08 -0400 Subject: [PATCH 101/140] Refactoring. --- .../PartitionLevelCircuitBreakerTests.java | 4 +- .../implementation/ClientRetryPolicy.java | 6 +- .../ClientSideRequestStatistics.java | 48 +++---- .../cosmos/implementation/HttpConstants.java | 1 + .../RequestTimeoutException.java | 14 ++ ...nsecutiveExceptionBasedCircuitBreaker.java | 123 ++++++++++-------- ...itionEndpointManagerForCircuitBreaker.java | 36 ++--- .../circuitBreaker/ICircuitBreaker.java | 7 - .../LocationSpecificHealthContext.java | 104 ++++++++++++--- ...pecificHealthContextTransitionHandler.java | 80 +++++++----- .../rntbd/RntbdRequestRecord.java | 2 +- 11 files changed, 264 insertions(+), 161 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 4485618f813e..9837f1f874de 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -2747,9 +2747,9 @@ private static int getAverageExceptionCountByPartitionKeyRangeByRegion( boolean failuresExist = false; for (LocationSpecificHealthContext locationSpecificHealthContext : locationEndpointToLocationSpecificContextForPartition.values()) { - count += locationSpecificHealthContext.getExceptionCountForRead() + locationSpecificHealthContext.getExceptionCountForWrite(); + count += locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() + locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); - if (locationSpecificHealthContext.getExceptionCountForRead() + locationSpecificHealthContext.getExceptionCountForWrite() > 0) { + if (locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() + locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking() > 0) { failuresExist = true; regionCountWithFailures++; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 470f33e14a02..39f228ad893c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -165,9 +165,11 @@ public Mono shouldRetry(Exception e) { clientException); } - if (clientException != null && Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.REQUEST_TIMEOUT)) { + if (clientException != null + && Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.REQUEST_TIMEOUT) + && Exceptions.isSubStatusCode(clientException, HttpConstants.SubStatusCodes.TRANSIT_TIMEOUT)) { logger.info( - "Request timeout - IsReadRequest {}, IsWebExceptionRetriable {}, NonIdempotentWriteRetriesEnabled {}", + "Request timeout - IsReadRequest {}, IsWebExceptionRetriable {}, NonIdempotentWriteRetriesEnabled {}", this.isReadRequest, false, this.request.getNonIdempotentWriteRetriesEnabled(), diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index c3fc8c151f3d..0c7abf809bc5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -47,7 +47,7 @@ public class ClientSideRequestStatistics { private Set failedReplicas; private Instant requestStartTimeUTC; private Instant requestEndTimeUTC; -// private Set regionsContacted; + private Set regionsContacted; private NavigableSet regionsContactedWithContext; private Set locationEndpointsContacted; private RetryContext retryContext; @@ -71,7 +71,7 @@ public ClientSideRequestStatistics(DiagnosticsClientContext diagnosticsClientCon this.addressResolutionStatistics = new HashMap<>(); this.contactedReplicas = Collections.synchronizedList(new ArrayList<>()); this.failedReplicas = Collections.synchronizedSet(new HashSet<>()); -// this.regionsContacted = Collections.synchronizedSet(new HashSet<>()); + this.regionsContacted = Collections.synchronizedSet(new HashSet<>()); this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>()); this.locationEndpointsContacted = Collections.synchronizedSet(new HashSet<>()); this.metadataDiagnosticsContext = new MetadataDiagnosticsContext(); @@ -93,7 +93,7 @@ public ClientSideRequestStatistics(ClientSideRequestStatistics toBeCloned) { this.addressResolutionStatistics = new HashMap<>(toBeCloned.addressResolutionStatistics); this.contactedReplicas = Collections.synchronizedList(new ArrayList<>(toBeCloned.contactedReplicas)); this.failedReplicas = Collections.synchronizedSet(new HashSet<>(toBeCloned.failedReplicas)); -// this.regionsContacted = Collections.synchronizedSet(new HashSet<>(toBeCloned.regionsContacted)); + this.regionsContacted = Collections.synchronizedSet(new HashSet<>(toBeCloned.regionsContacted)); this.regionsContactedWithContext = Collections.synchronizedNavigableSet(new TreeSet<>(toBeCloned.regionsContactedWithContext)); this.locationEndpointsContacted = Collections.synchronizedSet( new HashSet<>(toBeCloned.locationEndpointsContacted)); @@ -187,7 +187,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost if (locationEndPoint != null) { storeResponseStatistics.regionName = globalEndpointManager.getRegionName(locationEndPoint, request.getOperationType()); -// this.regionsContacted.add(storeResponseStatistics.regionName); + this.regionsContacted.add(storeResponseStatistics.regionName); this.locationEndpointsContacted.add(locationEndPoint); this.regionsContactedWithContext.add(new RegionWithContext(storeResponseStatistics.regionName, locationEndPoint)); } @@ -224,7 +224,7 @@ public void recordGatewayResponse( String regionName = globalEndpointManager.getRegionName(locationEndPoint, rxDocumentServiceRequest.getOperationType()); -// this.regionsContacted.add(regionName); + this.regionsContacted.add(regionName); this.locationEndpointsContacted.add(locationEndPoint); this.regionsContactedWithContext.add(new RegionWithContext(regionName, locationEndPoint)); @@ -437,20 +437,20 @@ private void mergeRegionWithContextSet(NavigableSet other) { } } -// private void mergeRegionsContacted(Set other) { -// if (other == null) { -// return; -// } -// -// if (this.regionsContacted == null || this.regionsContacted.isEmpty()) { -// this.regionsContacted = other; -// return; -// } -// -// for (String region : other) { -// this.regionsContacted.add(region); -// } -// } + private void mergeRegionsContacted(Set other) { + if (other == null) { + return; + } + + if (this.regionsContacted == null || this.regionsContacted.isEmpty()) { + this.regionsContacted = other; + return; + } + + for (String region : other) { + this.regionsContacted.add(region); + } + } private void mergeStartTime(Instant other) { if (other == null) { @@ -497,7 +497,7 @@ public void mergeClientSideRequestStatistics(ClientSideRequestStatistics other) this.mergeContactedReplicas(other.contactedReplicas); this.mergeFailedReplica(other.failedReplicas); this.mergeLocationEndpointsContacted(other.locationEndpointsContacted); -// this.mergeRegionsContacted(other.regionsContacted); + this.mergeRegionsContacted(other.regionsContacted); this.mergeRegionWithContextSet(other.regionsContactedWithContext); this.mergeStartTime(other.requestStartTimeUTC); this.mergeEndTime(other.requestEndTimeUTC); @@ -529,12 +529,12 @@ public void setFailedReplicas(Set failedReplicas) { } public Set getContactedRegionNames() { - return this.regionsContactedWithContext.stream().map(regionWithContext -> regionWithContext.regionContacted).collect(Collectors.toSet()); + return regionsContacted; } -// public void setRegionsContacted(Set regionsContacted) { -// this.regionsContacted = Collections.synchronizedSet(regionsContacted); -// } + public void setRegionsContacted(Set regionsContacted) { + this.regionsContacted = Collections.synchronizedSet(regionsContacted); + } public Set getLocationEndpointsContacted() { return locationEndpointsContacted; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java index 2f91d556ec94..a6a9fd38b385 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java @@ -448,6 +448,7 @@ public static class SubStatusCodes { public static final int INVALID_BACKEND_RESPONSE = 20908; public static final int UNKNOWN_QUORUM_RESULT = 20909; public static final int INVALID_RESULT = 20910; + public static final int TRANSIT_TIMEOUT = 20911; //SDK Codes (Server) // IMPORTANT - whenever possible use consistency substatus codes that .Net SDK also uses diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java index 510781c74b59..e89cb494ab84 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestTimeoutException.java @@ -49,6 +49,10 @@ public RequestTimeoutException(String message, URI requestUri) { this(message, null, null, requestUri); } + public RequestTimeoutException(String message, URI requestUri, int subStatusCode) { + this(message, null, null, requestUri, subStatusCode); + } + /** * Instantiates a new Request timeout exception. * @@ -95,4 +99,14 @@ public RequestTimeoutException(String message, HttpHeaders headers, SocketAddres super(message, innerException, HttpUtils.asMap(headers), HttpConstants.StatusCodes.REQUEST_TIMEOUT, requestUrl != null ? requestUrl.toString() : null); } + + RequestTimeoutException(String message, + Exception innerException, + HttpHeaders headers, + URI requestUrl, + int subStatusCode) { + super(message, innerException, HttpUtils.asMap(headers), HttpConstants.StatusCodes.REQUEST_TIMEOUT, + requestUrl != null ? requestUrl.toString() : null); + BridgeInternal.setSubStatusCode(this, subStatusCode); + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 0fc88faa4803..123676862089 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -6,7 +6,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class ConsecutiveExceptionBasedCircuitBreaker implements ICircuitBreaker { +public class ConsecutiveExceptionBasedCircuitBreaker { private static final Logger logger = LoggerFactory.getLogger(ConsecutiveExceptionBasedCircuitBreaker.class); private final PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig; @@ -18,7 +18,7 @@ public ConsecutiveExceptionBasedCircuitBreaker(PartitionLevelCircuitBreakerConfi public LocationSpecificHealthContext handleException(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int exceptionCountAfterHandling - = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); + = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus(); @@ -32,23 +32,27 @@ public LocationSpecificHealthContext handleException(LocationSpecificHealthConte int successCountAfterHandling = 0; if (isReadOnlyRequest) { - return new LocationSpecificHealthContext( - locationSpecificHealthContext.getSuccessCountForWrite(), - locationSpecificHealthContext.getExceptionCountForWrite(), - successCountAfterHandling, - exceptionCountAfterHandling, - locationSpecificHealthContext.getUnavailableSince(), - locationSpecificHealthContext.getLocationHealthStatus(), - locationSpecificHealthContext.isExceptionThresholdBreached()); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) + .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking()) + .withSuccessCountForReadForRecovery(successCountAfterHandling) + .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling) + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) + .build(); } else { - return new LocationSpecificHealthContext( - successCountAfterHandling, - exceptionCountAfterHandling, - locationSpecificHealthContext.getSuccessCountForRead(), - locationSpecificHealthContext.getExceptionCountForRead(), - locationSpecificHealthContext.getUnavailableSince(), - locationSpecificHealthContext.getLocationHealthStatus(), - locationSpecificHealthContext.isExceptionThresholdBreached()); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(successCountAfterHandling) + .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling) + .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) + .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking()) + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) + .build(); } case Unavailable: throw new IllegalStateException(); @@ -59,10 +63,10 @@ public LocationSpecificHealthContext handleException(LocationSpecificHealthConte public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int exceptionCountAfterHandling - = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); + = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); int successCountAfterHandling - = (isReadOnlyRequest) ? locationSpecificHealthContext.getSuccessCountForRead() : locationSpecificHealthContext.getSuccessCountForWrite(); + = (isReadOnlyRequest) ? locationSpecificHealthContext.getSuccessCountForReadForRecovery() : locationSpecificHealthContext.getSuccessCountForWriteForRecovery(); LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus(); @@ -74,46 +78,57 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext exceptionCountAfterHandling = 0; if (isReadOnlyRequest) { - return new LocationSpecificHealthContext( - locationSpecificHealthContext.getSuccessCountForWrite(), - locationSpecificHealthContext.getExceptionCountForWrite(), - locationSpecificHealthContext.getSuccessCountForRead(), - exceptionCountAfterHandling, - locationSpecificHealthContext.getUnavailableSince(), - locationSpecificHealthContext.getLocationHealthStatus(), - locationSpecificHealthContext.isExceptionThresholdBreached()); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) + .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking()) + .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) + .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling) + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) + .build(); + } else { - return new LocationSpecificHealthContext( - locationSpecificHealthContext.getSuccessCountForWrite(), - exceptionCountAfterHandling, - locationSpecificHealthContext.getSuccessCountForRead(), - locationSpecificHealthContext.getExceptionCountForRead(), - locationSpecificHealthContext.getUnavailableSince(), - locationSpecificHealthContext.getLocationHealthStatus(), - locationSpecificHealthContext.isExceptionThresholdBreached()); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) + .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling) + .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) + .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking()) + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) + .build(); } case HealthyTentative: successCountAfterHandling++; if (isReadOnlyRequest) { - return new LocationSpecificHealthContext( - locationSpecificHealthContext.getSuccessCountForWrite(), - locationSpecificHealthContext.getExceptionCountForWrite(), - successCountAfterHandling, - exceptionCountAfterHandling, - locationSpecificHealthContext.getUnavailableSince(), - locationSpecificHealthContext.getLocationHealthStatus(), - locationSpecificHealthContext.isExceptionThresholdBreached()); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) + .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking()) + .withSuccessCountForReadForRecovery(successCountAfterHandling) + .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling) + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) + .build(); + } else { - return new LocationSpecificHealthContext( - successCountAfterHandling, - exceptionCountAfterHandling, - locationSpecificHealthContext.getSuccessCountForRead(), - locationSpecificHealthContext.getExceptionCountForRead(), - locationSpecificHealthContext.getUnavailableSince(), - locationSpecificHealthContext.getLocationHealthStatus(), - locationSpecificHealthContext.isExceptionThresholdBreached()); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(successCountAfterHandling) + .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling) + .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) + .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking()) + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) + .build(); + } case Unavailable: throw new IllegalStateException(); @@ -125,7 +140,7 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext public boolean shouldHealthStatusBeDowngraded(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int exceptionCountActual - = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); + = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); return exceptionCountActual >= getAllowedExceptionCountToMaintainStatus(locationSpecificHealthContext.getLocationHealthStatus(), isReadOnlyRequest); } @@ -133,7 +148,7 @@ public boolean shouldHealthStatusBeDowngraded(LocationSpecificHealthContext loca public boolean canHealthStatusBeUpgraded(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { int successCountActual - = isReadOnlyRequest ? locationSpecificHealthContext.getSuccessCountForRead() : locationSpecificHealthContext.getSuccessCountForWrite(); + = isReadOnlyRequest ? locationSpecificHealthContext.getSuccessCountForReadForRecovery() : locationSpecificHealthContext.getSuccessCountForWriteForRecovery(); LocationHealthStatus locationHealthStatus = locationSpecificHealthContext.getLocationHealthStatus(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 631f3d4218c0..864bb795f72f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -259,14 +259,16 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe this.locationEndpointToLocationSpecificContextForPartition.compute(locationWithException, (locationAsKey, locationSpecificContextAsVal) -> { if (locationSpecificContextAsVal == null) { - locationSpecificContextAsVal = new LocationSpecificHealthContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.HealthyWithFailures, - false); + + locationSpecificContextAsVal = new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(0) + .withExceptionCountForWriteForCircuitBreaking(0) + .withSuccessCountForReadForRecovery(0) + .withExceptionCountForReadForCircuitBreaking(0) + .withUnavailableSince(Instant.MAX) + .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures) + .withExceptionThresholdBreached(false) + .build(); } LocationSpecificHealthContext locationSpecificHealthContextAfterTransition = this.locationSpecificHealthContextTransitionHandler.handleException( @@ -295,14 +297,16 @@ private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, UR LocationSpecificHealthContext locationSpecificHealthContextAfterTransition; if (locationSpecificContextAsVal == null) { - locationSpecificContextAsVal = new LocationSpecificHealthContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.Healthy, - false); + + locationSpecificContextAsVal = new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(0) + .withExceptionCountForWriteForCircuitBreaking(0) + .withSuccessCountForReadForRecovery(0) + .withExceptionCountForReadForCircuitBreaking(0) + .withUnavailableSince(Instant.MAX) + .withLocationHealthStatus(LocationHealthStatus.Healthy) + .withExceptionThresholdBreached(false) + .build(); } locationSpecificHealthContextAfterTransition = this.locationSpecificHealthContextTransitionHandler.handleSuccess( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java deleted file mode 100644 index d17fc0579a7c..000000000000 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ICircuitBreaker.java +++ /dev/null @@ -1,7 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -package com.azure.cosmos.implementation.circuitBreaker; - -public interface ICircuitBreaker { -} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java index ecd4c92e8fdb..f2bdea913f01 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java @@ -6,27 +6,27 @@ import java.time.Instant; public class LocationSpecificHealthContext { - private final int exceptionCountForWrite; - private final int successCountForWrite; - private final int exceptionCountForRead; - private final int successCountForRead; + private final int exceptionCountForWriteForCircuitBreaking; + private final int successCountForWriteForRecovery; + private final int exceptionCountForReadForCircuitBreaking; + private final int successCountForReadForRecovery; private final Instant unavailableSince; private final LocationHealthStatus locationHealthStatus; private final boolean isExceptionThresholdBreached; - public LocationSpecificHealthContext( - int successCountForWrite, - int exceptionCountForWrite, - int successCountForRead, - int exceptionCountForRead, + LocationSpecificHealthContext( + int successCountForWriteForRecovery, + int exceptionCountForWriteForCircuitBreaking, + int successCountForReadForRecovery, + int exceptionCountForReadForCircuitBreaking, Instant unavailableSince, LocationHealthStatus locationHealthStatus, boolean isExceptionThresholdBreached) { - this.successCountForWrite = successCountForWrite; - this.exceptionCountForWrite = exceptionCountForWrite; - this.exceptionCountForRead = exceptionCountForRead; - this.successCountForRead = successCountForRead; + this.successCountForWriteForRecovery = successCountForWriteForRecovery; + this.exceptionCountForWriteForCircuitBreaking = exceptionCountForWriteForCircuitBreaking; + this.successCountForReadForRecovery = successCountForReadForRecovery; + this.exceptionCountForReadForCircuitBreaking = exceptionCountForReadForCircuitBreaking; this.unavailableSince = unavailableSince; this.locationHealthStatus = locationHealthStatus; this.isExceptionThresholdBreached = isExceptionThresholdBreached; @@ -42,20 +42,20 @@ public boolean isRegionAvailableToProcessRequests() { this.locationHealthStatus == LocationHealthStatus.HealthyTentative; } - public int getExceptionCountForWrite() { - return exceptionCountForWrite; + public int getExceptionCountForWriteForCircuitBreaking() { + return exceptionCountForWriteForCircuitBreaking; } - public int getSuccessCountForWrite() { - return successCountForWrite; + public int getSuccessCountForWriteForRecovery() { + return successCountForWriteForRecovery; } - public int getExceptionCountForRead() { - return exceptionCountForRead; + public int getExceptionCountForReadForCircuitBreaking() { + return exceptionCountForReadForCircuitBreaking; } - public int getSuccessCountForRead() { - return successCountForRead; + public int getSuccessCountForReadForRecovery() { + return successCountForReadForRecovery; } public Instant getUnavailableSince() { @@ -65,4 +65,66 @@ public Instant getUnavailableSince() { public LocationHealthStatus getLocationHealthStatus() { return locationHealthStatus; } + + static class Builder { + + private int exceptionCountForWriteForCircuitBreaking; + private int successCountForWriteForRecovery; + private int exceptionCountForReadForCircuitBreaking; + private int successCountForReadForRecovery; + private Instant unavailableSince; + private LocationHealthStatus locationHealthStatus; + private boolean isExceptionThresholdBreached; + + public Builder() { + } + + public Builder withExceptionCountForWriteForCircuitBreaking(int exceptionCountForWriteForCircuitBreaking) { + this.exceptionCountForWriteForCircuitBreaking = exceptionCountForWriteForCircuitBreaking; + return this; + } + + public Builder withSuccessCountForWriteForRecovery(int successCountForWriteForRecovery) { + this.successCountForWriteForRecovery = successCountForWriteForRecovery; + return this; + } + + public Builder withExceptionCountForReadForCircuitBreaking(int exceptionCountForReadForCircuitBreaking) { + this.exceptionCountForReadForCircuitBreaking = exceptionCountForReadForCircuitBreaking; + return this; + } + + public Builder withSuccessCountForReadForRecovery(int successCountForReadForRecovery) { + this.successCountForReadForRecovery = successCountForReadForRecovery; + return this; + } + + public Builder withUnavailableSince(Instant unavailableSince) { + this.unavailableSince = unavailableSince; + return this; + } + + public Builder withLocationHealthStatus(LocationHealthStatus locationHealthStatus) { + this.locationHealthStatus = locationHealthStatus; + return this; + } + + public Builder withExceptionThresholdBreached(boolean exceptionThresholdBreached) { + isExceptionThresholdBreached = exceptionThresholdBreached; + return this; + } + + public LocationSpecificHealthContext build() { + LocationSpecificHealthContext locationSpecificHealthContext = new LocationSpecificHealthContext( + this.successCountForWriteForRecovery, + this.exceptionCountForWriteForCircuitBreaking, + this.successCountForReadForRecovery, + this.exceptionCountForReadForCircuitBreaking, + this.unavailableSince, + this.locationHealthStatus, + this.isExceptionThresholdBreached); + + return locationSpecificHealthContext; + } + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index 59fa4b3b2400..3cb5e3412fdf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -38,7 +38,7 @@ public LocationSpecificHealthContext handleSuccess( LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificHealthContext.getLocationHealthStatus(); int exceptionCountActual - = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForRead() : locationSpecificHealthContext.getExceptionCountForWrite(); + = isReadOnlyRequest ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); switch (currentLocationHealthStatusSnapshot) { case Healthy: @@ -145,7 +145,7 @@ public LocationSpecificHealthContext handleException( partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getResourceId(), - isReadOnlyRequest ? locationSpecificHealthContextInner.getExceptionCountForRead() : locationSpecificHealthContextInner.getExceptionCountForWrite(), + isReadOnlyRequest ? locationSpecificHealthContextInner.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContextInner.getExceptionCountForWriteForCircuitBreaking(), this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } @@ -190,41 +190,53 @@ public LocationSpecificHealthContext transitionHealthStatus(LocationHealthStatus switch (newStatus) { case Healthy: - return new LocationSpecificHealthContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.Healthy, - false); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(0) + .withExceptionCountForWriteForCircuitBreaking(0) + .withSuccessCountForReadForRecovery(0) + .withExceptionCountForReadForCircuitBreaking(0) + .withUnavailableSince(Instant.MAX) + .withLocationHealthStatus(LocationHealthStatus.Healthy) + .withExceptionThresholdBreached(false) + .build(); + case HealthyWithFailures: - return new LocationSpecificHealthContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.HealthyWithFailures, - false); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(0) + .withExceptionCountForWriteForCircuitBreaking(0) + .withSuccessCountForReadForRecovery(0) + .withExceptionCountForReadForCircuitBreaking(0) + .withUnavailableSince(Instant.MAX) + .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures) + .withExceptionThresholdBreached(false) + .build(); + case Unavailable: - return new LocationSpecificHealthContext( - 0, - 0, - 0, - 0, - Instant.now(), - LocationHealthStatus.Unavailable, - true); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(0) + .withExceptionCountForWriteForCircuitBreaking(0) + .withSuccessCountForReadForRecovery(0) + .withExceptionCountForReadForCircuitBreaking(0) + .withUnavailableSince(Instant.now()) + .withLocationHealthStatus(LocationHealthStatus.Unavailable) + .withExceptionThresholdBreached(true) + .build(); + case HealthyTentative: - return new LocationSpecificHealthContext( - 0, - 0, - 0, - 0, - Instant.MAX, - LocationHealthStatus.HealthyTentative, - false); + + return new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(0) + .withExceptionCountForWriteForCircuitBreaking(0) + .withSuccessCountForReadForRecovery(0) + .withExceptionCountForReadForCircuitBreaking(0) + .withUnavailableSince(Instant.now()) + .withLocationHealthStatus(LocationHealthStatus.HealthyTentative) + .withExceptionThresholdBreached(false) + .build(); + default: throw new IllegalStateException("Unsupported health status: " + newStatus); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java index 1e9d57bdcd35..f3113c5e2d6c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestRecord.java @@ -273,7 +273,7 @@ public boolean expire() { error = new GoneException(this.toString(), null, this.args.physicalAddressUri().getURI(), HttpConstants.SubStatusCodes.TRANSPORT_GENERATED_410); } else { // For sent write request, converting to requestTimeout, will not be retried. - error = new RequestTimeoutException(this.toString(), this.args.physicalAddressUri().getURI()); + error = new RequestTimeoutException(this.toString(), this.args.physicalAddressUri().getURI(), HttpConstants.SubStatusCodes.TRANSIT_TIMEOUT); } BridgeInternal.setRequestHeaders(error, this.args.serviceRequest().getHeaders()); From b950b909af6a2ed5d21feb0dddf912d322076541 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 1 Jul 2024 21:18:56 -0400 Subject: [PATCH 102/140] Refactoring. --- .../ClientSideRequestStatistics.java | 19 +++++++---- .../DocumentServiceRequestContext.java | 12 ++++--- ...itionEndpointManagerForCircuitBreaker.java | 20 +++++------ .../LocationSpecificHealthContext.java | 34 ++++++++++++++++++- 4 files changed, 60 insertions(+), 25 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index 0c7abf809bc5..ddf67f04cd66 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -3,6 +3,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; import com.azure.cosmos.implementation.cpu.CpuMemoryMonitor; import com.azure.cosmos.implementation.directconnectivity.StoreResponseDiagnostics; import com.azure.cosmos.implementation.directconnectivity.StoreResultDiagnostics; @@ -164,7 +165,7 @@ public void recordResponse(RxDocumentServiceRequest request, StoreResultDiagnost this.approximateInsertionCountInBloomFilter = request.requestContext.getApproximateBloomFilterInsertionCount(); storeResponseStatistics.sessionTokenEvaluationResults = request.requestContext.getSessionTokenEvaluationResults(); - storeResponseStatistics.regionToHealthStatusesForPartitionKeyRange = request.requestContext.getRegionToHealthStatusesForPartitionKeyRange(); + storeResponseStatistics.locationToLocationSpecificHealthContext = request.requestContext.getLocationToLocationSpecificHealthContext(); if (request.requestContext.getEndToEndOperationLatencyPolicyConfig() != null) { storeResponseStatistics.e2ePolicyCfg = @@ -238,7 +239,7 @@ public void recordGatewayResponse( if (rxDocumentServiceRequest.requestContext != null) { gatewayStatistics.sessionTokenEvaluationResults = rxDocumentServiceRequest.requestContext.getSessionTokenEvaluationResults(); - gatewayStatistics.regionToHealthStatusesForPartitionKeyRange = rxDocumentServiceRequest.requestContext.getRegionToHealthStatusesForPartitionKeyRange(); + gatewayStatistics.locationToLocationSpecificHealthContext = rxDocumentServiceRequest.requestContext.getLocationToLocationSpecificHealthContext(); } } gatewayStatistics.statusCode = storeResponseDiagnostics.getStatusCode(); @@ -668,7 +669,7 @@ public static class StoreResponseStatistics { private Set sessionTokenEvaluationResults; @JsonSerialize - private Utils.ValueHolder> regionToHealthStatusesForPartitionKeyRange; + private Utils.ValueHolder> locationToLocationSpecificHealthContext; public String getExcludedRegions() { return this.excludedRegions; } @@ -700,6 +701,10 @@ public Set getSessionTokenEvaluationResults() { return sessionTokenEvaluationResults; } + public Utils.ValueHolder> getLocationToLocationSpecificHealthContext() { + return locationToLocationSpecificHealthContext; + } + @JsonIgnore public Duration getDuration() { if (requestStartTimeUTC == null || @@ -858,7 +863,7 @@ public static class GatewayStatistics { private String faultInjectionRuleId; private List faultInjectionEvaluationResults; private Set sessionTokenEvaluationResults; - private Utils.ValueHolder> regionToHealthStatusesForPartitionKeyRange; + private Utils.ValueHolder> locationToLocationSpecificHealthContext; public String getSessionToken() { return sessionToken; @@ -916,8 +921,8 @@ public Set getSessionTokenEvaluationResults() { return sessionTokenEvaluationResults; } - public Map getRegionToHealthStatusesForPartitionKeyRange() { - return regionToHealthStatusesForPartitionKeyRange.v; + public Utils.ValueHolder> getLocationToLocationSpecificHealthContext() { + return locationToLocationSpecificHealthContext; } public static class GatewayStatisticsSerializer extends StdSerializer { @@ -953,7 +958,7 @@ public void serialize(GatewayStatistics gatewayStatistics, } this.writeNonEmptyStringSetField(jsonGenerator, "sessionTokenEvaluationResults", gatewayStatistics.getSessionTokenEvaluationResults()); - this.writeNonNullObjectField(jsonGenerator, "regionHealthStatusesForPkRange", gatewayStatistics.getRegionToHealthStatusesForPartitionKeyRange()); + this.writeNonNullObjectField(jsonGenerator, "locationToLocationSpecificHealthContext", gatewayStatistics.getLocationToLocationSpecificHealthContext()); jsonGenerator.writeEndObject(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index ed8e7812b04e..4175bd26a90c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -7,6 +7,7 @@ import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.circuitBreaker.LocationSpecificHealthContext; import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.directconnectivity.StoreResult; import com.azure.cosmos.implementation.directconnectivity.TimeoutHelper; @@ -61,7 +62,8 @@ public class DocumentServiceRequestContext implements Cloneable { private FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker; private volatile Supplier clientRetryPolicySupplier; - private volatile Utils.ValueHolder> regionToHealthStatusesForPartitionKeyRange = new Utils.ValueHolder<>(); + private volatile Utils.ValueHolder> regionToLocationSpecificHealthContext + = new Utils.ValueHolder<>(); public DocumentServiceRequestContext() {} @@ -217,12 +219,12 @@ public void setClientRetryPolicySupplier(Supplier cli this.clientRetryPolicySupplier = clientRetryPolicySupplier; } - public Utils.ValueHolder> getRegionToHealthStatusesForPartitionKeyRange() { - return regionToHealthStatusesForPartitionKeyRange; + public Utils.ValueHolder> getLocationToLocationSpecificHealthContext() { + return regionToLocationSpecificHealthContext; } - public void setRegionToHealthStatusesForPartitionKeyRange(Map regionToHealthStatusesForPartitionKeyRange) { - this.regionToHealthStatusesForPartitionKeyRange.v = regionToHealthStatusesForPartitionKeyRange; + public void setLocationToLocationSpecificHealthContext(Map regionToLocationSpecificHealthContext) { + this.regionToLocationSpecificHealthContext.v = regionToLocationSpecificHealthContext; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 864bb795f72f..0bbf393267f7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -89,7 +89,7 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest partitionLevelLocationUnavailabilityInfoAsVal.areLocationsAvailableForPartitionKeyRange(applicableEndpoints)); } - request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionLevelLocationUnavailabilityInfoAsVal.getRegionToHealthStatus()); + request.requestContext.setLocationToLocationSpecificHealthContext(partitionLevelLocationUnavailabilityInfoAsVal.regionToLocationSpecificHealthContext); return partitionLevelLocationUnavailabilityInfoAsVal; }); @@ -139,7 +139,7 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r succeededLocation, request.isReadOnlyRequest()); - request.requestContext.setRegionToHealthStatusesForPartitionKeyRange(partitionKeyRangeToFailoverInfoAsVal.getRegionToHealthStatus()); + request.requestContext.setLocationToLocationSpecificHealthContext(partitionKeyRangeToFailoverInfoAsVal.regionToLocationSpecificHealthContext); return partitionKeyRangeToFailoverInfoAsVal; }); } @@ -243,12 +243,12 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques private class PartitionLevelLocationUnavailabilityInfo { private final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; - private final ConcurrentHashMap regionToHealthStatus; + private final ConcurrentHashMap regionToLocationSpecificHealthContext; private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler; private PartitionLevelLocationUnavailabilityInfo() { this.locationEndpointToLocationSpecificContextForPartition = new ConcurrentHashMap<>(); - this.regionToHealthStatus = new ConcurrentHashMap<>(); + this.regionToLocationSpecificHealthContext = new ConcurrentHashMap<>(); this.locationSpecificHealthContextTransitionHandler = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationSpecificHealthContextTransitionHandler; } @@ -278,11 +278,11 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe locationWithException, isReadOnlyRequest); - this.regionToHealthStatus.put( + this.regionToLocationSpecificHealthContext.put( GlobalPartitionEndpointManagerForCircuitBreaker .this.globalEndpointManager .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), - locationSpecificHealthContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); + locationSpecificHealthContextAfterTransition); isExceptionThresholdBreached.set(locationSpecificHealthContextAfterTransition.isExceptionThresholdBreached()); return locationSpecificHealthContextAfterTransition; @@ -316,11 +316,11 @@ private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, UR false, isReadOnlyRequest); - this.regionToHealthStatus.put( + this.regionToLocationSpecificHealthContext.put( GlobalPartitionEndpointManagerForCircuitBreaker .this.globalEndpointManager .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), - locationSpecificHealthContextAfterTransition.getLocationHealthStatus().getStringifiedLocationHealthStatus()); + locationSpecificHealthContextAfterTransition); return locationSpecificHealthContextAfterTransition; }); @@ -342,10 +342,6 @@ public boolean areLocationsAvailableForPartitionKeyRange(List availableLoca return false; } - - public ConcurrentHashMap getRegionToHealthStatus() { - return regionToHealthStatus; - } } public ConsecutiveExceptionBasedCircuitBreaker getConsecutiveExceptionBasedCircuitBreaker() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java index f2bdea913f01..e36a7ba879d7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java @@ -3,9 +3,20 @@ package com.azure.cosmos.implementation.circuitBreaker; +import com.azure.cosmos.implementation.DiagnosticsInstantSerializer; +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.annotation.JsonSerialize; + +import java.io.IOException; +import java.io.Serializable; import java.time.Instant; -public class LocationSpecificHealthContext { +@JsonSerialize(using = LocationSpecificHealthContext.LocationSpecificHealthContextSerializer.class) +public class LocationSpecificHealthContext implements Serializable { + + private static final long serialVersionUID = 1L; + private final int exceptionCountForWriteForCircuitBreaking; private final int successCountForWriteForRecovery; private final int exceptionCountForReadForCircuitBreaking; @@ -127,4 +138,25 @@ public LocationSpecificHealthContext build() { return locationSpecificHealthContext; } } + + static class LocationSpecificHealthContextSerializer extends com.fasterxml.jackson.databind.JsonSerializer { + + @Override + public void serialize(LocationSpecificHealthContext value, JsonGenerator gen, SerializerProvider provider) throws IOException { + gen.writeStartObject(); + + gen.writeNumberField("exceptionCountForWriteForCircuitBreaking", value.exceptionCountForWriteForCircuitBreaking); + gen.writeNumberField("exceptionCountForReadForCircuitBreaking", value.exceptionCountForReadForCircuitBreaking); + gen.writeNumberField("successCountForWriteForRecovery", value.successCountForWriteForRecovery); + gen.writeNumberField("successCountForReadForRecovery", value.successCountForReadForRecovery); + gen.writePOJOField("locationHealthStatus", value.locationHealthStatus); + gen.writeStringField("unavailableSince", toInstantString(value.unavailableSince)); + + gen.writeEndObject(); + } + + private String toInstantString(Instant instant) { + return DiagnosticsInstantSerializer.fromInstant(instant); + } + } } From 12f7c1fca5ba6f05f597f3cfcae674eda836d6ed Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 2 Jul 2024 10:51:10 -0400 Subject: [PATCH 103/140] Refactoring. --- .../implementation/ClientRetryPolicy.java | 24 ++++++---- ...nsecutiveExceptionBasedCircuitBreaker.java | 46 +++++++++---------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 39f228ad893c..d7f9d2708bdd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -168,12 +168,15 @@ public Mono shouldRetry(Exception e) { if (clientException != null && Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.REQUEST_TIMEOUT) && Exceptions.isSubStatusCode(clientException, HttpConstants.SubStatusCodes.TRANSIT_TIMEOUT)) { - logger.info( - "Request timeout - IsReadRequest {}, IsWebExceptionRetriable {}, NonIdempotentWriteRetriesEnabled {}", - this.isReadRequest, - false, - this.request.getNonIdempotentWriteRetriesEnabled(), - e); + + if (logger.isDebugEnabled()) { + logger.debug( + "Request timeout - IsReadRequest {}, IsWebExceptionRetriable {}, NonIdempotentWriteRetriesEnabled {}", + this.isReadRequest, + false, + this.request.getNonIdempotentWriteRetriesEnabled(), + e); + } return this.shouldRetryOnRequestTimeout( this.isReadRequest, @@ -181,11 +184,12 @@ public Mono shouldRetry(Exception e) { ); } - if (clientException != null && - Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.INTERNAL_SERVER_ERROR) && - Exceptions.isSubStatusCode(clientException, HttpConstants.SubStatusCodes.UNKNOWN)) { + if (clientException != null && Exceptions.isStatusCode(clientException, HttpConstants.StatusCodes.INTERNAL_SERVER_ERROR)) { + + if (logger.isDebugEnabled()) { + logger.info("Internal server error - IsReadRequest {}", this.isReadRequest, e); + } - logger.info("Internal server error - IsReadRequest {}", this.isReadRequest, e); return this.shouldRetryOnInternalServerError(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 123676862089..102d6d6d6a8e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -31,27 +31,27 @@ public LocationSpecificHealthContext handleException(LocationSpecificHealthConte exceptionCountAfterHandling++; int successCountAfterHandling = 0; + LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); + if (isReadOnlyRequest) { - return new LocationSpecificHealthContext.Builder() + return builder .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking()) .withSuccessCountForReadForRecovery(successCountAfterHandling) .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling) - .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) - .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) .build(); + } else { - return new LocationSpecificHealthContext.Builder() + return builder .withSuccessCountForWriteForRecovery(successCountAfterHandling) .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling) .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking()) - .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) - .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) .build(); } case Unavailable: @@ -77,56 +77,54 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext exceptionCountAfterHandling = 0; + LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); + if (isReadOnlyRequest) { - return new LocationSpecificHealthContext.Builder() + return builder .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking()) .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling) - .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) - .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) .build(); } else { - return new LocationSpecificHealthContext.Builder() + return builder .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling) .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking()) - .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) - .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) .build(); } case HealthyTentative: successCountAfterHandling++; + builder = new LocationSpecificHealthContext.Builder() + .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) + .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); + if (isReadOnlyRequest) { - return new LocationSpecificHealthContext.Builder() + return builder .withSuccessCountForWriteForRecovery(locationSpecificHealthContext.getSuccessCountForWriteForRecovery()) .withExceptionCountForWriteForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking()) .withSuccessCountForReadForRecovery(successCountAfterHandling) .withExceptionCountForReadForCircuitBreaking(exceptionCountAfterHandling) - .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) - .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) .build(); } else { - return new LocationSpecificHealthContext.Builder() + return builder .withSuccessCountForWriteForRecovery(successCountAfterHandling) .withExceptionCountForWriteForCircuitBreaking(exceptionCountAfterHandling) .withSuccessCountForReadForRecovery(locationSpecificHealthContext.getSuccessCountForReadForRecovery()) .withExceptionCountForReadForCircuitBreaking(locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking()) - .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) - .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) .build(); } From 618bb711ac8cb783a8f832ad228c74201c2e8bae Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 2 Jul 2024 12:22:31 -0400 Subject: [PATCH 104/140] Reacting to review comments. --- .../azure/cosmos/implementation/ClientRetryPolicy.java | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index d7f9d2708bdd..329c9f1a8f05 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -300,10 +300,15 @@ private Mono shouldRetryOnEndpointFailureAsync(boolean isRead } private Mono shouldRetryOnGatewayTimeout() { - boolean canFailoverOnTimeout = canGatewayRequestFailoverOnTimeout(request); + + boolean canFailoverOnTimeout = canGatewayRequestFailoverOnTimeout(this.request); + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(this.request)) { + this.globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(this.request, this.request.requestContext.locationEndpointToRoute); + } //if operation is data plane read, metadata read, or query plan it can be retried on a different endpoint. - if(canFailoverOnTimeout) { + if (canFailoverOnTimeout) { if (!this.enableEndpointDiscovery || this.failoverRetryCount > MaxRetryCount) { logger.warn("shouldRetryOnHttpTimeout() Not retrying. Retry count = {}", this.failoverRetryCount); return Mono.just(ShouldRetryResult.noRetry()); From e36ecbd55431ee8b74713bbee132c2cdb3220bac Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 2 Jul 2024 15:54:48 -0400 Subject: [PATCH 105/140] Reacting to review comments. --- .../PartitionLevelCircuitBreakerTests.java | 736 +++++++++--------- 1 file changed, 368 insertions(+), 368 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 9837f1f874de..6a767610e159 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -684,8 +684,8 @@ public Object[][] miscellaneousOpTestConfigs() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)) - .withResponseDelay(Duration.ofSeconds(6)), + .withFaultInjectionDuration(Duration.ofSeconds(80)) + .withResponseDelay(Duration.ofSeconds(10)), this.buildTransitTimeoutFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -708,8 +708,8 @@ public Object[][] miscellaneousOpTestConfigs() { new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)) - .withResponseDelay(Duration.ofSeconds(6)), + .withFaultInjectionDuration(Duration.ofSeconds(80)) + .withResponseDelay(Duration.ofSeconds(10)), this.buildTransitTimeoutFaultInjectionRules, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, @@ -725,368 +725,368 @@ public Object[][] miscellaneousOpTestConfigs() { // injected into all replicas of the faulty EPK range. // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation // should see a success from the second preferred region. - { - String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), - this.buildTransitTimeoutFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasInternalServerError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation - // should see a success from the second preferred region. - { - String.format("Test with faulty %s with internal service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(6), - this.buildTransitTimeoutFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasInternalServerError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation - // should see a success from the second preferred region. - { - String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), - this.buildInternalServerErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasInternalServerError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation - // should see a success from the second preferred region. Although, after short-circuiting, a query operation - // will see request for QueryPlan from the short-circuited region. - { - String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withHitLimit(11), - this.buildInternalServerErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasInternalServerError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 429 injected into first preferred region for READ_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed when - // moved over to the second preferred region when the first preferred region has been short-circuited. - { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 429 injected into first preferred region for CREATE_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed when - // moved over to the second preferred region when the first preferred region has been short-circuited. - { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 429 injected into first preferred region for QUERY_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed when - // moved over to the second preferred region when the first preferred region has been short-circuited. - // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. - { - String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 404/1002 injected into first preferred region for READ_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit OperationCancelledException and only to succeed when - // moved over to the second preferred region when the first preferred region has been short-circuited. - { - String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildReadWriteSessionNotAvailableFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - }, - // 404/1002 injected into first preferred region for CREATE_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed when - // moved over to the second preferred region when the first preferred region has been short-circuited. - { - String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildReadWriteSessionNotAvailableFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - }, - // 449 injected into first preferred region for CREATE_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed when - // moved over to the second preferred region when the first preferred region has been short-circuited. - { - String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildRetryWithFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 449 injected into first preferred region for REPLACE_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException and only to succeed when - // moved over to the second preferred region when the first preferred region has been short-circuited. - { - String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildRetryWithFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasOperationCancelledException, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 503 injected into all regions for READ_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[]{ - String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 503 injected into all regions for UPSERT_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[]{ - String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(6), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 503 injected into all regions for QUERY_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[] { - String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED - }, - // 429 injected into first preferred region for READ_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) - // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. - new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasSuccess, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - }, - // 429 injected into first preferred region for CREATE_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) - // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. - new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.CREATE_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasSuccess, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasSecondPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - }, - // 429 injected into first preferred region for QUERY_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) - // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). - new Object[]{ - String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) - .withFaultInjectionDuration(Duration.ofSeconds(60)), - this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasSuccess, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - } +// { +// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withHitLimit(11), +// this.buildInternalServerErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasInternalServerError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation +// // should see a success from the second preferred region. +// { +// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withHitLimit(6), +// this.buildInternalServerErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasInternalServerError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation +// // should see a success from the second preferred region. +// { +// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withHitLimit(11), +// this.buildInternalServerErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasInternalServerError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation +// // should see a success from the second preferred region. Although, after short-circuiting, a query operation +// // will see request for QueryPlan from the short-circuited region. +// { +// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withHitLimit(11), +// this.buildInternalServerErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasInternalServerError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 429 injected into first preferred region for READ_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to hit OperationCancelledException and only to succeed when +// // moved over to the second preferred region when the first preferred region has been short-circuited. +// { +// String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildTooManyRequestsErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 429 injected into first preferred region for CREATE_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to hit OperationCancelledException and only to succeed when +// // moved over to the second preferred region when the first preferred region has been short-circuited. +// { +// String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildTooManyRequestsErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 429 injected into first preferred region for QUERY_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to hit OperationCancelledException and only to succeed when +// // moved over to the second preferred region when the first preferred region has been short-circuited. +// // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. +// { +// String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildTooManyRequestsErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 404/1002 injected into first preferred region for READ_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to hit OperationCancelledException and only to succeed when +// // moved over to the second preferred region when the first preferred region has been short-circuited. +// { +// String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildReadWriteSessionNotAvailableFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ONLY_DIRECT_MODE +// }, +// // 404/1002 injected into first preferred region for CREATE_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to hit OperationCancelledException and only to succeed when +// // moved over to the second preferred region when the first preferred region has been short-circuited. +// { +// String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildReadWriteSessionNotAvailableFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ONLY_DIRECT_MODE +// }, +// // 449 injected into first preferred region for CREATE_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to hit OperationCancelledException and only to succeed when +// // moved over to the second preferred region when the first preferred region has been short-circuited. +// { +// String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildRetryWithFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ONLY_DIRECT_MODE +// }, +// // 449 injected into first preferred region for REPLACE_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to hit OperationCancelledException and only to succeed when +// // moved over to the second preferred region when the first preferred region has been short-circuited. +// { +// String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildRetryWithFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasOperationCancelledException, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ONLY_DIRECT_MODE +// }, +// // 503 injected into all regions for READ_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to hit 503 until fault injection has it its injection limits. +// // After that, the operation should see a success from the first preferred region. +// new Object[]{ +// String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions) +// .withHitLimit(11), +// this.buildServiceUnavailableFaultInjectionRules, +// NO_END_TO_END_TIMEOUT, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasServiceUnavailableError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasAllRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 503 injected into all regions for UPSERT_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to hit 503 until fault injection has it its injection limits. +// // After that, the operation should see a success from the first preferred region. +// new Object[]{ +// String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions) +// .withHitLimit(6), +// this.buildServiceUnavailableFaultInjectionRules, +// NO_END_TO_END_TIMEOUT, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasServiceUnavailableError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasAllRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 503 injected into all regions for QUERY_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to hit 503 until fault injection has it its injection limits. +// // After that, the operation should see a success from the first preferred region. +// new Object[] { +// String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions) +// .withHitLimit(11), +// this.buildServiceUnavailableFaultInjectionRules, +// NO_END_TO_END_TIMEOUT, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasServiceUnavailableError, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// this.validateDiagnosticsContextHasAllRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ALL_CONNECTION_MODES_INCLUDED +// }, +// // 429 injected into first preferred region for READ_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) +// // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. +// new Object[]{ +// String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildTooManyRequestsErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasSuccess, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasAllRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ONLY_DIRECT_MODE +// }, +// // 429 injected into first preferred region for CREATE_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) +// // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. +// new Object[]{ +// String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.CREATE_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildTooManyRequestsErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasSuccess, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, +// this.validateDiagnosticsContextHasAllRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ONLY_DIRECT_MODE +// }, +// // 429 injected into first preferred region for QUERY_ITEM operation +// // injected into all replicas of the faulty EPK range. +// // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) +// // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). +// new Object[]{ +// String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), +// new FaultInjectionRuleParamsWrapper() +// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) +// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) +// .withFaultInjectionDuration(Duration.ofSeconds(60)), +// this.buildTooManyRequestsErrorFaultInjectionRules, +// TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, +// NO_REGION_SWITCH_HINT, +// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, +// this.validateResponseHasSuccess, +// this.validateResponseHasSuccess, +// this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, +// this.validateDiagnosticsContextHasAllRegions, +// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, +// ONLY_DIRECT_MODE +// } }; } @@ -1971,8 +1971,8 @@ private void execute( } } - logger.info("Sleep for 70 seconds to allow Unavailable partitions to be HealthyTentative"); - Thread.sleep(70_000); + logger.info("Sleep for 90 seconds to allow Unavailable partitions to be HealthyTentative"); + Thread.sleep(90_000); for (int i = operationIterationCountInFailureFlow + 1; i <= operationIterationCountInFailureFlow + operationIterationCountInRecoveryFlow; i++) { From acdcc508be55d7dda885789056ef6450802b23f7 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 4 Jul 2024 21:01:01 -0400 Subject: [PATCH 106/140] Reacting to review comments. --- ...EndpointManagerForCircuitBreakerTests.java | 112 ++- .../PartitionLevelCircuitBreakerTests.java | 736 +++++++++--------- .../query/DocumentProducerTest.java | 8 +- .../query/ReadManySplitTest.java | 3 +- .../com/azure/cosmos/CosmosAsyncClient.java | 4 +- .../com/azure/cosmos/CosmosClientBuilder.java | 5 + .../implementation/AsyncDocumentClient.java | 3 + .../implementation/ChangeFeedQueryImpl.java | 13 +- .../azure/cosmos/implementation/Configs.java | 19 + .../CosmosQueryRequestOptionsImpl.java | 11 + .../DocumentServiceRequestContext.java | 9 + ...FeedOperationContextForCircuitBreaker.java | 12 +- .../ImplementationBridgeHelpers.java | 4 + ...ointOperationContextForCircuitBreaker.java | 18 +- .../implementation/RxDocumentClientImpl.java | 124 ++- ...nsecutiveExceptionBasedCircuitBreaker.java | 5 +- ...itionEndpointManagerForCircuitBreaker.java | 192 ++++- .../LocationSpecificHealthContext.java | 38 +- ...pecificHealthContextTransitionHandler.java | 48 +- .../DefaultDocumentQueryExecutionContext.java | 3 +- .../query/DocumentProducer.java | 3 +- .../query/IDocumentQueryClient.java | 4 +- ...gOrderByDocumentQueryExecutionContext.java | 6 +- .../OrderByDocumentQueryExecutionContext.java | 6 +- ...ParallelDocumentQueryExecutionContext.java | 5 +- ...llelDocumentQueryExecutionContextBase.java | 9 +- .../query/QueryPlanRetriever.java | 5 +- .../models/CosmosQueryRequestOptions.java | 20 +- 28 files changed, 920 insertions(+), 505 deletions(-) rename sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/{implementation => }/GlobalPartitionEndpointManagerForCircuitBreakerTests.java (91%) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java similarity index 91% rename from sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java rename to sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index fc235134f540..35abc9bae76c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -1,8 +1,15 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -package com.azure.cosmos.implementation; - +package com.azure.cosmos; + +import com.azure.cosmos.implementation.ConnectionPolicy; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.OperationType; +import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.ResourceType; +import com.azure.cosmos.implementation.RxDocumentClientImpl; +import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; @@ -16,6 +23,7 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import reactor.core.publisher.Flux; import java.lang.reflect.Field; import java.net.URI; @@ -162,7 +170,7 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( @@ -229,7 +237,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( @@ -291,6 +299,16 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); + CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); + ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); + + globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); + + setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); + setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); + setUpMockInvocations(connectionPolicyMock); + globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -303,7 +321,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( @@ -376,6 +394,16 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); + CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); + ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); + + globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); + + setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); + setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); + setUpMockInvocations(connectionPolicyMock); + globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -388,7 +416,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( @@ -440,7 +468,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve assertThat(locationSpecificHealthContext.isExceptionThresholdBreached()).isTrue(); try { - Thread.sleep(65_000); + Thread.sleep(90_000); } catch (Exception ex) { throw new RuntimeException(ex); } @@ -468,6 +496,16 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); + CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); + ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); + + globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); + + setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); + setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); + setUpMockInvocations(connectionPolicyMock); + globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -480,7 +518,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( @@ -559,6 +597,16 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); + RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); + CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); + ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); + + globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); + + setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); + setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); + setUpMockInvocations(connectionPolicyMock); + globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -571,7 +619,7 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); RxDocumentServiceRequest request = constructRxDocumentServiceRequestInstance( @@ -642,7 +690,7 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); RxDocumentServiceRequest request1 = constructRxDocumentServiceRequestInstance( @@ -753,7 +801,7 @@ public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLeve LocationEastUsEndpointToLocationPair, LocationCentralUsEndpointToLocationPair) .stream() - .map(uriToLocationMappings -> uriToLocationMappings.getLeft()) + .map(Pair::getLeft) .collect(Collectors.toList()); Mockito.when(this.globalEndpointManagerMock.getApplicableWriteEndpoints(Mockito.anyList())).thenReturn((UnmodifiableList) UnmodifiableList.unmodifiableList(applicableReadWriteEndpoints)); @@ -915,4 +963,46 @@ private static Class getClassBySimpleName(Class[] classes, String classSim logger.warn("Class with simple name {} does not exist!", classSimpleName); return null; } + + private static void setUpMockInvocations(RxDocumentClientImpl rxDocumentClientMock, CosmosAsyncClient cosmosAsyncClientMock) { + Mockito.when( + rxDocumentClientMock.queryDocuments(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any())) + .thenReturn(Flux.empty()); + + Mockito.when( + rxDocumentClientMock.getCachedCosmosAsyncClientSnapshot()) + .thenReturn(cosmosAsyncClientMock); + } + + private static void setUpMockInvocations(CosmosAsyncClient cosmosAsyncClientMock, ConnectionPolicy connectionPolicyMock) { + Mockito.when( + cosmosAsyncClientMock.getEffectiveDiagnosticsThresholds(Mockito.any()) + ).thenReturn(new CosmosDiagnosticsThresholds()); + + Mockito.when( + cosmosAsyncClientMock.getEffectiveConsistencyLevel(Mockito.any(), Mockito.any()) + ).thenReturn(ConsistencyLevel.EVENTUAL); + + Mockito.when( + cosmosAsyncClientMock.getConnectionPolicy() + ).thenReturn(connectionPolicyMock); + + Mockito.when( + cosmosAsyncClientMock.getAccountTagValue() + ).thenReturn("contoso-cosmos-db"); + + Mockito.when( + cosmosAsyncClientMock.getServiceEndpoint() + ).thenReturn("https://contoso-cosmos-db.azure.documents.com"); + + Mockito.when( + cosmosAsyncClientMock.getUserAgent() + ).thenReturn("java-circuit-breaker-test"); + } + + private static void setUpMockInvocations(ConnectionPolicy connectionPolicyMock) { + Mockito.when( + connectionPolicyMock.getConnectionMode() + ).thenReturn(ConnectionMode.DIRECT); + } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index 6a767610e159..e9088f6e22c8 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -627,11 +627,11 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ONLY_DIRECT_MODE }, - // Response-delay injected into first preferred region for CREATE_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) - // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. +// // Response-delay injected into first preferred region for CREATE_ITEM operation +// // injected into all replicas of the faulty EPK range (although only the primary replica +// // is ever involved - effectively doesn't impact the assertions for this test). +// // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) +// // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -721,372 +721,372 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ONLY_DIRECT_MODE }, - // 500 (internal server error) injected into first preferred region for READ_ITEM operation +// 500 (internal server error) injected into first preferred region for READ_ITEM operation +// injected into all replicas of the faulty EPK range. +// Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation +// should see a success from the second preferred region. + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation // should see a success from the second preferred region. -// { -// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withHitLimit(11), -// this.buildInternalServerErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasInternalServerError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation -// // should see a success from the second preferred region. -// { -// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withHitLimit(6), -// this.buildInternalServerErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasInternalServerError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation -// // should see a success from the second preferred region. -// { -// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withHitLimit(11), -// this.buildInternalServerErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasInternalServerError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation -// // should see a success from the second preferred region. Although, after short-circuiting, a query operation -// // will see request for QueryPlan from the short-circuited region. -// { -// String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withHitLimit(11), -// this.buildInternalServerErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasInternalServerError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 429 injected into first preferred region for READ_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to hit OperationCancelledException and only to succeed when -// // moved over to the second preferred region when the first preferred region has been short-circuited. -// { -// String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildTooManyRequestsErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 429 injected into first preferred region for CREATE_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to hit OperationCancelledException and only to succeed when -// // moved over to the second preferred region when the first preferred region has been short-circuited. -// { -// String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildTooManyRequestsErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 429 injected into first preferred region for QUERY_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to hit OperationCancelledException and only to succeed when -// // moved over to the second preferred region when the first preferred region has been short-circuited. -// // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. -// { -// String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildTooManyRequestsErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 404/1002 injected into first preferred region for READ_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to hit OperationCancelledException and only to succeed when -// // moved over to the second preferred region when the first preferred region has been short-circuited. -// { -// String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildReadWriteSessionNotAvailableFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ONLY_DIRECT_MODE -// }, -// // 404/1002 injected into first preferred region for CREATE_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to hit OperationCancelledException and only to succeed when -// // moved over to the second preferred region when the first preferred region has been short-circuited. -// { -// String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildReadWriteSessionNotAvailableFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ONLY_DIRECT_MODE -// }, -// // 449 injected into first preferred region for CREATE_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to hit OperationCancelledException and only to succeed when -// // moved over to the second preferred region when the first preferred region has been short-circuited. -// { -// String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildRetryWithFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ONLY_DIRECT_MODE -// }, -// // 449 injected into first preferred region for REPLACE_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to hit OperationCancelledException and only to succeed when -// // moved over to the second preferred region when the first preferred region has been short-circuited. -// { -// String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildRetryWithFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasOperationCancelledException, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ONLY_DIRECT_MODE -// }, -// // 503 injected into all regions for READ_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to hit 503 until fault injection has it its injection limits. -// // After that, the operation should see a success from the first preferred region. -// new Object[]{ -// String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions) -// .withHitLimit(11), -// this.buildServiceUnavailableFaultInjectionRules, -// NO_END_TO_END_TIMEOUT, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasServiceUnavailableError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasAllRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 503 injected into all regions for UPSERT_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to hit 503 until fault injection has it its injection limits. -// // After that, the operation should see a success from the first preferred region. -// new Object[]{ -// String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions) -// .withHitLimit(6), -// this.buildServiceUnavailableFaultInjectionRules, -// NO_END_TO_END_TIMEOUT, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasServiceUnavailableError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasAllRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 503 injected into all regions for QUERY_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to hit 503 until fault injection has it its injection limits. -// // After that, the operation should see a success from the first preferred region. -// new Object[] { -// String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions) -// .withHitLimit(11), -// this.buildServiceUnavailableFaultInjectionRules, -// NO_END_TO_END_TIMEOUT, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasServiceUnavailableError, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// this.validateDiagnosticsContextHasAllRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ALL_CONNECTION_MODES_INCLUDED -// }, -// // 429 injected into first preferred region for READ_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) -// // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. -// new Object[]{ -// String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildTooManyRequestsErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasSuccess, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasAllRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ONLY_DIRECT_MODE -// }, -// // 429 injected into first preferred region for CREATE_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) -// // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. -// new Object[]{ -// String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.CREATE_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildTooManyRequestsErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasSuccess, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasSecondPreferredRegionOnly, -// this.validateDiagnosticsContextHasAllRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ONLY_DIRECT_MODE -// }, -// // 429 injected into first preferred region for QUERY_ITEM operation -// // injected into all replicas of the faulty EPK range. -// // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) -// // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). -// new Object[]{ -// String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), -// new FaultInjectionRuleParamsWrapper() -// .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) -// .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) -// .withFaultInjectionDuration(Duration.ofSeconds(60)), -// this.buildTooManyRequestsErrorFaultInjectionRules, -// TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, -// NO_REGION_SWITCH_HINT, -// !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, -// this.validateResponseHasSuccess, -// this.validateResponseHasSuccess, -// this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, -// this.validateDiagnosticsContextHasAllRegions, -// this.validateDiagnosticsContextHasFirstPreferredRegionOnly, -// ONLY_DIRECT_MODE -// } + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. Although, after short-circuiting, a query operation + // will see request for QueryPlan from the short-circuited region. + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 429 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 429 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 429 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 404/1002 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with read session not available in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE + }, + // 404/1002 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with write session not available error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildReadWriteSessionNotAvailableFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE + }, + // 449 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildRetryWithFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE + }, + // 449 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with retry with service error in the first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildRetryWithFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE + }, + // 503 injected into all regions for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + // 503 injected into all regions for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + //503 injected into all regions for QUERY_ITEM operation + //injected into all replicas of the faulty EPK range. + //Expectation is for the operation to hit 503 until fault injection has it its injection limits. + //After that, the operation should see a success from the first preferred region. + new Object[] { + String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ALL_CONNECTION_MODES_INCLUDED + }, + //429 injected into first preferred region for READ_ITEM operation + //injected into all replicas of the faulty EPK range. + //Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + //and only from the second preferred region when short-circuiting has kicked in for the first preferred region. + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE + }, + // 429 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE + }, + // 429 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) + // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). + new Object[]{ + String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE + } }; } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java index 03198543fddf..5613b3866d95 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/DocumentProducerTest.java @@ -557,7 +557,7 @@ public void simple() { invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); @@ -659,7 +659,7 @@ public void retries() { invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); @@ -765,7 +765,7 @@ public void retriesExhausted() { invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + }).when(queryClient).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); doAnswer(invocation -> { RxDocumentServiceRequest req = invocation.getArgument(0); @@ -902,7 +902,7 @@ private IDocumentQueryClient mockQueryClient(List replacement invocation.getArgument(4); return feedOperation.apply(retryPolicyFactory, req); - }).when(client).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any()); + }).when(client).executeFeedOperationWithAvailabilityStrategy(any(), any(), any(), any(), any(), any()); doReturn(cache).when(client).getPartitionKeyRangeCache(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java index 8fde4699ccf4..9bfbe3d77634 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/query/ReadManySplitTest.java @@ -121,7 +121,8 @@ protected DocumentProducer createDocumentProducer(String collectionRid, Function>> executeFunc, Supplier createRetryPolicyFunc, - FeedRangeEpkImpl feedRange) { + FeedRangeEpkImpl feedRange, + String collectionLink) { return new DocumentProducer( client, collectionRid, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java index 0197f50ecd9e..48f631b4af3c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java @@ -731,7 +731,7 @@ private Mono createDatabaseInternal(Database database, C requestOptions); } - private ConsistencyLevel getEffectiveConsistencyLevel( + ConsistencyLevel getEffectiveConsistencyLevel( OperationType operationType, ConsistencyLevel desiredConsistencyLevelOfOperation) { @@ -860,7 +860,7 @@ public boolean isEndpointDiscoveryEnabled(CosmosAsyncClient client) { @Override public String getConnectionMode(CosmosAsyncClient client) { - return client.connectionPolicy.getConnectionMode().toString(); + return client.getConnectionPolicy().getConnectionMode().toString(); } @Override diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 9a74fd197593..4edf1ca7f66b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1183,6 +1183,7 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { validateConfig(); buildConnectionPolicy(); CosmosAsyncClient cosmosAsyncClient = new CosmosAsyncClient(this); + cosmosAsyncClient.getDocClientWrapper().cacheEnclosingCosmosAsyncClient(cosmosAsyncClient); if (proactiveContainerInitConfig != null) { cosmosAsyncClient.recordOpenConnectionsAndInitCachesStarted(proactiveContainerInitConfig.getCosmosContainerIdentities()); @@ -1217,6 +1218,10 @@ public CosmosClient buildClient() { validateConfig(); buildConnectionPolicy(); CosmosClient cosmosClient = new CosmosClient(this); + + CosmosAsyncClient cosmosAsyncClient = cosmosClient.asyncClient(); + cosmosAsyncClient.getDocClientWrapper().cacheEnclosingCosmosAsyncClient(cosmosAsyncClient); + if (proactiveContainerInitConfig != null) { cosmosClient.recordOpenConnectionsAndInitCachesStarted(proactiveContainerInitConfig.getCosmosContainerIdentities()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index 56f158a40282..fde68cc437f2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -5,6 +5,7 @@ import com.azure.core.credential.AzureKeyCredential; import com.azure.core.credential.TokenCredential; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosContainerProactiveInitConfig; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; import com.azure.cosmos.CosmosItemSerializer; @@ -1647,4 +1648,6 @@ Flux> readAllDocuments( void recordOpenConnectionsAndInitCachesStarted(List cosmosContainerIdentities); public String getMasterKeyOrResourceToken(); + + void cacheEnclosingCosmosAsyncClient(CosmosAsyncClient cosmosAsyncClient); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java index f6276835306d..f830626f3d49 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ChangeFeedQueryImpl.java @@ -40,6 +40,7 @@ class ChangeFeedQueryImpl { private final DiagnosticsClientContext clientContext; private final Supplier createRequestFunc; private final String documentsLink; + private final String collectionLink; private final Function>> executeFunc; private final Class klass; private final CosmosChangeFeedRequestOptions options; @@ -77,6 +78,7 @@ public ChangeFeedQueryImpl( this.client = client; this.resourceType = resourceType; this.klass = klass; + this.collectionLink = collectionLink; this.documentsLink = Utils.joinPath(collectionLink, Paths.DOCUMENTS_PATH_SEGMENT); this.options = requestOptions; this.itemSerializer = client.getEffectiveItemSerializer(requestOptions.getCustomItemSerializer()); @@ -144,7 +146,8 @@ private RxDocumentServiceRequest createDocumentServiceRequest() { if (request.requestContext != null) { request.requestContext.setExcludeRegions(options.getExcludedRegions()); - request.requestContext.setFeedOperationContext(new FeedOperationContextForCircuitBreaker(new ConcurrentHashMap<>(), false)); + request.requestContext.setFeedOperationContext( + new FeedOperationContextForCircuitBreaker(new ConcurrentHashMap<>(), false, collectionLink)); } return request; @@ -204,14 +207,14 @@ private Mono handlePartitionLevelCircuitBreakingPrereq .flatMap(req -> client.getCollectionCache().resolveCollectionAsync(null, req) .flatMap(documentCollectionValueHolder -> { - checkNotNull(documentCollectionValueHolder, "documentCollectionValueHolder cannot be null!"); - checkNotNull(documentCollectionValueHolder.v, "documentCollectionValueHolder.v cannot be null!"); + checkNotNull(documentCollectionValueHolder, "Argument 'documentCollectionValueHolder' cannot be null!"); + checkNotNull(documentCollectionValueHolder.v, "Argument 'documentCollectionValueHolder.v' cannot be null!"); return client.getPartitionKeyRangeCache().tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) .flatMap(collectionRoutingMapValueHolder -> { - checkNotNull(collectionRoutingMapValueHolder, "collectionRoutingMapValueHolder cannot be null!"); - checkNotNull(collectionRoutingMapValueHolder.v, "collectionRoutingMapValueHolder.v cannot be null!"); + checkNotNull(collectionRoutingMapValueHolder, "Argument 'collectionRoutingMapValueHolder' cannot be null!"); + checkNotNull(collectionRoutingMapValueHolder.v, "Argument 'collectionRoutingMapValueHolder.v' cannot be null!"); changeFeedRequestOptionsAccessor.setPartitionKeyDefinition(options, documentCollectionValueHolder.v.getPartitionKey()); changeFeedRequestOptionsAccessor.setCollectionRid(options, documentCollectionValueHolder.v.getResourceId()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index eb24d159e88d..4bd07c63d058 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -211,6 +211,8 @@ public class Configs { private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; private static final String STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = "COSMOS.STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS"; private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 60; + private static final String ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = "COSMOS.ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS"; + private static final int DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = 30; public Configs() { this.sslContext = sslContextInit(); @@ -671,4 +673,21 @@ public static int getStalePartitionUnavailabilityRefreshIntervalInSeconds() { return DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS; } + + public static int getAllowedPartitionUnavailabilityDurationInSeconds() { + + String valueFromSystemProperty = System.getProperty(ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Integer.parseInt(valueFromSystemProperty); + } + + String valueFromEnvVariable = System.getenv(ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Integer.parseInt(valueFromEnvVariable); + } + + return DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java index 11381cc3bb2d..29e1f9a0757c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java @@ -31,6 +31,7 @@ public final class CosmosQueryRequestOptionsImpl extends CosmosQueryRequestOptio private Integer maxItemCountForVectorSearch; private List cancelledRequestDiagnosticsTracker = new ArrayList<>(); private String collectionRid; + private boolean isPerPartitionCircuitBreakingDisabled = false; /** * Instantiates a new query request options. @@ -70,6 +71,7 @@ public CosmosQueryRequestOptionsImpl(CosmosQueryRequestOptionsImpl options) { this.cancelledRequestDiagnosticsTracker = options.cancelledRequestDiagnosticsTracker; this.maxItemCountForVectorSearch = options.maxItemCountForVectorSearch; this.collectionRid = options.collectionRid; + this.isPerPartitionCircuitBreakingDisabled = options.isPerPartitionCircuitBreakingDisabled; } /** @@ -397,4 +399,13 @@ public String getCollectionRid() { public void setCollectionRid(String collectionRid) { this.collectionRid = collectionRid; } + + public boolean isPerPartitionCircuitBreakingDisabled() { + return this.isPerPartitionCircuitBreakingDisabled; + } + + public CosmosQueryRequestOptionsImpl setPerPartitionCircuitBreakingDisabled(boolean isPartitionCircuitBreakingDisabled) { + this.isPerPartitionCircuitBreakingDisabled = isPartitionCircuitBreakingDisabled; + return this; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index 4175bd26a90c..3783f36cbaac 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -64,6 +64,7 @@ public class DocumentServiceRequestContext implements Cloneable { private volatile Supplier clientRetryPolicySupplier; private volatile Utils.ValueHolder> regionToLocationSpecificHealthContext = new Utils.ValueHolder<>(); + private volatile boolean isPerPartitionCircuitBreakerDisabledForRequest = false; public DocumentServiceRequestContext() {} @@ -226,5 +227,13 @@ public Utils.ValueHolder> getLocation public void setLocationToLocationSpecificHealthContext(Map regionToLocationSpecificHealthContext) { this.regionToLocationSpecificHealthContext.v = regionToLocationSpecificHealthContext; } + + public boolean isPerPartitionCircuitBreakerDisabledForRequest() { + return this.isPerPartitionCircuitBreakerDisabledForRequest; + } + + public void setPerPartitionCircuitBreakerDisabledForRequest(boolean perPartitionCircuitBreakerDisabledForRequest) { + this.isPerPartitionCircuitBreakerDisabledForRequest = perPartitionCircuitBreakerDisabledForRequest; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java index d6e5d329a935..b58f4ab6e89c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java @@ -12,11 +12,17 @@ public class FeedOperationContextForCircuitBreaker { private final Map partitionKeyRangesWithSuccess; private final boolean isThresholdBasedAvailabilityStrategyEnabled; + private final String collectionLink; private boolean isRequestHedged; - public FeedOperationContextForCircuitBreaker(Map partitionKeyRangesWithSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { + public FeedOperationContextForCircuitBreaker( + Map partitionKeyRangesWithSuccess, + boolean isThresholdBasedAvailabilityStrategyEnabled, + String collectionLink) { + this.partitionKeyRangesWithSuccess = partitionKeyRangesWithSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; + this.collectionLink = collectionLink; } public void setIsRequestHedged(boolean isRequestHedged) { @@ -40,4 +46,8 @@ public boolean hasPartitionKeyRangeSeenSuccess(PartitionKeyRange partitionKeyRan public boolean isThresholdBasedAvailabilityStrategyEnabled() { return isThresholdBasedAvailabilityStrategyEnabled; } + + public String getCollectionLink() { + return collectionLink; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 79ff82985b5a..f9723e2f35d5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -303,6 +303,10 @@ void setCancelledRequestDiagnosticsTracker( void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid); String getCollectionRid(CosmosQueryRequestOptions options); + + boolean isPerPartitionCircuitBreakerDisabled(CosmosQueryRequestOptions options); + + void disablePerPartitionCircuitBreaker(CosmosQueryRequestOptions options); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java index e2df15b1f9bd..465a27b016f1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java @@ -8,21 +8,25 @@ public class PointOperationContextForCircuitBreaker { private final AtomicBoolean hasOperationSeenSuccess; - private final boolean isThresholdBasedAvailabilityStrategyEnabled; - private boolean isRequestHedged; + private final String collectionLink; + + public PointOperationContextForCircuitBreaker( + AtomicBoolean hasOperationSeenSuccess, + boolean isThresholdBasedAvailabilityStrategyEnabled, + String collectionLink) { - public PointOperationContextForCircuitBreaker(AtomicBoolean hasOperationSeenSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled) { this.hasOperationSeenSuccess = hasOperationSeenSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; + this.collectionLink = collectionLink; } public void setIsRequestHedged(boolean isRequestHedged) { this.isRequestHedged = isRequestHedged; } - public boolean getIsRequestHedged() { + public boolean isRequestHedged() { return this.isRequestHedged; } @@ -35,6 +39,10 @@ public boolean getHasOperationSeenSuccess() { } public boolean isThresholdBasedAvailabilityStrategyEnabled() { - return isThresholdBasedAvailabilityStrategyEnabled; + return this.isThresholdBasedAvailabilityStrategyEnabled; + } + + public String getCollectionLink() { + return this.collectionLink; } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 5c278881635f..24c1c2762c35 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -9,6 +9,7 @@ import com.azure.cosmos.BridgeInternal; import com.azure.cosmos.ConnectionMode; import com.azure.cosmos.ConsistencyLevel; +import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosContainerProactiveInitConfig; import com.azure.cosmos.CosmosDiagnostics; import com.azure.cosmos.CosmosDiagnosticsContext; @@ -258,6 +259,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization private final boolean sessionCapturingDisabled; private final boolean isRegionScopedSessionCapturingEnabledOnClientOrSystemConfig; private List operationPolicies; + private AtomicReference cachedCosmosAsyncClientSnapshot; public RxDocumentClientImpl(URI serviceEndpoint, String masterKeyOrResourceToken, @@ -557,6 +559,7 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); this.globalPartitionEndpointManagerForCircuitBreaker.init(); + this.cachedCosmosAsyncClientSnapshot = new AtomicReference<>(); this.diagnosticsClientConfig.withPartitionLevelCircuitBreakerConfig(this.globalPartitionEndpointManagerForCircuitBreaker.getCircuitBreakerConfig()); @@ -686,6 +689,8 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.globalPartitionEndpointManagerForCircuitBreaker); this.globalEndpointManager.init(); + this.globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(this); + DatabaseAccount databaseAccountSnapshot = this.initializeGatewayConfigurationReader(); this.resetSessionContainerIfNeeded(databaseAccountSnapshot); @@ -1914,7 +1919,11 @@ private Mono getBatchDocumentRequest(DocumentClientRet request.requestContext.setExcludeRegions(options.getExcludedRegions()); } - request.requestContext.setPointOperationContext(new PointOperationContextForCircuitBreaker(new AtomicBoolean(false), false)); + request.requestContext.setPointOperationContext( + new PointOperationContextForCircuitBreaker( + new AtomicBoolean(false), + false, + documentCollectionLink)); return this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request) .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) @@ -2226,7 +2235,8 @@ public Mono> createDocument( collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink); + collectionLink, + StringUtils.EMPTY); } private Mono> createDocumentCore( @@ -2365,7 +2375,7 @@ private Mono handleCircuitBreakingFeedbackForPointOperation( if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); } } else { @@ -2389,7 +2399,7 @@ private Mono handleCircuitBreakingFeedbackForPointOperation( if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - if (!pointOperationContextForCircuitBreaker.getIsRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { this.handleLocationCancellationExceptionForPartitionKeyRange(potentiallyFailedRequest); } } @@ -2499,7 +2509,8 @@ public Mono> upsertDocument(String collectionLink, Ob collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink); + collectionLink, + StringUtils.EMPTY); } private Mono> upsertDocumentCore( @@ -2605,7 +2616,8 @@ public Mono> replaceDocument(String documentLink, Obj collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink); + collectionLink, + documentLink); } private Mono> replaceDocumentCore( @@ -2702,7 +2714,8 @@ public Mono> replaceDocument(Document document, Reque collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink); + collectionLink, + StringUtils.EMPTY); } private Mono> replaceDocumentCore( @@ -2897,7 +2910,8 @@ public Mono> patchDocument(String documentLink, collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink); + collectionLink, + documentLink); } private Mono> patchDocumentCore( @@ -3035,7 +3049,8 @@ public Mono> deleteDocument(String documentLink, Requ collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink); + collectionLink, + documentLink); } @Override @@ -3056,7 +3071,8 @@ public Mono> deleteDocument(String documentLink, Inte collectionRoutingMap), options, options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink); + collectionLink, + documentLink); } private Mono> deleteDocumentCore( @@ -3210,7 +3226,8 @@ private Mono> readDocument( options, false, innerDiagnosticsFactory, - collectionLink); + collectionLink, + documentLink); } private Mono> readDocumentCore( @@ -3844,15 +3861,16 @@ public Mono executeFeedOperationWithAvailabilityStrategy( OperationType operationType, Supplier retryPolicyFactory, RxDocumentServiceRequest req, - BiFunction, RxDocumentServiceRequest, Mono> feedOperation) { + BiFunction, RxDocumentServiceRequest, Mono> feedOperation, + String collectionLink) { return RxDocumentClientImpl.this.executeFeedOperationWithAvailabilityStrategy( resourceType, operationType, retryPolicyFactory, req, - feedOperation - ); + feedOperation, + collectionLink); } @Override @@ -3882,9 +3900,14 @@ public Mono populateFeedRangeHeader(RxDocumentServiceR @Override public Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions) { + ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.CosmosQueryRequestOptionsAccessor queryRequestOptionsAccessor + = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor(); + + request.requestContext.setPerPartitionCircuitBreakerDisabledForRequest(queryRequestOptionsAccessor.isPerPartitionCircuitBreakerDisabled(queryRequestOptions)); + if (RxDocumentClientImpl.this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { - String collectionRid = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor().getCollectionRid(queryRequestOptions); + String collectionRid = queryRequestOptionsAccessor.getCollectionRid(queryRequestOptions); checkNotNull(collectionRid, "Argument 'collectionRid' cannot be null!"); @@ -5473,6 +5496,10 @@ public void setSession(ISessionContainer sessionContainer) { this.sessionContainer = sessionContainer; } + public CosmosAsyncClient getCachedCosmosAsyncClientSnapshot() { + return cachedCosmosAsyncClientSnapshot.get(); + } + @Override public RxClientCollectionCache getCollectionCache() { return this.collectionCache; @@ -5678,6 +5705,11 @@ public String getMasterKeyOrResourceToken() { return this.masterKeyOrResourceToken; } + @Override + public void cacheEnclosingCosmosAsyncClient(CosmosAsyncClient cosmosAsyncClient) { + this.cachedCosmosAsyncClientSnapshot.set(cosmosAsyncClient); + } + private static SqlQuerySpec createLogicalPartitionScanQuerySpec( PartitionKey partitionKey, List partitionKeySelectors) { @@ -5889,7 +5921,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat DocumentPointOperation callback, RequestOptions initialRequestOptions, boolean idempotentWriteRetriesEnabled, - String collectionLink) { + String collectionLink, + String documentLink) { return wrapPointOperationWithAvailabilityStrategy( resourceType, @@ -5898,7 +5931,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat initialRequestOptions, idempotentWriteRetriesEnabled, this, - collectionLink); + collectionLink, + documentLink); } private Mono> wrapPointOperationWithAvailabilityStrategy( @@ -5908,7 +5942,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat RequestOptions initialRequestOptions, boolean idempotentWriteRetriesEnabled, DiagnosticsClientContext innerDiagnosticsFactory, - String collectionLink) { + String collectionLink, + String documentLink) { return Mono.defer(() -> this.collectionCache.resolveByNameAsync(null, collectionLink, null) .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) @@ -5945,7 +5980,11 @@ private Mono> wrapPointOperationWithAvailabilityStrat if (orderedApplicableRegionsForSpeculation.size() < 2) { // There is at most one applicable region - no hedging possible - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker(isOperationSuccessful, false); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + false, + collectionLink); + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder); } @@ -5966,7 +6005,12 @@ private Mono> wrapPointOperationWithAvailabilityStrat // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker(isOperationSuccessful, true); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest + = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + true, + collectionLink); + pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); Mono initialMonoAcrossAllRegions = callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder) @@ -5996,7 +6040,12 @@ private Mono> wrapPointOperationWithAvailabilityStrat // Non-Transient errors are mapped to a value - this ensures the firstWithValue // operator below will complete the composite Mono for both successful values // and non-transient errors - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForHedgedRequest = new PointOperationContextForCircuitBreaker(isOperationSuccessful, true); + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForHedgedRequest + = new PointOperationContextForCircuitBreaker( + isOperationSuccessful, + true, + collectionLink); + pointOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); Mono regionalCrossRegionRetryMono = callback.apply(clonedOptions, endToEndPolicyConfig, diagnosticsFactory, pointOperationContextForCircuitBreakerForHedgedRequest, collectionRoutingMapValueHolder) @@ -6273,8 +6322,9 @@ private Mono executeFeedOperationWithAvailabilityStrategy( final OperationType operationType, final Supplier retryPolicyFactory, final RxDocumentServiceRequest req, - final BiFunction, RxDocumentServiceRequest, Mono> feedOperation - ) { + final BiFunction, RxDocumentServiceRequest, Mono> feedOperation, + final String collectionLink) { + checkNotNull(retryPolicyFactory, "Argument 'retryPolicyFactory' must not be null."); checkNotNull(req, "Argument 'req' must not be null."); assert(resourceType == ResourceType.Document); @@ -6295,7 +6345,12 @@ private Mono executeFeedOperationWithAvailabilityStrategy( if (orderedApplicableRegionsForSpeculation.size() < 2) { - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, false); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + false, + collectionLink); + feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow.setIsRequestHedged(false); req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForRequestOutsideOfAvailabilityStrategyFlow); @@ -6303,7 +6358,12 @@ private Mono executeFeedOperationWithAvailabilityStrategy( return feedOperation.apply(retryPolicyFactory, req); } - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + true, + collectionLink); + feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow.setIsRequestHedged(false); req.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForParentRequestInAvailabilityStrategyFlow); @@ -6321,7 +6381,12 @@ private Mono executeFeedOperationWithAvailabilityStrategy( // by the ClientRetryPolicy for the initial request - so, any outcome of the // initial Mono should be treated as non-transient error - even when // the error would otherwise be treated as transient - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForNonHedgedRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForNonHedgedRequest + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + true, + collectionLink); + feedOperationContextForCircuitBreakerForNonHedgedRequest.setIsRequestHedged(false); clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForNonHedgedRequest); @@ -6350,7 +6415,12 @@ private Mono executeFeedOperationWithAvailabilityStrategy( region) ); - FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForHedgedRequest = new FeedOperationContextForCircuitBreaker(partitionKeyRangesWithSuccess, true); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreakerForHedgedRequest + = new FeedOperationContextForCircuitBreaker( + partitionKeyRangesWithSuccess, + true, + collectionLink); + feedOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); clonedRequest.requestContext.setFeedOperationContext(feedOperationContextForCircuitBreakerForHedgedRequest); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 102d6d6d6a8e..ecb45f4d1ca4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -34,7 +34,8 @@ public LocationSpecificHealthContext handleException(LocationSpecificHealthConte LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) + .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()); if (isReadOnlyRequest) { @@ -80,6 +81,7 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()) .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); if (isReadOnlyRequest) { @@ -107,6 +109,7 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext builder = new LocationSpecificHealthContext.Builder() .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) + .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()) .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); if (isReadOnlyRequest) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 0bbf393267f7..3195e822af41 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -3,18 +3,35 @@ package com.azure.cosmos.implementation.circuitBreaker; +import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; +import com.azure.cosmos.CosmosException; import com.azure.cosmos.implementation.Configs; +import com.azure.cosmos.implementation.CosmosPagedFluxOptions; import com.azure.cosmos.implementation.CosmosSchedulers; +import com.azure.cosmos.implementation.Document; +import com.azure.cosmos.implementation.DocumentCollection; +import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.PathsHelper; +import com.azure.cosmos.implementation.PointOperationContextForCircuitBreaker; +import com.azure.cosmos.implementation.QueryFeedOperationState; import com.azure.cosmos.implementation.ResourceType; +import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; +import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; +import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; +import com.azure.cosmos.models.CosmosQueryRequestOptions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; +import reactor.core.publisher.SignalType; import java.net.URI; import java.time.Duration; @@ -25,6 +42,8 @@ import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Collectors; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -32,11 +51,14 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private static final Logger logger = LoggerFactory.getLogger(GlobalPartitionEndpointManagerForCircuitBreaker.class); + private static final ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.CosmosQueryRequestOptionsAccessor queryRequestOptionsAccessor + = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor(); private final GlobalEndpointManager globalEndpointManager; private final ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo; private final ConcurrentHashMap partitionKeyRangesWithPossibleUnavailableRegions; private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler; private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; + private final AtomicReference rxDocClientSnapshot; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); @@ -45,7 +67,9 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig = Configs.getPartitionLevelCircuitBreakerConfig(); this.consecutiveExceptionBasedCircuitBreaker = new ConsecutiveExceptionBasedCircuitBreaker(partitionLevelCircuitBreakerConfig); - this.locationSpecificHealthContextTransitionHandler = new LocationSpecificHealthContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker); + this.locationSpecificHealthContextTransitionHandler + = new LocationSpecificHealthContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker); + this.rxDocClientSnapshot = new AtomicReference<>(); } public void init() { @@ -71,13 +95,19 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest AtomicBoolean isFailoverPossible = new AtomicBoolean(true); AtomicBoolean isFailureThresholdBreached = new AtomicBoolean(false); + String collectionLink = getCollectionLink(request); + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionLevelLocationUnavailabilityInfoAsVal) -> { if (partitionLevelLocationUnavailabilityInfoAsVal == null) { partitionLevelLocationUnavailabilityInfoAsVal = new PartitionLevelLocationUnavailabilityInfo(); } - isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoAsVal.handleException(partitionKeyRangeWrapperAsKey, failedLocation, request.isReadOnlyRequest())); + isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoAsVal.handleException( + partitionKeyRangeWrapperAsKey, + collectionLink, + failedLocation, + request.isReadOnlyRequest())); if (isFailureThresholdBreached.get()) { @@ -128,6 +158,8 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r PartitionKeyRangeWrapper partitionKeyRangeWrapper = new PartitionKeyRangeWrapper(partitionKeyRange, resourceId); URI succeededLocation = request.requestContext.locationEndpointToRoute; + String collectionLink = getCollectionLink(request); + this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.compute(partitionKeyRangeWrapper, (partitionKeyRangeWrapperAsKey, partitionKeyRangeToFailoverInfoAsVal) -> { if (partitionKeyRangeToFailoverInfoAsVal == null) { @@ -136,6 +168,7 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r partitionKeyRangeToFailoverInfoAsVal.handleSuccess( partitionKeyRangeWrapper, + collectionLink, succeededLocation, request.isReadOnlyRequest()); @@ -173,7 +206,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String coll return UnmodifiableList.unmodifiableList(unavailableLocations); } - private Flux updateStaleLocationInfo() { + private Flux updateStaleLocationInfo() { return Mono.just(1) .delayElement(Duration.ofSeconds(Configs.getStalePartitionUnavailabilityRefreshIntervalInSeconds())) .repeat() @@ -188,30 +221,103 @@ private Flux updateStaleLocationInfo() { PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); if (partitionLevelLocationUnavailabilityInfo != null) { - for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.entrySet()) { - URI locationWithStaleUnavailabilityInfo = locationToLocationLevelMetrics.getKey(); + List>> locationToLocationSpecificHealthContextList = new ArrayList<>(); - partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { + for (Map.Entry locationToLocationLevelMetrics : partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.entrySet()) { - if (locationSpecificContextAsVal != null) { - locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationSpecificHealthContextTransitionHandler.handleSuccess( - locationSpecificContextAsVal, - partitionKeyRangeWrapper, - locationWithStaleUnavailabilityInfoAsKey, - false, - true); - } + URI locationWithStaleUnavailabilityInfo = locationToLocationLevelMetrics.getKey(); + LocationSpecificHealthContext locationSpecificHealthContext = locationToLocationLevelMetrics.getValue(); - return locationSpecificContextAsVal; - }); + if (!locationSpecificHealthContext.isRegionAvailableToProcessRequests()) { + locationToLocationSpecificHealthContextList.add(Pair.of(partitionKeyRangeWrapper, Pair.of(locationWithStaleUnavailabilityInfo, locationSpecificHealthContext))); + } } + + return Flux.fromIterable(locationToLocationSpecificHealthContextList); } else { this.partitionKeyRangesWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper); + return Mono.empty(); + } + }) + .flatMap(locationToLocationSpecificHealthContextPair -> { + + PartitionKeyRangeWrapper partitionKeyRangeWrapper = locationToLocationSpecificHealthContextPair.getLeft(); + URI locationWithStaleUnavailabilityInfo = locationToLocationSpecificHealthContextPair.getRight().getLeft(); + LocationSpecificHealthContext locationSpecificHealthContext = locationToLocationSpecificHealthContextPair.getRight().getRight(); + + String collectionLink = locationSpecificHealthContext.getLastCollectionLinkSeen(); + CosmosQueryRequestOptions queryRequestOptions = new CosmosQueryRequestOptions(); + queryRequestOptions.setFeedRange(new FeedRangeEpkImpl(partitionKeyRangeWrapper.getPartitionKeyRange().toRange())); + queryRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build()); + + List applicableReadEndpoints = globalEndpointManager + .getApplicableReadEndpoints(Collections.emptyList()) + .stream() + .map(locationEndpoint -> globalEndpointManager.getRegionName(locationEndpoint, OperationType.Read)) + .collect(Collectors.toList()); + + applicableReadEndpoints.remove(globalEndpointManager.getRegionName(locationWithStaleUnavailabilityInfo, OperationType.Read)); + + queryRequestOptions.setExcludedRegions(applicableReadEndpoints); + queryRequestOptionsAccessor.disablePerPartitionCircuitBreaker(queryRequestOptions); + + String spanName = "queryItems." + collectionLink; + + QueryFeedOperationState queryFeedOperationState = new QueryFeedOperationState( + this.rxDocClientSnapshot.get().getCachedCosmosAsyncClientSnapshot(), + spanName, + PathsHelper.getDatabasePath(collectionLink), + collectionLink, + ResourceType.Document, + OperationType.Read, + spanName, + queryRequestOptions, + new CosmosPagedFluxOptions()); + + RxDocumentClientImpl rxDocumentClient = this.rxDocClientSnapshot.get(); + + PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); + + if (partitionLevelLocationUnavailabilityInfo != null) { + return rxDocumentClient + .queryDocuments(collectionLink, "SELECT * FROM C OFFSET 0 LIMIT 1", queryFeedOperationState, Document.class) + .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) + .doFinally(signalType -> { + if (signalType != SignalType.ON_ERROR && signalType != SignalType.CANCEL) { + + if (logger.isDebugEnabled()) { + logger.debug("Partition health recovery query for partition key ranger : {}-{} and " + + "collection rid : {} has succeeded...", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId()); + } + + partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { + + if (locationSpecificContextAsVal != null) { + locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationSpecificHealthContextTransitionHandler.handleSuccess( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + locationWithStaleUnavailabilityInfoAsKey, + false, + true); + } + return locationSpecificContextAsVal; + }); + } + }); } - return Mono.empty(); + return Flux.empty(); + }) + .onErrorResume(throwable -> { + if (logger.isDebugEnabled()) { + logger.debug("An exception was thrown trying to recover an Unavailable partition key range!"); + } + return Flux.empty(); }); } @@ -221,6 +327,10 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques return false; } + if (request.requestContext.isPerPartitionCircuitBreakerDisabledForRequest()) { + return false; + } + if (request.getResourceType() != ResourceType.Document) { return false; } @@ -240,6 +350,10 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques return applicableWriteEndpoints != null && applicableWriteEndpoints.size() > 1; } + public void setRxDocumentClientImplSnapshot(RxDocumentClientImpl rxDocumentClient) { + this.rxDocClientSnapshot.set(rxDocumentClient); + } + private class PartitionLevelLocationUnavailabilityInfo { private final ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition; @@ -252,7 +366,11 @@ private PartitionLevelLocationUnavailabilityInfo() { this.locationSpecificHealthContextTransitionHandler = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationSpecificHealthContextTransitionHandler; } - private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI locationWithException, boolean isReadOnlyRequest) { + private boolean handleException( + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + String collectionLink, + URI locationWithException, + boolean isReadOnlyRequest) { AtomicBoolean isExceptionThresholdBreached = new AtomicBoolean(false); @@ -268,6 +386,7 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe .withUnavailableSince(Instant.MAX) .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures) .withExceptionThresholdBreached(false) + .withLastCollectionLinkSeen(collectionLink) .build(); } @@ -291,7 +410,12 @@ private boolean handleException(PartitionKeyRangeWrapper partitionKeyRangeWrappe return isExceptionThresholdBreached.get(); } - private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, URI succeededLocation, boolean isReadOnlyRequest) { + private void handleSuccess( + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + String collectionLink, + URI succeededLocation, + boolean isReadOnlyRequest) { + this.locationEndpointToLocationSpecificContextForPartition.compute(succeededLocation, (locationAsKey, locationSpecificContextAsVal) -> { LocationSpecificHealthContext locationSpecificHealthContextAfterTransition; @@ -306,6 +430,7 @@ private void handleSuccess(PartitionKeyRangeWrapper partitionKeyRangeWrapper, UR .withUnavailableSince(Instant.MAX) .withLocationHealthStatus(LocationHealthStatus.Healthy) .withExceptionThresholdBreached(false) + .withLastCollectionLinkSeen(collectionLink) .build(); } @@ -351,4 +476,31 @@ public ConsecutiveExceptionBasedCircuitBreaker getConsecutiveExceptionBasedCircu public PartitionLevelCircuitBreakerConfig getCircuitBreakerConfig() { return this.consecutiveExceptionBasedCircuitBreaker.getPartitionLevelCircuitBreakerConfig(); } + + private static String getCollectionLink(RxDocumentServiceRequest request) { + + checkNotNull(request, "Argument 'request' cannot be null!"); + checkNotNull(request.requestContext, "Argument 'request.requestContext' cannot be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker + = request.requestContext.getPointOperationContextForCircuitBreaker(); + FeedOperationContextForCircuitBreaker feedOperationContextForCircuitBreaker + = request.requestContext.getFeedOperationContextForCircuitBreaker(); + + if (pointOperationContextForCircuitBreaker != null) { + checkNotNull( + pointOperationContextForCircuitBreaker.getCollectionLink(), + "Argument 'pointOperationContextForCircuitBreaker.getCollectionLink()' cannot be null!"); + return pointOperationContextForCircuitBreaker.getCollectionLink(); + } + + if (feedOperationContextForCircuitBreaker != null) { + checkNotNull( + feedOperationContextForCircuitBreaker.getCollectionLink(), + "Argument 'feedOperationContextForCircuitBreaker.getCollectionLink()' cannot be null!"); + return feedOperationContextForCircuitBreaker.getCollectionLink(); + } + + throw new IllegalStateException("Both pointOperationContextForCircuitBreaker [or] feedOperationContextForCircuitBreaker cannot be null!"); + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java index e36a7ba879d7..7e2f90674f5d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java @@ -24,6 +24,7 @@ public class LocationSpecificHealthContext implements Serializable { private final Instant unavailableSince; private final LocationHealthStatus locationHealthStatus; private final boolean isExceptionThresholdBreached; + private final String lastCollectionLinkSeen; LocationSpecificHealthContext( int successCountForWriteForRecovery, @@ -32,7 +33,8 @@ public class LocationSpecificHealthContext implements Serializable { int exceptionCountForReadForCircuitBreaking, Instant unavailableSince, LocationHealthStatus locationHealthStatus, - boolean isExceptionThresholdBreached) { + boolean isExceptionThresholdBreached, + String lastCollectionLinkSeen) { this.successCountForWriteForRecovery = successCountForWriteForRecovery; this.exceptionCountForWriteForCircuitBreaking = exceptionCountForWriteForCircuitBreaking; @@ -41,6 +43,7 @@ public class LocationSpecificHealthContext implements Serializable { this.unavailableSince = unavailableSince; this.locationHealthStatus = locationHealthStatus; this.isExceptionThresholdBreached = isExceptionThresholdBreached; + this.lastCollectionLinkSeen = lastCollectionLinkSeen; } public boolean isExceptionThresholdBreached() { @@ -54,27 +57,31 @@ public boolean isRegionAvailableToProcessRequests() { } public int getExceptionCountForWriteForCircuitBreaking() { - return exceptionCountForWriteForCircuitBreaking; + return this.exceptionCountForWriteForCircuitBreaking; } public int getSuccessCountForWriteForRecovery() { - return successCountForWriteForRecovery; + return this.successCountForWriteForRecovery; } public int getExceptionCountForReadForCircuitBreaking() { - return exceptionCountForReadForCircuitBreaking; + return this.exceptionCountForReadForCircuitBreaking; } public int getSuccessCountForReadForRecovery() { - return successCountForReadForRecovery; + return this.successCountForReadForRecovery; } public Instant getUnavailableSince() { - return unavailableSince; + return this.unavailableSince; } public LocationHealthStatus getLocationHealthStatus() { - return locationHealthStatus; + return this.locationHealthStatus; + } + + public String getLastCollectionLinkSeen() { + return this.lastCollectionLinkSeen; } static class Builder { @@ -86,9 +93,9 @@ static class Builder { private Instant unavailableSince; private LocationHealthStatus locationHealthStatus; private boolean isExceptionThresholdBreached; + private String lastCollectionLinkSeen; - public Builder() { - } + public Builder() {} public Builder withExceptionCountForWriteForCircuitBreaking(int exceptionCountForWriteForCircuitBreaking) { this.exceptionCountForWriteForCircuitBreaking = exceptionCountForWriteForCircuitBreaking; @@ -125,17 +132,22 @@ public Builder withExceptionThresholdBreached(boolean exceptionThresholdBreached return this; } + public Builder withLastCollectionLinkSeen(String lastCollectionLinkSeen) { + this.lastCollectionLinkSeen = lastCollectionLinkSeen; + return this; + } + public LocationSpecificHealthContext build() { - LocationSpecificHealthContext locationSpecificHealthContext = new LocationSpecificHealthContext( + + return new LocationSpecificHealthContext( this.successCountForWriteForRecovery, this.exceptionCountForWriteForCircuitBreaking, this.successCountForReadForRecovery, this.exceptionCountForReadForCircuitBreaking, this.unavailableSince, this.locationHealthStatus, - this.isExceptionThresholdBreached); - - return locationSpecificHealthContext; + this.isExceptionThresholdBreached, + this.lastCollectionLinkSeen); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index 3cb5e3412fdf..59e100ae2f3c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -3,6 +3,7 @@ package com.azure.cosmos.implementation.circuitBreaker; +import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.OperationType; import org.slf4j.Logger; @@ -69,7 +70,7 @@ public LocationSpecificHealthContext handleSuccess( .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } - return this.transitionHealthStatus(LocationHealthStatus.Healthy); + return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Healthy); } else { return locationSpecificHealthContextInner; } @@ -78,7 +79,7 @@ public LocationSpecificHealthContext handleSuccess( case Unavailable: Instant unavailableSinceActual = locationSpecificHealthContext.getUnavailableSince(); if (!forceStatusChange) { - if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(30)) > 0) { + if (Duration.between(unavailableSinceActual, Instant.now()).compareTo(Duration.ofSeconds(Configs.getAllowedPartitionUnavailabilityDurationInSeconds())) > 0) { if (logger.isDebugEnabled()) { logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", @@ -89,7 +90,7 @@ public LocationSpecificHealthContext handleSuccess( .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } - return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); + return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative); } } else { @@ -102,7 +103,7 @@ public LocationSpecificHealthContext handleSuccess( .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } - return this.transitionHealthStatus(LocationHealthStatus.HealthyTentative); + return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative); } break; default: @@ -133,7 +134,7 @@ public LocationSpecificHealthContext handleException( .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } - return this.transitionHealthStatus(LocationHealthStatus.HealthyWithFailures); + return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyWithFailures); case HealthyWithFailures: if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { @@ -163,7 +164,7 @@ public LocationSpecificHealthContext handleException( .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } - return this.transitionHealthStatus(LocationHealthStatus.Unavailable); + return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable); } case HealthyTentative: if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { @@ -179,23 +180,26 @@ public LocationSpecificHealthContext handleException( .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } - return this.transitionHealthStatus(LocationHealthStatus.Unavailable); + return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable); } default: throw new IllegalStateException("Unsupported health status: " + currentLocationHealthStatusSnapshot); } } - public LocationSpecificHealthContext transitionHealthStatus(LocationHealthStatus newStatus) { + public LocationSpecificHealthContext transitionHealthStatus(LocationSpecificHealthContext locationSpecificHealthContext, LocationHealthStatus newStatus) { + + LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() + .withSuccessCountForWriteForRecovery(0) + .withExceptionCountForWriteForCircuitBreaking(0) + .withSuccessCountForReadForRecovery(0) + .withExceptionCountForReadForCircuitBreaking(0) + .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()); switch (newStatus) { case Healthy: - return new LocationSpecificHealthContext.Builder() - .withSuccessCountForWriteForRecovery(0) - .withExceptionCountForWriteForCircuitBreaking(0) - .withSuccessCountForReadForRecovery(0) - .withExceptionCountForReadForCircuitBreaking(0) + return builder .withUnavailableSince(Instant.MAX) .withLocationHealthStatus(LocationHealthStatus.Healthy) .withExceptionThresholdBreached(false) @@ -203,11 +207,7 @@ public LocationSpecificHealthContext transitionHealthStatus(LocationHealthStatus case HealthyWithFailures: - return new LocationSpecificHealthContext.Builder() - .withSuccessCountForWriteForRecovery(0) - .withExceptionCountForWriteForCircuitBreaking(0) - .withSuccessCountForReadForRecovery(0) - .withExceptionCountForReadForCircuitBreaking(0) + return builder .withUnavailableSince(Instant.MAX) .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures) .withExceptionThresholdBreached(false) @@ -215,11 +215,7 @@ public LocationSpecificHealthContext transitionHealthStatus(LocationHealthStatus case Unavailable: - return new LocationSpecificHealthContext.Builder() - .withSuccessCountForWriteForRecovery(0) - .withExceptionCountForWriteForCircuitBreaking(0) - .withSuccessCountForReadForRecovery(0) - .withExceptionCountForReadForCircuitBreaking(0) + return builder .withUnavailableSince(Instant.now()) .withLocationHealthStatus(LocationHealthStatus.Unavailable) .withExceptionThresholdBreached(true) @@ -227,11 +223,7 @@ public LocationSpecificHealthContext transitionHealthStatus(LocationHealthStatus case HealthyTentative: - return new LocationSpecificHealthContext.Builder() - .withSuccessCountForWriteForRecovery(0) - .withExceptionCountForWriteForCircuitBreaking(0) - .withSuccessCountForReadForRecovery(0) - .withExceptionCountForReadForCircuitBreaking(0) + return builder .withUnavailableSince(Instant.now()) .withLocationHealthStatus(LocationHealthStatus.HealthyTentative) .withExceptionThresholdBreached(false) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java index 8c18e3d523fb..865203676512 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DefaultDocumentQueryExecutionContext.java @@ -187,7 +187,8 @@ protected Function>> executeInter OperationType.Query, this::createClientRetryPolicyInstance, req, - this::executeInternalFuncCore); + this::executeInternalFuncCore, + PathsHelper.getCollectionPath(super.resourceLink)); } private Mono> executeInternalFuncCore( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java index d9e98951f282..65c1cd18176e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/DocumentProducer.java @@ -181,7 +181,8 @@ public DocumentProducer( return null; }, request, - executeFeedOperationCore); + executeFeedOperationCore, + collectionLink); }; this.lastResponseContinuationToken = initialContinuationToken; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java index 28127efc8cf3..75868822962f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/IDocumentQueryClient.java @@ -4,6 +4,7 @@ import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.implementation.DocumentClientRetryPolicy; +import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.OperationType; @@ -64,7 +65,8 @@ Mono executeFeedOperationWithAvailabilityStrategy( final OperationType operationType, final Supplier retryPolicyFactory, final RxDocumentServiceRequest req, - final BiFunction, RxDocumentServiceRequest, Mono> feedOperation); + final BiFunction, RxDocumentServiceRequest, Mono> feedOperation, + final String collectionLink); CosmosItemSerializer getEffectiveItemSerializer(CosmosQueryRequestOptions queryRequestOptions); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java index 0596e77dd2cd..568ed0e85f36 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/NonStreamingOrderByDocumentQueryExecutionContext.java @@ -155,7 +155,9 @@ protected NonStreamingOrderByDocumentProducer createDocumentProducer( TriFunction createRequestFunc, Function>> executeFunc, Supplier createRetryPolicyFunc, - FeedRangeEpkImpl feedRange) { + FeedRangeEpkImpl feedRange, + String collectionLink) { + return new NonStreamingOrderByDocumentProducer( consumeComparer, client, @@ -164,7 +166,7 @@ protected NonStreamingOrderByDocumentProducer createDocumentProducer( createRequestFunc, executeFunc, feedRange, - collectionRid, + collectionLink, createRetryPolicyFunc, Document.class, correlatedActivityId, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java index c8a110853a7d..f94731115d4d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/OrderByDocumentQueryExecutionContext.java @@ -553,7 +553,9 @@ protected OrderByDocumentProducer createDocumentProducer( Map commonRequestHeaders, TriFunction createRequestFunc, Function>> executeFunc, - Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange) { + Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange, + String collectionLink) { + return new OrderByDocumentProducer(consumeComparer, client, collectionRid, @@ -561,7 +563,7 @@ protected OrderByDocumentProducer createDocumentProducer( createRequestFunc, executeFunc, feedRange, - collectionRid, + collectionLink, createRetryPolicyFunc, resourceType, correlatedActivityId, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java index 907532f54f7d..d51f50c39b56 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContext.java @@ -495,13 +495,14 @@ protected DocumentProducer createDocumentProducer( Map commonRequestHeaders, TriFunction createRequestFunc, Function>> executeFunc, - Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange) { + Supplier createRetryPolicyFunc, FeedRangeEpkImpl feedRange, + String collectionLink) { return new DocumentProducer<>(client, collectionRid, cosmosQueryRequestOptions, createRequestFunc, executeFunc, - collectionRid, + collectionLink, createRetryPolicyFunc, resourceType, correlatedActivityId, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java index 799fe0f7261b..629e24c5ef45 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/ParallelDocumentQueryExecutionContextBase.java @@ -109,7 +109,8 @@ protected void initialize( createRequestFunc, executeFunc, () -> client.getResetSessionTokenRetryPolicy().getRequestPolicy(this.diagnosticsClientContext), - targetRange); + targetRange, + collection.getSelfLink()); documentProducers.add(dp); } @@ -125,7 +126,8 @@ abstract protected DocumentProducer createDocumentProducer(String collectionR Function>> executeFunc, Supplier createRetryPolicyFunc, - FeedRangeEpkImpl feedRange); + FeedRangeEpkImpl feedRange, + String collectionLink); @Override abstract public Flux> drainAsync(int maxPageSize); @@ -180,7 +182,8 @@ protected void initializeReadMany( createRequestFunc, executeFunc, () -> client.getResetSessionTokenRetryPolicy().getRequestPolicy(this.diagnosticsClientContext), - feedRangeEpk); + feedRangeEpk, + collection.getSelfLink()); documentProducers.add(dp); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java index 40024c332fea..640c1c72bb74 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/query/QueryPlanRetriever.java @@ -8,6 +8,7 @@ import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.PathsHelper; import com.azure.cosmos.implementation.routing.PartitionKeyInternal; import com.azure.cosmos.models.CosmosQueryRequestOptions; import com.azure.cosmos.models.ModelBridgeInternal; @@ -125,7 +126,7 @@ static Mono getQueryPlanThroughGatewayAsync(Diagn OperationType.QueryPlan, () -> queryClient.getResetSessionTokenRetryPolicy().getRequestPolicy(diagnosticsClientContext), queryPlanRequest, - executeFunc - ); + executeFunc, + PathsHelper.getCollectionPath(resourceLink)); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index a3f65e5db054..f793f8dc89d0 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -520,13 +520,13 @@ CosmosQueryRequestOptions setPartitionKeyRangeIdInternal(String partitionKeyRang return this; } - PartitionKeyDefinition getPartitionKeyDefinition() { - return this.actualRequestOptions.getPartitionKeyDefinition(); + CosmosQueryRequestOptions disablePerPartitionCircuitBreaking() { + this.actualRequestOptions.setPerPartitionCircuitBreakingDisabled(true); + return this; } - CosmosQueryRequestOptions setPartitionKeyDefinition(PartitionKeyDefinition partitionKeyDefinition) { - this.actualRequestOptions.setPartitionKeyDefinition(partitionKeyDefinition); - return this; + boolean isPerPartitionCircuitBreakingDisabled() { + return this.actualRequestOptions.isPerPartitionCircuitBreakingDisabled(); } /////////////////////////////////////////////////////////////////////////////////////////// @@ -646,6 +646,16 @@ public void setCollectionRid(CosmosQueryRequestOptions options, String collectio public String getCollectionRid(CosmosQueryRequestOptions options) { return options.actualRequestOptions.getCollectionRid(); } + + @Override + public boolean isPerPartitionCircuitBreakerDisabled(CosmosQueryRequestOptions options) { + return options.actualRequestOptions.isPerPartitionCircuitBreakingDisabled(); + } + + @Override + public void disablePerPartitionCircuitBreaker(CosmosQueryRequestOptions options) { + options.actualRequestOptions.setPerPartitionCircuitBreakingDisabled(true); + } }); } From 89446c49375dc417588284691fe42e4d12a4ad8c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 4 Jul 2024 21:50:07 -0400 Subject: [PATCH 107/140] Reacting to review comments. --- ...ionEndpointManagerForCircuitBreakerTests.java | 16 ++++++++++++++++ .../CosmosChangeFeedRequestOptionsImpl.java | 1 - .../cosmos/implementation/RequestOptions.java | 1 - 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 35abc9bae76c..b7d188b38c1c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -7,6 +7,7 @@ import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.PointOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.RxDocumentServiceRequest; @@ -34,6 +35,7 @@ import java.util.concurrent.ScheduledFuture; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; import static com.azure.cosmos.implementation.TestUtils.mockDiagnosticsClientContext; @@ -114,6 +116,7 @@ public void recordHealthyStatus(String partitionLevelCircuitBreakerConfigAsJsonS ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -178,6 +181,7 @@ public void recordHealthyToHealthyWithFailuresStatusTransition(String partitionL ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -245,6 +249,7 @@ public void recordHealthyWithFailuresToUnavailableStatusTransition(String partit ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -329,6 +334,7 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -424,6 +430,7 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -526,6 +533,7 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -627,6 +635,7 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -698,6 +707,7 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St ResourceType.Document, collectionResourceId1, pkRangeId, + collectionResourceId1, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -707,6 +717,7 @@ public void multiContainerBothWithSinglePartitionHealthyToUnavailableHandling(St ResourceType.Document, collectionResourceId2, pkRangeId, + collectionResourceId2, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -812,6 +823,7 @@ public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLeve ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationCentralUsEndpointToLocationPair.getKey()); @@ -821,6 +833,7 @@ public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLeve ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUsEndpointToLocationPair.getKey()); @@ -830,6 +843,7 @@ public void allRegionsUnavailableHandlingWithMultiThreading(String partitionLeve ResourceType.Document, collectionResourceId, pkRangeId, + collectionResourceId, minInclusive, maxExclusive, LocationEastUs2EndpointToLocationPair.getKey()); @@ -927,6 +941,7 @@ private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( ResourceType resourceType, String collectionResourceId, String partitionKeyRangeId, + String collectionLink, String minInclusive, String maxExclusive, URI locationEndpointToRoute) { @@ -941,6 +956,7 @@ private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( request.requestContext.resolvedPartitionKeyRange = new PartitionKeyRange(partitionKeyRangeId, minInclusive, maxExclusive); request.requestContext.locationEndpointToRoute = locationEndpointToRoute; request.requestContext.setExcludeRegions(Collections.emptyList()); + request.requestContext.setPointOperationContext(new PointOperationContextForCircuitBreaker(new AtomicBoolean(false), false, collectionLink)); return request; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java index 19b9cc57f706..6c76508025f4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosChangeFeedRequestOptionsImpl.java @@ -17,7 +17,6 @@ import com.azure.cosmos.models.DedicatedGatewayRequestOptions; import com.azure.cosmos.models.FeedRange; import com.azure.cosmos.models.PartitionKeyDefinition; -import com.azure.cosmos.models.ReadOnlyRequestOptions; import com.azure.cosmos.util.Beta; import java.util.HashMap; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java index e69b36450fc3..e9ee8e350a4c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RequestOptions.java @@ -13,7 +13,6 @@ import com.azure.cosmos.models.DedicatedGatewayRequestOptions; import com.azure.cosmos.models.IndexingDirective; import com.azure.cosmos.models.PartitionKey; -import com.azure.cosmos.models.ReadOnlyRequestOptions; import com.azure.cosmos.models.PartitionKeyDefinition; import com.azure.cosmos.models.ThroughputProperties; From e43016d67523b70220f56eae8d5381b2c837618a Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 5 Jul 2024 17:20:45 -0400 Subject: [PATCH 108/140] Fixing live tests pipeline. --- .../cosmos/benchmark/AsyncBenchmark.java | 5 +- .../azure/cosmos/benchmark/SyncBenchmark.java | 5 +- .../azure/cosmos/CosmosDiagnosticsTest.java | 9 ++- .../implementation/RxDocumentClientImpl.java | 59 ++++++++++--------- 4 files changed, 42 insertions(+), 36 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java index c7a349e28891..d71a415b2370 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/AsyncBenchmark.java @@ -36,6 +36,7 @@ import com.codahale.metrics.jvm.MemoryUsageGaugeSet; import io.micrometer.core.instrument.MeterRegistry; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; import org.mpierce.metrics.reservoir.hdrhistogram.HdrHistogramResetOnSnapshotReservoir; import org.reactivestreams.Subscription; import org.slf4j.Logger; @@ -150,8 +151,8 @@ abstract class AsyncBenchmark { cosmosClient = cosmosClientBuilder.buildAsyncClient(); CosmosClient syncClient = cosmosClientBuilder - .endpoint(configuration.getServiceEndpointForRunResultsUploadAccount()) - .key(configuration.getMasterKeyForRunResultsUploadAccount()) + .endpoint(StringUtils.isNotEmpty(configuration.getServiceEndpointForRunResultsUploadAccount()) ? configuration.getServiceEndpointForRunResultsUploadAccount() : configuration.getServiceEndpoint()) + .key(StringUtils.isNotEmpty(configuration.getMasterKeyForRunResultsUploadAccount()) ? configuration.getMasterKeyForRunResultsUploadAccount() : configuration.getMasterKey()) .buildClient(); try { diff --git a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java index dea8dd6449d6..79faed4b831c 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java +++ b/sdk/cosmos/azure-cosmos-benchmark/src/main/java/com/azure/cosmos/benchmark/SyncBenchmark.java @@ -32,6 +32,7 @@ import com.codahale.metrics.jvm.MemoryUsageGaugeSet; import io.micrometer.core.instrument.MeterRegistry; import org.apache.commons.lang3.RandomStringUtils; +import org.apache.commons.lang3.StringUtils; import org.mpierce.metrics.reservoir.hdrhistogram.HdrHistogramResetOnSnapshotReservoir; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -158,8 +159,8 @@ public T apply(T o, Throwable throwable) { cosmosClient = cosmosClientBuilder.buildClient(); CosmosClient syncClient = cosmosClientBuilder - .endpoint(configuration.getServiceEndpointForRunResultsUploadAccount()) - .key(configuration.getMasterKeyForRunResultsUploadAccount()) + .endpoint(StringUtils.isNotEmpty(configuration.getServiceEndpointForRunResultsUploadAccount()) ? configuration.getServiceEndpointForRunResultsUploadAccount() : configuration.getServiceEndpoint()) + .key(StringUtils.isNotEmpty(configuration.getMasterKeyForRunResultsUploadAccount()) ? configuration.getMasterKeyForRunResultsUploadAccount() : configuration.getMasterKey()) .buildClient(); try { diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java index f692a2433031..c09033463b12 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java @@ -353,7 +353,7 @@ public void systemDiagnosticsForSystemStateInformation() { assertThat(createResponse.getDiagnostics().getDuration()).isNotNull(); } - @Test(groups = {"fast"}, timeOut = TIMEOUT) + @Test(groups = {"fast"}/*, timeOut = TIMEOUT*/) public void directDiagnostics() throws Exception { InternalObjectNode internalObjectNode = getInternalObjectNode(); CosmosItemResponse createResponse = containerDirect.createItem(internalObjectNode); @@ -770,8 +770,11 @@ private void validateDirectModeDiagnosticsOnSuccess( assertThat(diagnostics).contains("supplementalResponseStatisticsList"); assertThat(diagnostics).contains("gatewayStatisticsList"); assertThat(diagnostics).contains("addressResolutionStatistics"); - assertThat(diagnostics).contains("\"metaDataName\":\"CONTAINER_LOOK_UP\""); - assertThat(diagnostics).contains("\"metaDataName\":\"PARTITION_KEY_RANGE_LOOK_UP\""); + // todo: Container and partition key range look up are preempted before RxDocumentServiceRequest and it diagnostic instantiation + // todo: for partition-level circuit breaker - may not always appear in diagnostics + // and may not be part of diagnostics +// assertThat(diagnostics).contains("\"metaDataName\":\"CONTAINER_LOOK_UP\""); +// assertThat(diagnostics).contains("\"metaDataName\":\"PARTITION_KEY_RANGE_LOOK_UP\""); assertThat(diagnostics).contains("\"metaDataName\":\"SERVER_ADDRESS_LOOKUP\""); assertThat(diagnostics).contains("\"serializationType\":\"PARTITION_KEY_FETCH_SERIALIZATION\""); assertThat(diagnostics).contains("\"userAgent\":\"" + userAgent + "\""); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 24c1c2762c35..7a6f0e166502 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1863,6 +1863,9 @@ private Mono getCreateDocumentRequest(DocumentClientRe request.requestContext.setExcludeRegions(options.getExcludedRegions()); } + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); if (serializationDiagnosticsContext != null) { @@ -2234,9 +2237,9 @@ public Mono> createDocument( pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink, - StringUtils.EMPTY); + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink + ); } private Mono> createDocumentCore( @@ -2508,9 +2511,9 @@ public Mono> upsertDocument(String collectionLink, Ob (opt, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap) -> upsertDocumentCore( collectionLink, document, opt, disableAutomaticIdGeneration, e2ecfg, clientCtxOverride, pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink, - StringUtils.EMPTY); + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink + ); } private Mono> upsertDocumentCore( @@ -2615,9 +2618,9 @@ public Mono> replaceDocument(String documentLink, Obj pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink, - documentLink); + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink + ); } private Mono> replaceDocumentCore( @@ -2713,9 +2716,9 @@ public Mono> replaceDocument(Document document, Reque pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink, - StringUtils.EMPTY); + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink + ); } private Mono> replaceDocumentCore( @@ -2909,9 +2912,9 @@ public Mono> patchDocument(String documentLink, pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink, - documentLink); + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink + ); } private Mono> patchDocumentCore( @@ -3048,9 +3051,9 @@ public Mono> deleteDocument(String documentLink, Requ pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink, - documentLink); + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink + ); } @Override @@ -3070,9 +3073,9 @@ public Mono> deleteDocument(String documentLink, Inte pointOperationContextForCircuitBreaker, collectionRoutingMap), options, - options != null && options.getNonIdempotentWriteRetriesEnabled(), - collectionLink, - documentLink); + options != null && options.getNonIdempotentWriteRetriesEnabled() != null && options.getNonIdempotentWriteRetriesEnabled(), + collectionLink + ); } private Mono> deleteDocumentCore( @@ -3226,8 +3229,8 @@ private Mono> readDocument( options, false, innerDiagnosticsFactory, - collectionLink, - documentLink); + collectionLink + ); } private Mono> readDocumentCore( @@ -5921,8 +5924,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat DocumentPointOperation callback, RequestOptions initialRequestOptions, boolean idempotentWriteRetriesEnabled, - String collectionLink, - String documentLink) { + String collectionLink) { return wrapPointOperationWithAvailabilityStrategy( resourceType, @@ -5931,8 +5933,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat initialRequestOptions, idempotentWriteRetriesEnabled, this, - collectionLink, - documentLink); + collectionLink + ); } private Mono> wrapPointOperationWithAvailabilityStrategy( @@ -5942,8 +5944,7 @@ private Mono> wrapPointOperationWithAvailabilityStrat RequestOptions initialRequestOptions, boolean idempotentWriteRetriesEnabled, DiagnosticsClientContext innerDiagnosticsFactory, - String collectionLink, - String documentLink) { + String collectionLink) { return Mono.defer(() -> this.collectionCache.resolveByNameAsync(null, collectionLink, null) .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) From ea80aab98bc9ed981d3fdc644d8da1898e51251f Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 5 Jul 2024 18:14:08 -0400 Subject: [PATCH 109/140] Fixing live tests pipeline. --- .../azure/cosmos/CosmosDiagnosticsTest.java | 7 +- ...EndpointManagerForCircuitBreakerTests.java | 3 +- .../com/azure/cosmos/CosmosDiagnostics.java | 15 ++++ .../ClientSideRequestStatistics.java | 10 +++ .../ImplementationBridgeHelpers.java | 2 + ...ointOperationContextForCircuitBreaker.java | 9 ++- .../implementation/RxDocumentClientImpl.java | 80 ++++++++++++++++--- 7 files changed, 107 insertions(+), 19 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java index c09033463b12..bdb46d404c50 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java @@ -770,11 +770,8 @@ private void validateDirectModeDiagnosticsOnSuccess( assertThat(diagnostics).contains("supplementalResponseStatisticsList"); assertThat(diagnostics).contains("gatewayStatisticsList"); assertThat(diagnostics).contains("addressResolutionStatistics"); - // todo: Container and partition key range look up are preempted before RxDocumentServiceRequest and it diagnostic instantiation - // todo: for partition-level circuit breaker - may not always appear in diagnostics - // and may not be part of diagnostics -// assertThat(diagnostics).contains("\"metaDataName\":\"CONTAINER_LOOK_UP\""); -// assertThat(diagnostics).contains("\"metaDataName\":\"PARTITION_KEY_RANGE_LOOK_UP\""); + assertThat(diagnostics).contains("\"metaDataName\":\"CONTAINER_LOOK_UP\""); + assertThat(diagnostics).contains("\"metaDataName\":\"PARTITION_KEY_RANGE_LOOK_UP\""); assertThat(diagnostics).contains("\"metaDataName\":\"SERVER_ADDRESS_LOOKUP\""); assertThat(diagnostics).contains("\"serializationType\":\"PARTITION_KEY_FETCH_SERIALIZATION\""); assertThat(diagnostics).contains("\"userAgent\":\"" + userAgent + "\""); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index b7d188b38c1c..143d73d5ce28 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -5,6 +5,7 @@ import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.PointOperationContextForCircuitBreaker; @@ -956,7 +957,7 @@ private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( request.requestContext.resolvedPartitionKeyRange = new PartitionKeyRange(partitionKeyRangeId, minInclusive, maxExclusive); request.requestContext.locationEndpointToRoute = locationEndpointToRoute; request.requestContext.setExcludeRegions(Collections.emptyList()); - request.requestContext.setPointOperationContext(new PointOperationContextForCircuitBreaker(new AtomicBoolean(false), false, collectionLink)); + request.requestContext.setPointOperationContext(new PointOperationContextForCircuitBreaker(new AtomicBoolean(false), false, collectionLink, new MetadataDiagnosticsContext())); return request; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java index bf708878c8de..9f76557549ee 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java @@ -6,6 +6,7 @@ import com.azure.cosmos.implementation.DiagnosticsClientContext; import com.azure.cosmos.implementation.FeedResponseDiagnostics; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.util.Beta; @@ -484,6 +485,20 @@ public URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics return cosmosDiagnostics.getFirstContactedLocationEndpoint(); } + + @Override + public void mergeMetadataDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, MetadataDiagnosticsContext otherMetadataDiagnosticsContext) { + + if (cosmosDiagnostics == null) { + return; + } + + ClientSideRequestStatistics clientSideRequestStatistics = cosmosDiagnostics.clientSideRequestStatistics; + + if (clientSideRequestStatistics != null) { + clientSideRequestStatistics.mergeMetadataDiagnosticsContext(otherMetadataDiagnosticsContext); + } + } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index ddf67f04cd66..3f2e9ac83b18 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -265,6 +265,16 @@ public int getRequestPayloadSizeInBytes() { return this.requestPayloadSizeInBytes; } + public void mergeMetadataDiagnosticsContext(MetadataDiagnosticsContext other) { + if (other == null || other.metadataDiagnosticList == null || other.metadataDiagnosticList.isEmpty()) { + return; + } + + for (MetadataDiagnosticsContext.MetadataDiagnostics metadataDiagnostics : other.metadataDiagnosticList) { + this.metadataDiagnosticsContext.addMetaDataDiagnostic(metadataDiagnostics); + } + } + public String recordAddressResolutionStart( URI targetEndpoint, boolean forceRefresh, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 32d1bb5e1f2f..2bf1e23307e5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -889,6 +889,8 @@ void recordAddressResolutionEnd( void setDiagnosticsContext(CosmosDiagnostics cosmosDiagnostics, CosmosDiagnosticsContext ctx); URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics); + + void mergeMetadataDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, MetadataDiagnosticsContext otherMetadataDiagnosticsContext); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java index 465a27b016f1..a5ad4adb767a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java @@ -11,15 +11,18 @@ public class PointOperationContextForCircuitBreaker { private final boolean isThresholdBasedAvailabilityStrategyEnabled; private boolean isRequestHedged; private final String collectionLink; + private final MetadataDiagnosticsContext metadataDiagnosticsContext; public PointOperationContextForCircuitBreaker( AtomicBoolean hasOperationSeenSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled, - String collectionLink) { + String collectionLink, + MetadataDiagnosticsContext metadataDiagnosticsContext) { this.hasOperationSeenSuccess = hasOperationSeenSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; this.collectionLink = collectionLink; + this.metadataDiagnosticsContext = metadataDiagnosticsContext; } public void setIsRequestHedged(boolean isRequestHedged) { @@ -45,4 +48,8 @@ public boolean isThresholdBasedAvailabilityStrategyEnabled() { public String getCollectionLink() { return this.collectionLink; } + + public MetadataDiagnosticsContext getMetadataDiagnosticsContext() { + return this.metadataDiagnosticsContext; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 7a6f0e166502..3b0c59d58630 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1824,7 +1824,8 @@ private Mono getCreateDocumentRequest(DocumentClientRe RequestOptions options, boolean disableAutomaticIdGeneration, OperationType operationType, - DiagnosticsClientContext clientContextOverride) { + DiagnosticsClientContext clientContextOverride, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { if (StringUtils.isEmpty(documentCollectionLink)) { throw new IllegalArgumentException("documentCollectionLink"); @@ -1872,6 +1873,11 @@ private Mono getCreateDocumentRequest(DocumentClientRe serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); return addPartitionKeyInformation(request, content, document, options, collectionObs); } @@ -1922,14 +1928,24 @@ private Mono getBatchDocumentRequest(DocumentClientRet request.requestContext.setExcludeRegions(options.getExcludedRegions()); } + // note: calling onBeforeSendRequest is a cheap operation which injects a CosmosDiagnostics + // instance into 'request' amongst other things - this way metadataDiagnosticsContext is not + // null and can be used for metadata-related telemetry (partition key range, container and server address lookups) + if (requestRetryPolicy != null) { + requestRetryPolicy.onBeforeSendRequest(request); + } + + MetadataDiagnosticsContext metadataDiagnosticsContext = BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics); + request.requestContext.setPointOperationContext( new PointOperationContextForCircuitBreaker( new AtomicBoolean(false), false, - documentCollectionLink)); + documentCollectionLink, + metadataDiagnosticsContext)); - return this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request) - .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(null, documentCollectionValueHolder.v.getResourceId(), null, null) + return this.collectionCache.resolveCollectionAsync(metadataDiagnosticsContext, request) + .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, documentCollectionValueHolder.v.getResourceId(), null, null) .flatMap(collectionRoutingMapValueHolder -> { addBatchHeaders(request, serverBatchRequest, documentCollectionValueHolder.v); @@ -2295,8 +2311,15 @@ private Mono> createDocumentInternal( try { logger.debug("Creating a Document. collectionLink: [{}]", collectionLink); - Mono requestObs = getCreateDocumentRequest(requestRetryPolicy, collectionLink, document, - options, disableAutomaticIdGeneration, OperationType.Create, clientContextOverride); + Mono requestObs = getCreateDocumentRequest( + requestRetryPolicy, + collectionLink, + document, + options, + disableAutomaticIdGeneration, + OperationType.Create, + clientContextOverride, + pointOperationContextForCircuitBreaker); return requestObs .flatMap(request -> { @@ -2576,7 +2599,8 @@ private Mono> upsertDocumentInternal( options, disableAutomaticIdGeneration, OperationType.Upsert, - clientContextOverride); + clientContextOverride, + pointOperationContextForCircuitBreaker); return reqObs .flatMap(request -> { @@ -2833,9 +2857,18 @@ private Mono> replaceDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludedRegions()); } + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); + + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } @@ -2999,13 +3032,22 @@ private Mono> patchDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludedRegions()); } + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); + if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + Mono> collectionObs = collectionCache.resolveCollectionAsync( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); @@ -3143,6 +3185,15 @@ private Mono> deleteDocumentInternal( request.requestContext.setExcludeRegions(options.getExcludedRegions()); } + if (retryPolicyInstance != null) { + retryPolicyInstance.onBeforeSendRequest(request); + } + + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + Mono> collectionObs = collectionCache.resolveCollectionAsync( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); @@ -5946,8 +5997,10 @@ private Mono> wrapPointOperationWithAvailabilityStrat DiagnosticsClientContext innerDiagnosticsFactory, String collectionLink) { - return Mono.defer(() -> this.collectionCache.resolveByNameAsync(null, collectionLink, null) - .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(null, collection.getResourceId(), null, null) + final MetadataDiagnosticsContext metadataDiagnosticsContext = new MetadataDiagnosticsContext(); + + return Mono.defer(() -> this.collectionCache.resolveByNameAsync(metadataDiagnosticsContext, collectionLink, null) + .flatMap(collection -> this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, collection.getResourceId(), null, null) .flatMap(collectionRoutingMapValueHolder -> { checkNotNull(resourceType, "Argument 'resourceType' must not be null."); @@ -5984,7 +6037,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreakerForMainRequest = new PointOperationContextForCircuitBreaker( isOperationSuccessful, false, - collectionLink); + collectionLink, + metadataDiagnosticsContext); pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder); @@ -6010,7 +6064,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat = new PointOperationContextForCircuitBreaker( isOperationSuccessful, true, - collectionLink); + collectionLink, + metadataDiagnosticsContext); pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); Mono initialMonoAcrossAllRegions = @@ -6045,7 +6100,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat = new PointOperationContextForCircuitBreaker( isOperationSuccessful, true, - collectionLink); + collectionLink, + metadataDiagnosticsContext); pointOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); Mono regionalCrossRegionRetryMono = From ba695cf629946c03e793de60afc24999e5507e86 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 5 Jul 2024 20:48:27 -0400 Subject: [PATCH 110/140] Fixing live tests pipeline. --- .../azure/cosmos/implementation/RxDocumentClientImplTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java index 139a0f45d79b..5aaaa914dc21 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java @@ -205,7 +205,7 @@ public void readMany() { .thenReturn(Mono.just(dummyCollectionObs())); Mockito - .when(this.collectionCacheMock.resolveByNameAsync(Mockito.isNull(), Mockito.anyString(), Mockito.isNull())) + .when(this.collectionCacheMock.resolveByNameAsync(Mockito.any(), Mockito.anyString(), Mockito.isNull())) .thenReturn(Mono.just(dummyCollectionObs().v)); Mockito From 7a2f38135d5f2c015da0c9be870f6d40581b8fc9 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 5 Jul 2024 21:00:14 -0400 Subject: [PATCH 111/140] Fixing live tests pipeline. --- .../src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java index bdb46d404c50..f692a2433031 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/CosmosDiagnosticsTest.java @@ -353,7 +353,7 @@ public void systemDiagnosticsForSystemStateInformation() { assertThat(createResponse.getDiagnostics().getDuration()).isNotNull(); } - @Test(groups = {"fast"}/*, timeOut = TIMEOUT*/) + @Test(groups = {"fast"}, timeOut = TIMEOUT) public void directDiagnostics() throws Exception { InternalObjectNode internalObjectNode = getInternalObjectNode(); CosmosItemResponse createResponse = containerDirect.createItem(internalObjectNode); From ad0fd516e990887a3227c18098374aab8476290d Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 6 Jul 2024 11:11:39 -0400 Subject: [PATCH 112/140] Fixing live tests pipeline. --- ...itionEndpointManagerForCircuitBreaker.java | 53 ++++++++++--------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 3195e822af41..4fd0c6b3df32 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -230,7 +230,12 @@ private Flux updateStaleLocationInfo() { LocationSpecificHealthContext locationSpecificHealthContext = locationToLocationLevelMetrics.getValue(); if (!locationSpecificHealthContext.isRegionAvailableToProcessRequests()) { - locationToLocationSpecificHealthContextList.add(Pair.of(partitionKeyRangeWrapper, Pair.of(locationWithStaleUnavailabilityInfo, locationSpecificHealthContext))); + locationToLocationSpecificHealthContextList.add( + Pair.of( + partitionKeyRangeWrapper, + Pair.of( + locationWithStaleUnavailabilityInfo, + locationSpecificHealthContext))); } } @@ -283,31 +288,29 @@ private Flux updateStaleLocationInfo() { return rxDocumentClient .queryDocuments(collectionLink, "SELECT * FROM C OFFSET 0 LIMIT 1", queryFeedOperationState, Document.class) .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) - .doFinally(signalType -> { - if (signalType != SignalType.ON_ERROR && signalType != SignalType.CANCEL) { - - if (logger.isDebugEnabled()) { - logger.debug("Partition health recovery query for partition key ranger : {}-{} and " + - "collection rid : {} has succeeded...", - partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), - partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId()); - } - - partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { - - if (locationSpecificContextAsVal != null) { - locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationSpecificHealthContextTransitionHandler.handleSuccess( - locationSpecificContextAsVal, - partitionKeyRangeWrapper, - locationWithStaleUnavailabilityInfoAsKey, - false, - true); - } - return locationSpecificContextAsVal; - }); + .doOnComplete(() -> { + + if (logger.isDebugEnabled()) { + logger.debug("Partition health recovery query for partition key ranger : {}-{} and " + + "collection rid : {} has succeeded...", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getResourceId()); } + + partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { + + if (locationSpecificContextAsVal != null) { + locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationSpecificHealthContextTransitionHandler.handleSuccess( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + locationWithStaleUnavailabilityInfoAsKey, + false, + true); + } + return locationSpecificContextAsVal; + }); }); } From fceaa31502ced0a5ef6c7fcd8a047878885f38d3 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 6 Jul 2024 18:32:29 -0400 Subject: [PATCH 113/140] Fixing CI pipeline. --- .../GlobalPartitionEndpointManagerForCircuitBreaker.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 4fd0c6b3df32..3bc11f587fa2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -291,7 +291,7 @@ private Flux updateStaleLocationInfo() { .doOnComplete(() -> { if (logger.isDebugEnabled()) { - logger.debug("Partition health recovery query for partition key ranger : {}-{} and " + + logger.debug("Partition health recovery query for partition key range : {}-{} and " + "collection rid : {} has succeeded...", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), From 8a47de3ab1e7e42cef2587023dc1fbfa0e965759 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 6 Jul 2024 19:06:41 -0400 Subject: [PATCH 114/140] Fixing CI pipeline. --- .../java/com/azure/cosmos/implementation/Configs.java | 2 +- .../GlobalPartitionEndpointManagerForCircuitBreaker.java | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 4bd07c63d058..ed6caf718cfb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -210,7 +210,7 @@ public class Configs { private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS"; private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; private static final String STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = "COSMOS.STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS"; - private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 60; + private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 600; private static final String ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = "COSMOS.ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS"; private static final int DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = 30; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 3bc11f587fa2..6d711e11b632 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -215,7 +215,6 @@ private Flux updateStaleLocationInfo() { .flatMap(partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair -> { logger.debug("Background updateStaleLocationInfo kicking in..."); - PartitionKeyRangeWrapper partitionKeyRangeWrapper = partitionKeyRangeWrapperToPartitionKeyRangeWrapperPair.getKey(); PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); @@ -239,7 +238,12 @@ private Flux updateStaleLocationInfo() { } } - return Flux.fromIterable(locationToLocationSpecificHealthContextList); + if (locationToLocationSpecificHealthContextList.isEmpty()) { + this.partitionKeyRangesWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper); + return Flux.empty(); + } else { + return Flux.fromIterable(locationToLocationSpecificHealthContextList); + } } else { this.partitionKeyRangesWithPossibleUnavailableRegions.remove(partitionKeyRangeWrapper); return Mono.empty(); From a24f3a48cb49217d6f7b550011552f9caab5bf25 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 7 Jul 2024 13:09:30 -0400 Subject: [PATCH 115/140] Fixing CI pipeline. --- .../main/java/com/azure/cosmos/implementation/Configs.java | 2 +- .../azure/cosmos/implementation/RxDocumentClientImpl.java | 7 +++++-- .../implementation/directconnectivity/AddressResolver.java | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index ed6caf718cfb..4bd07c63d058 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -210,7 +210,7 @@ public class Configs { private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS"; private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; private static final String STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = "COSMOS.STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS"; - private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 600; + private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 60; private static final String ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = "COSMOS.ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS"; private static final int DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = 30; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 3b0c59d58630..f29f13ef7294 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5908,10 +5908,13 @@ public void addPartitionLevelUnavailableRegionsForRequest( PartitionKeyRange partitionKeyRange = collectionRoutingMap.getRangeByEffectivePartitionKey(effectivePartitionKeyString); checkNotNull(partitionKeyRange, "partitionKeyRange cannot be null!"); - checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); - List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), partitionKeyRange); + checkNotNull(this.globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); + List unavailableLocationsForPartition = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), partitionKeyRange); List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); + // cache the effective partition key if possible - can be a bottleneck, + // since it is also recomputed in AddressResolver + request.setEffectivePartitionKey(effectivePartitionKeyString); request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java index a4f066c21fd9..89a4cf355105 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/AddressResolver.java @@ -680,7 +680,8 @@ private PartitionKeyRange tryResolveServerPartitionByPartitionKey( // partition getKey definition cached - like if collection with same getName but with RANGE partitioning is created. // In this case server will not pass x-ms-documentdb-collection-rid check and will return back InvalidPartitionException. // GATEWAY will refresh its cache and retry. - String effectivePartitionKey = PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKey, collection.getPartitionKey()); + String effectivePartitionKey = StringUtils.isNotEmpty(request.getEffectivePartitionKey()) + ? request.getEffectivePartitionKey() : PartitionKeyInternalHelper.getEffectivePartitionKeyString(partitionKey, collection.getPartitionKey()); request.setEffectivePartitionKey(effectivePartitionKey); From fbe3e7c6356b325ef8e57661364a99ae9de135fe Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 7 Jul 2024 13:31:19 -0400 Subject: [PATCH 116/140] Refactoring. --- .../implementation/RxGatewayStoreModelTest.java | 12 ++++-------- .../implementation/SpyClientUnderTestFactory.java | 6 ++---- .../cosmos/implementation/RxDocumentClientImpl.java | 9 +++------ .../cosmos/implementation/RxGatewayStoreModel.java | 10 ++++------ .../directconnectivity/StoreClient.java | 2 -- 5 files changed, 13 insertions(+), 26 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index c9a1db80efc1..0ea494c8dbc6 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -98,8 +98,7 @@ public void readTimeout() throws Exception { userAgentContainer, globalEndpointManager, httpClient, - null, - globalPartitionEndpointManager); + null); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName(clientContext, @@ -142,8 +141,7 @@ public void serviceUnavailable() throws Exception { userAgentContainer, globalEndpointManager, httpClient, - null, - globalPartitionEndpointManager); + null); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName(clientContext, @@ -200,8 +198,7 @@ public void applySessionToken( new UserAgentContainer(), globalEndpointManager, httpClient, - apiType, - globalPartitionEndpointManager); + apiType); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); httpClient = ReflectionUtils.getHttpClient(storeModel); @@ -268,8 +265,7 @@ public void validateApiType() throws Exception { new UserAgentContainer(), globalEndpointManager, httpClient, - apiType, - globalPartitionEndpointManager); + apiType); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName( clientContext, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index 31496cf438da..2798d707dc8c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -121,8 +121,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient rxClient, - ApiType apiType, - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { + ApiType apiType) { this.origRxGatewayStoreModel = super.createRxGatewayProxy( sessionContainer, consistencyLevel, @@ -130,8 +129,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, userAgentContainer, globalEndpointManager, rxClient, - apiType, - globalPartitionEndpointManager); + apiType); this.requests = Collections.synchronizedList(new ArrayList<>()); this.spyRxGatewayStoreModel = Mockito.spy(this.origRxGatewayStoreModel); this.initRequestCapture(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index f29f13ef7294..e8a383e4a499 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -685,8 +685,7 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.userAgentContainer, this.globalEndpointManager, this.reactorHttpClient, - this.apiType, - this.globalPartitionEndpointManagerForCircuitBreaker); + this.apiType); this.globalEndpointManager.init(); this.globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(this); @@ -818,8 +817,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - ApiType apiType, - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { + ApiType apiType) { return new RxGatewayStoreModel( this, sessionContainer, @@ -828,8 +826,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, userAgentContainer, globalEndpointManager, httpClient, - apiType, - globalPartitionEndpointManager); + apiType); } private HttpClient httpClient() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 1f4e7730ab21..c613887966d7 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -62,7 +62,6 @@ public class RxGatewayStoreModel implements RxStoreModel { private final HttpClient httpClient; private final QueryCompatibilityMode queryCompatibilityMode; private final GlobalEndpointManager globalEndpointManager; - private final GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager; private ConsistencyLevel defaultConsistencyLevel; private ISessionContainer sessionContainer; private ThroughputControlStore throughputControlStore; @@ -80,8 +79,8 @@ public RxGatewayStoreModel( UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - ApiType apiType, - GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManager) { + ApiType apiType) { + this.clientContext = clientContext; this.defaultHeaders = new HashMap<>(); this.defaultHeaders.put(HttpConstants.HttpHeaders.CACHE_CONTROL, @@ -113,7 +112,6 @@ public RxGatewayStoreModel( this.httpClient = httpClient; this.sessionContainer = sessionContainer; - this.globalPartitionEndpointManager = globalPartitionEndpointManager; } public RxGatewayStoreModel(RxGatewayStoreModel inner) { @@ -125,7 +123,6 @@ public RxGatewayStoreModel(RxGatewayStoreModel inner) { this.httpClient = inner.httpClient; this.sessionContainer = inner.sessionContainer; - this.globalPartitionEndpointManager = inner.globalPartitionEndpointManager; } void setGatewayServiceConfigurationReader(GatewayServiceConfigurationReader gatewayServiceConfigurationReader) { @@ -742,7 +739,8 @@ private Mono applySessionToken(RxDocumentServiceRequest request) { SessionTokenHelper.setPartitionLocalSessionToken(request, sessionContainer); } } else if (partitionKeyInternal != null) { - String effectivePartitionKeyString = PartitionKeyInternalHelper + String effectivePartitionKeyString = StringUtils.isNotEmpty(request.getEffectivePartitionKey()) ? + request.getEffectivePartitionKey() : PartitionKeyInternalHelper .getEffectivePartitionKeyString( partitionKeyInternal, collectionValueHolder.v.getPartitionKey()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java index eaab1e2e132f..1813a2f51a52 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClient.java @@ -52,7 +52,6 @@ public class StoreClient implements IStoreClient { private final Logger logger = LoggerFactory.getLogger(StoreClient.class); private final GatewayServiceConfigurationReader serviceConfigurationReader; private final ISessionContainer sessionContainer; - private final IAddressResolver addressResolver; private final ReplicatedResourceClient replicatedResourceClient; private final TransportClient transportClient; private final String ZERO_PARTITION_KEY_RANGE = "0"; @@ -82,7 +81,6 @@ public StoreClient( sessionRetryOptions); addressResolver.setOpenConnectionsProcessor(this.transportClient.getProactiveOpenConnectionsProcessor()); - this.addressResolver = addressResolver; } public void enableThroughputControl(ThroughputControlStore throughputControlStore) { From 5a9a0e6b7884bbbc792c816292e761b13c75a53b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 7 Jul 2024 14:28:27 -0400 Subject: [PATCH 117/140] Refactoring. --- .../azure/cosmos/implementation/RxDocumentClientUnderTest.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java index 927bf09c1764..77ac38573ca7 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java @@ -91,7 +91,6 @@ RxGatewayStoreModel createRxGatewayProxy( userAgentContainer, globalEndpointManager, spyHttpClient, - apiType, - globalPartitionEndpointManagerForCircuitBreaker); + apiType); } } From d3a090b4b5896dfaa68e8f5e9448959b37baa89c Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 7 Jul 2024 14:55:07 -0400 Subject: [PATCH 118/140] Refactoring. --- ...EndpointManagerForCircuitBreakerTests.java | 6 ++--- .../implementation/RxDocumentClientImpl.java | 24 ++++++++++++++----- ...itionEndpointManagerForCircuitBreaker.java | 15 ++++-------- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 143d73d5ce28..73140707ee86 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -930,11 +930,11 @@ private static void validateAllRegionsAreNotUnavailableAfterExceptionInLocation( logger.warn("Handling exception for {}", locationWithFailure.getPath()); globalPartitionEndpointManagerForCircuitBreaker.handleLocationExceptionForPartitionKeyRange(request, locationWithFailure); - List unavailableLocations - = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(collectionResourceId, partitionKeyRange); + List unavailableRegions + = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange(collectionResourceId, partitionKeyRange, request.getOperationType()); logger.info("Assert that all regions are not Unavailable!"); - assertThat(unavailableLocations.size()).isLessThan(applicableReadWriteLocations.size()); + assertThat(unavailableRegions.size()).isLessThan(applicableReadWriteLocations.size()); } private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index e8a383e4a499..9a3f2892c65b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -5906,8 +5906,12 @@ public void addPartitionLevelUnavailableRegionsForRequest( checkNotNull(partitionKeyRange, "partitionKeyRange cannot be null!"); checkNotNull(this.globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); - List unavailableLocationsForPartition = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), partitionKeyRange); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); + + List unavailableRegionsForPartition + = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( + request.getResourceId(), + partitionKeyRange, + request.getOperationType()); // cache the effective partition key if possible - can be a bottleneck, // since it is also recomputed in AddressResolver @@ -5936,8 +5940,12 @@ public void addPartitionLevelUnavailableRegionsForFeedRequest( if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); - List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), resolvedPartitionKeyRange); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); + + List unavailableRegionsForPartition + = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( + request.getResourceId(), + resolvedPartitionKeyRange, + request.getOperationType()); request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); } @@ -5962,8 +5970,12 @@ public void addPartitionLevelUnavailableRegionsForChangeFeedRequest( if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { checkNotNull(globalPartitionEndpointManagerForCircuitBreaker, "globalPartitionEndpointManagerForCircuitBreaker cannot be null!"); - List unavailableLocationsForPartition = globalPartitionEndpointManagerForCircuitBreaker.getUnavailableLocationEndpointsForPartitionKeyRange(request.getResourceId(), resolvedPartitionKeyRange); - List unavailableRegionsForPartition = unavailableLocationsForPartition.stream().map(unavailableLocationForPartition -> this.globalEndpointManager.getRegionName(unavailableLocationForPartition, request.getOperationType())).collect(Collectors.toList()); + + List unavailableRegionsForPartition + = this.globalPartitionEndpointManagerForCircuitBreaker.getUnavailableRegionsForPartitionKeyRange( + request.getResourceId(), + resolvedPartitionKeyRange, + request.getOperationType()); request.requestContext.setUnavailableRegionsForPartition(unavailableRegionsForPartition); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 6d711e11b632..ef7286de5829 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -4,12 +4,10 @@ package com.azure.cosmos.implementation.circuitBreaker; import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; -import com.azure.cosmos.CosmosException; import com.azure.cosmos.implementation.Configs; import com.azure.cosmos.implementation.CosmosPagedFluxOptions; import com.azure.cosmos.implementation.CosmosSchedulers; import com.azure.cosmos.implementation.Document; -import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; @@ -21,9 +19,7 @@ import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.RxDocumentServiceRequest; -import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; -import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.models.CosmosQueryRequestOptions; @@ -31,7 +27,6 @@ import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; -import reactor.core.publisher.SignalType; import java.net.URI; import java.time.Duration; @@ -177,7 +172,7 @@ public void handleLocationSuccessForPartitionKeyRange(RxDocumentServiceRequest r }); } - public List getUnavailableLocationEndpointsForPartitionKeyRange(String collectionResourceId, PartitionKeyRange partitionKeyRange) { + public List getUnavailableRegionsForPartitionKeyRange(String collectionResourceId, PartitionKeyRange partitionKeyRange, OperationType operationType) { checkNotNull(partitionKeyRange, "Argument 'partitionKeyRange' cannot be null!"); checkNotNull(collectionResourceId, "Argument 'collectionResourceId' cannot be null!"); @@ -187,7 +182,7 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String coll PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfoSnapshot = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); - List unavailableLocations = new ArrayList<>(); + List unavailableRegions = new ArrayList<>(); if (partitionLevelLocationUnavailabilityInfoSnapshot != null) { Map locationEndpointToFailureMetricsForPartition = @@ -197,13 +192,13 @@ public List getUnavailableLocationEndpointsForPartitionKeyRange(String coll URI location = pair.getKey(); LocationSpecificHealthContext locationSpecificHealthContext = pair.getValue(); - if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) { - unavailableLocations.add(location); + if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) {; + unavailableRegions.add(this.globalEndpointManager.getRegionName(location, operationType)); } } } - return UnmodifiableList.unmodifiableList(unavailableLocations); + return UnmodifiableList.unmodifiableList(unavailableRegions); } private Flux updateStaleLocationInfo() { From 6e99cb865f7b1bb65c8f69fe74cc4105523cf8fa Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 7 Jul 2024 16:18:22 -0400 Subject: [PATCH 119/140] Refactoring. --- ...itionEndpointManagerForCircuitBreaker.java | 32 ++++++++++++------- 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index ef7286de5829..d202bb028637 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -54,6 +54,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler; private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; private final AtomicReference rxDocClientSnapshot; + private final ConcurrentHashMap locationToRegion; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { this.partitionKeyRangeToLocationSpecificUnavailabilityInfo = new ConcurrentHashMap<>(); @@ -65,6 +66,7 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo this.locationSpecificHealthContextTransitionHandler = new LocationSpecificHealthContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker); this.rxDocClientSnapshot = new AtomicReference<>(); + this.locationToRegion = new ConcurrentHashMap<>(); } public void init() { @@ -192,7 +194,7 @@ public List getUnavailableRegionsForPartitionKeyRange(String collectionR URI location = pair.getKey(); LocationSpecificHealthContext locationSpecificHealthContext = pair.getValue(); - if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) {; + if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) { unavailableRegions.add(this.globalEndpointManager.getRegionName(location, operationType)); } } @@ -255,13 +257,13 @@ private Flux updateStaleLocationInfo() { queryRequestOptions.setFeedRange(new FeedRangeEpkImpl(partitionKeyRangeWrapper.getPartitionKeyRange().toRange())); queryRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build()); - List applicableReadEndpoints = globalEndpointManager + List applicableReadEndpoints = this.globalEndpointManager .getApplicableReadEndpoints(Collections.emptyList()) .stream() - .map(locationEndpoint -> globalEndpointManager.getRegionName(locationEndpoint, OperationType.Read)) + .map(locationEndpoint -> this.globalEndpointManager.getRegionName(locationEndpoint, OperationType.Query)) .collect(Collectors.toList()); - applicableReadEndpoints.remove(globalEndpointManager.getRegionName(locationWithStaleUnavailabilityInfo, OperationType.Read)); + applicableReadEndpoints.remove(this.globalEndpointManager.getRegionName(locationWithStaleUnavailabilityInfo, OperationType.Query)); queryRequestOptions.setExcludedRegions(applicableReadEndpoints); queryRequestOptionsAccessor.disablePerPartitionCircuitBreaker(queryRequestOptions); @@ -399,11 +401,13 @@ private boolean handleException( locationWithException, isReadOnlyRequest); - this.regionToLocationSpecificHealthContext.put( - GlobalPartitionEndpointManagerForCircuitBreaker - .this.globalEndpointManager - .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), - locationSpecificHealthContextAfterTransition); + GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.putIfAbsent(locationAsKey, GlobalPartitionEndpointManagerForCircuitBreaker + .this.globalEndpointManager + .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + + String region = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey); + + this.regionToLocationSpecificHealthContext.put(region, locationSpecificHealthContextAfterTransition); isExceptionThresholdBreached.set(locationSpecificHealthContextAfterTransition.isExceptionThresholdBreached()); return locationSpecificHealthContextAfterTransition; @@ -443,11 +447,15 @@ private void handleSuccess( false, isReadOnlyRequest); - this.regionToLocationSpecificHealthContext.put( + // used only for building diagnostics - so creating a lookup for URI and region name + GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.putIfAbsent( + locationAsKey, GlobalPartitionEndpointManagerForCircuitBreaker .this.globalEndpointManager - .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create), - locationSpecificHealthContextAfterTransition); + .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + + String region = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey); + this.regionToLocationSpecificHealthContext.put(region, locationSpecificHealthContextAfterTransition); return locationSpecificHealthContextAfterTransition; }); From b2e23e71bb10219435903a1037ab856114b5ee69 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sun, 7 Jul 2024 19:24:02 -0400 Subject: [PATCH 120/140] Refactoring. --- .../implementation/RxDocumentClientImpl.java | 10 +++++++ ...itionEndpointManagerForCircuitBreaker.java | 26 ++++++++++++------- 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 9a3f2892c65b..6241a08073bf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -556,6 +556,16 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.isRegionScopedSessionCapturingEnabledOnClientOrSystemConfig = isRegionScopedSessionCapturingEnabled; this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); + + // todo: revert config before merge + System.setProperty( + "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", + "{\"isPartitionLevelCircuitBreakerEnabled\": true, " + + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," + + "\"consecutiveExceptionCountToleratedForReads\": 10," + + "\"consecutiveExceptionCountToleratedForWrites\": 5," + + "}"); + this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); this.globalPartitionEndpointManagerForCircuitBreaker.init(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index d202bb028637..c3db07f2c427 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -401,12 +401,17 @@ private boolean handleException( locationWithException, isReadOnlyRequest); - GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.putIfAbsent(locationAsKey, GlobalPartitionEndpointManagerForCircuitBreaker - .this.globalEndpointManager - .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); - String region = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey); + if (GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey) == null) { + + GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.put( + locationAsKey, + GlobalPartitionEndpointManagerForCircuitBreaker + .this.globalEndpointManager + .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + } + String region = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey); this.regionToLocationSpecificHealthContext.put(region, locationSpecificHealthContextAfterTransition); isExceptionThresholdBreached.set(locationSpecificHealthContextAfterTransition.isExceptionThresholdBreached()); @@ -448,11 +453,14 @@ private void handleSuccess( isReadOnlyRequest); // used only for building diagnostics - so creating a lookup for URI and region name - GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.putIfAbsent( - locationAsKey, - GlobalPartitionEndpointManagerForCircuitBreaker - .this.globalEndpointManager - .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + + if (GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey) == null) { + GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.put( + locationAsKey, + GlobalPartitionEndpointManagerForCircuitBreaker + .this.globalEndpointManager + .getRegionName(locationAsKey, isReadOnlyRequest ? OperationType.Read : OperationType.Create)); + } String region = GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.get(locationAsKey); this.regionToLocationSpecificHealthContext.put(region, locationSpecificHealthContextAfterTransition); From 9b29f556cd912edf537e0c86927bef570412b0b0 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 8 Jul 2024 10:11:42 -0400 Subject: [PATCH 121/140] Refactoring. --- .../azure/cosmos/implementation/RxDocumentClientImplTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java index 5aaaa914dc21..0f47848f950c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java @@ -110,7 +110,8 @@ public void setUp() { this.defaultItemSerializer = Mockito.mock(CosmosItemSerializer.class); } - @Test(groups = {"unit"}) + // todo: fix and revert enabled = false when circuit breaker is enabled + @Test(groups = {"unit"}, enabled = false) public void readMany() { // setup static method mocks From d069d397e68174a6047459386e13ef83a4196dfd Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 8 Jul 2024 17:52:34 -0400 Subject: [PATCH 122/140] Refactoring. --- .../RxDocumentClientImplTest.java | 28 +++++++++++++------ 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java index 0f47848f950c..0bafee942c5f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientImplTest.java @@ -111,14 +111,19 @@ public void setUp() { } // todo: fix and revert enabled = false when circuit breaker is enabled - @Test(groups = {"unit"}, enabled = false) + @Test(groups = {"unit"}, enabled = true) public void readMany() { // setup static method mocks MockedStatic httpClientMock = Mockito.mockStatic(HttpClient.class); MockedStatic partitionKeyInternalHelperMock = Mockito.mockStatic(PartitionKeyInternalHelper.class); MockedStatic documentQueryExecutionFactoryMock = Mockito.mockStatic(DocumentQueryExecutionContextFactory.class); - MockedStatic observableHelperMock = Mockito.mockStatic(ObservableHelper.class); +// MockedStatic observableHelperMock = Mockito.mockStatic(ObservableHelper.class); + + // setup mocks + DocumentClientRetryPolicy documentClientRetryPolicyMock = Mockito.mock(DocumentClientRetryPolicy.class); + RxGatewayStoreModel gatewayStoreModelMock = Mockito.mock(RxGatewayStoreModel.class); + RxStoreModel serverStoreModelMock = Mockito.mock(RxStoreModel.class); // dummy values PartitionKeyRange dummyPartitionKeyRange1 = new PartitionKeyRange() @@ -197,9 +202,6 @@ public void readMany() { Mockito.any() )) .thenReturn(Flux.just(dummyExecutionContextForQuery(queryResults, headersForQueries, InternalObjectNode.class))); - observableHelperMock - .when(() -> ObservableHelper.inlineIfPossibleAsObs(Mockito.any(), Mockito.any())) - .thenReturn(Mono.just(dummyResourceResponse(pointReadResult, headersForPointReads))); Mockito .when(this.collectionCacheMock.resolveCollectionAsync(Mockito.isNull(), Mockito.any(RxDocumentServiceRequest.class))) @@ -213,7 +215,15 @@ public void readMany() { .when(this.partitionKeyRangeCacheMock.tryLookupAsync(Mockito.any(), Mockito.any(), Mockito.any(), Mockito.any())) .thenReturn(Mono.just(dummyCollectionRoutingMap(epksPartitionKeyRangeMap))); + RetryContext retryContext = new RetryContext(); + Mockito.when(this.resetSessionTokenRetryPolicyMock.getRequestPolicy(null)).thenReturn(dummyDocumentClientRetryPolicy()); + Mockito.when(this.cosmosAuthorizationTokenResolverMock.getAuthorizationToken(Mockito.anyString(), Mockito.anyString(), Mockito.anyString(), Mockito.any())).thenReturn("abcdefgh"); + Mockito.when(this.resetSessionTokenRetryPolicyMock.getRequestPolicy(Mockito.any())).thenReturn(documentClientRetryPolicyMock); + Mockito.when(documentClientRetryPolicyMock.getRetryContext()).thenReturn(retryContext); + Mockito + .when(serverStoreModelMock.processMessage(Mockito.any(RxDocumentServiceRequest.class))) + .thenReturn(Mono.just(mockRxDocumentServiceResponse(pointReadResult, headersForPointReads))); // initialize object to be tested RxDocumentClientImpl rxDocumentClient = new RxDocumentClientImpl( @@ -242,6 +252,8 @@ public void readMany() { ReflectionUtils.setCollectionCache(rxDocumentClient, this.collectionCacheMock); ReflectionUtils.setPartitionKeyRangeCache(rxDocumentClient, this.partitionKeyRangeCacheMock); ReflectionUtils.setResetSessionTokenRetryPolicy(rxDocumentClient, this.resetSessionTokenRetryPolicyMock); + ReflectionUtils.setGatewayProxy(rxDocumentClient, gatewayStoreModelMock); + ReflectionUtils.setServerStoreModel(rxDocumentClient, serverStoreModelMock); ArrayList cosmosItemIdentities = new ArrayList(); @@ -294,7 +306,6 @@ public void readMany() { // release static mocks httpClientMock.close(); partitionKeyInternalHelperMock.close(); - observableHelperMock.close(); documentQueryExecutionFactoryMock.close(); // de-register client @@ -418,8 +429,7 @@ public RetryContext getRetryContext() { }; } - private static ResourceResponse dummyResourceResponse(String content, Map headers) { - + private static RxDocumentServiceResponse mockRxDocumentServiceResponse(String content, Map headers) { byte[] blob = content.getBytes(StandardCharsets.UTF_8); StoreResponse storeResponse = new StoreResponse( HttpResponseStatus.OK.code(), @@ -457,7 +467,7 @@ public CosmosDiagnostics getMostRecentlyCreatedDiagnostics() { documentServiceResponse.setCosmosDiagnostics(dummyCosmosDiagnostics()); - return new ResourceResponse<>(documentServiceResponse, Document.class); + return documentServiceResponse; } private static CosmosDiagnostics dummyCosmosDiagnostics() { From e982cfe3d4cbb988aeb8e5a93c202dd06f959a9b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 9 Jul 2024 18:55:25 -0400 Subject: [PATCH 123/140] Refactoring and reacting to review comments. --- ...EndpointManagerForCircuitBreakerTests.java | 82 ----------- .../PartitionLevelCircuitBreakerTests.java | 2 - .../com/azure/cosmos/CosmosClientBuilder.java | 5 +- .../implementation/AsyncDocumentClient.java | 2 - .../azure/cosmos/implementation/Configs.java | 63 +++++++-- .../implementation/RxDocumentClientImpl.java | 16 +-- ...itionEndpointManagerForCircuitBreaker.java | 127 ++++++++---------- ...pecificHealthContextTransitionHandler.java | 12 +- .../PartitionKeyRangeWrapper.java | 2 +- .../GatewayAddressCache.java | 23 ++++ .../GlobalAddressResolver.java | 10 ++ 11 files changed, 152 insertions(+), 192 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 73140707ee86..8be7c9ecf2fc 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -305,16 +305,6 @@ public void recordUnavailableToHealthyTentativeStatusTransition(String partition GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); - RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); - CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); - ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); - - globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); - - setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); - setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); - setUpMockInvocations(connectionPolicyMock); - globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -401,16 +391,6 @@ public void recordHealthyTentativeToHealthyStatusTransition(String partitionLeve GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); - RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); - CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); - ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); - - globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); - - setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); - setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); - setUpMockInvocations(connectionPolicyMock); - globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -504,16 +484,6 @@ public void recordHealthyTentativeToUnavailableTransition(String partitionLevelC GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); - RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); - CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); - ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); - - globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); - - setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); - setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); - setUpMockInvocations(connectionPolicyMock); - globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -606,16 +576,6 @@ public void allRegionsUnavailableHandling(String partitionLevelCircuitBreakerCon GlobalPartitionEndpointManagerForCircuitBreaker globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManagerMock); - RxDocumentClientImpl rxDocumentClientMock = Mockito.mock(RxDocumentClientImpl.class); - CosmosAsyncClient cosmosAsyncClientMock = Mockito.mock(CosmosAsyncClient.class); - ConnectionPolicy connectionPolicyMock = Mockito.mock(ConnectionPolicy.class); - - globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(rxDocumentClientMock); - - setUpMockInvocations(rxDocumentClientMock, cosmosAsyncClientMock); - setUpMockInvocations(cosmosAsyncClientMock, connectionPolicyMock); - setUpMockInvocations(connectionPolicyMock); - globalPartitionEndpointManagerForCircuitBreaker.init(); String pkRangeId = "0"; @@ -980,46 +940,4 @@ private static Class getClassBySimpleName(Class[] classes, String classSim logger.warn("Class with simple name {} does not exist!", classSimpleName); return null; } - - private static void setUpMockInvocations(RxDocumentClientImpl rxDocumentClientMock, CosmosAsyncClient cosmosAsyncClientMock) { - Mockito.when( - rxDocumentClientMock.queryDocuments(Mockito.anyString(), Mockito.anyString(), Mockito.any(), Mockito.any())) - .thenReturn(Flux.empty()); - - Mockito.when( - rxDocumentClientMock.getCachedCosmosAsyncClientSnapshot()) - .thenReturn(cosmosAsyncClientMock); - } - - private static void setUpMockInvocations(CosmosAsyncClient cosmosAsyncClientMock, ConnectionPolicy connectionPolicyMock) { - Mockito.when( - cosmosAsyncClientMock.getEffectiveDiagnosticsThresholds(Mockito.any()) - ).thenReturn(new CosmosDiagnosticsThresholds()); - - Mockito.when( - cosmosAsyncClientMock.getEffectiveConsistencyLevel(Mockito.any(), Mockito.any()) - ).thenReturn(ConsistencyLevel.EVENTUAL); - - Mockito.when( - cosmosAsyncClientMock.getConnectionPolicy() - ).thenReturn(connectionPolicyMock); - - Mockito.when( - cosmosAsyncClientMock.getAccountTagValue() - ).thenReturn("contoso-cosmos-db"); - - Mockito.when( - cosmosAsyncClientMock.getServiceEndpoint() - ).thenReturn("https://contoso-cosmos-db.azure.documents.com"); - - Mockito.when( - cosmosAsyncClientMock.getUserAgent() - ).thenReturn("java-circuit-breaker-test"); - } - - private static void setUpMockInvocations(ConnectionPolicy connectionPolicyMock) { - Mockito.when( - connectionPolicyMock.getConnectionMode() - ).thenReturn(ConnectionMode.DIRECT); - } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java index e9088f6e22c8..ed7f561c439b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java @@ -3,7 +3,6 @@ package com.azure.cosmos.implementation; - import com.azure.cosmos.ConnectionMode; import com.azure.cosmos.CosmosAsyncClient; import com.azure.cosmos.CosmosAsyncContainer; @@ -80,7 +79,6 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.testng.Assert.fail; - public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { private static final ImplementationBridgeHelpers.CosmosAsyncContainerHelper.CosmosAsyncContainerAccessor containerAccessor diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 4edf1ca7f66b..7bf588e584e9 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -1183,7 +1183,7 @@ CosmosAsyncClient buildAsyncClient(boolean logStartupInfo) { validateConfig(); buildConnectionPolicy(); CosmosAsyncClient cosmosAsyncClient = new CosmosAsyncClient(this); - cosmosAsyncClient.getDocClientWrapper().cacheEnclosingCosmosAsyncClient(cosmosAsyncClient); + if (proactiveContainerInitConfig != null) { cosmosAsyncClient.recordOpenConnectionsAndInitCachesStarted(proactiveContainerInitConfig.getCosmosContainerIdentities()); @@ -1219,9 +1219,6 @@ public CosmosClient buildClient() { buildConnectionPolicy(); CosmosClient cosmosClient = new CosmosClient(this); - CosmosAsyncClient cosmosAsyncClient = cosmosClient.asyncClient(); - cosmosAsyncClient.getDocClientWrapper().cacheEnclosingCosmosAsyncClient(cosmosAsyncClient); - if (proactiveContainerInitConfig != null) { cosmosClient.recordOpenConnectionsAndInitCachesStarted(proactiveContainerInitConfig.getCosmosContainerIdentities()); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index fde68cc437f2..1ef7b14c9908 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -1648,6 +1648,4 @@ Flux> readAllDocuments( void recordOpenConnectionsAndInitCachesStarted(List cosmosContainerIdentities); public String getMasterKeyOrResourceToken(); - - void cacheEnclosingCosmosAsyncClient(CosmosAsyncClient cosmosAsyncClient); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java index 4bd07c63d058..d4a00661e07d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/Configs.java @@ -203,17 +203,32 @@ public class Configs { // + "\"applyDiagnosticThresholdsForTransportLevelMeters\":true}"); public static final String METRICS_CONFIG = "COSMOS.METRICS_CONFIG"; public static final String DEFAULT_METRICS_CONFIG = CosmosMicrometerMetricsConfig.DEFAULT.toJson(); + + // For partition-level circuit breaker, below config will set the tolerated consecutive exception counts + // for reads and writes for a given partition before being marked as Unavailable private static final String DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = PartitionLevelCircuitBreakerConfig.DEFAULT.toJson(); private static final String PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG = "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG"; private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT"; private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT = 2; private static final String STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = "COSMOS.STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS"; private static final int DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS = 1; + + // For partition-level circuit breaker, a background thread will run periodically every y seconds at a minimum + // in an attempt to recover Unavailable partitions private static final String STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = "COSMOS.STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS"; private static final int DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS = 60; + + // For partition-level circuit breaker, a partition can be allowed to be Unavailable for minimum of x seconds + // as specified by the below setting after which a background thread will attempt to recover the partition private static final String ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = "COSMOS.ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS"; private static final int DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS = 30; + // For partition-level circuit breaker, in order to recover a partition in a region, the SDK when configured + // in the direct connectivity mode, establishes connections to replicas to attempt to recover a region + // Below sets a time limit on how long these connection establishments be attempted for + private static final int DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS = 10; + private static final String CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS = "COSMOS.CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS"; + public Configs() { this.sslContext = sslContextInit(); } @@ -613,14 +628,25 @@ public static CosmosMicrometerMetricsConfig getMetricsConfig() { } public static PartitionLevelCircuitBreakerConfig getPartitionLevelCircuitBreakerConfig() { - String partitionLevelCircuitBreakerConfig = + String partitionLevelCircuitBreakerConfigAsString = System.getProperty( PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG, firstNonNull( emptyToNull(System.getenv().get(PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG)), DEFAULT_PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG)); - return PartitionLevelCircuitBreakerConfig.fromJsonString(partitionLevelCircuitBreakerConfig); + PartitionLevelCircuitBreakerConfig partitionLevelCircuitBreakerConfig + = PartitionLevelCircuitBreakerConfig.fromJsonString(partitionLevelCircuitBreakerConfigAsString); + + if (partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForReads() < 10) { + return PartitionLevelCircuitBreakerConfig.DEFAULT; + } + + if (partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites() < 5) { + return PartitionLevelCircuitBreakerConfig.DEFAULT; + } + + return partitionLevelCircuitBreakerConfig; } public static int getStaleCollectionCacheRefreshRetryCount() { @@ -628,13 +654,13 @@ public static int getStaleCollectionCacheRefreshRetryCount() { String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); if (StringUtils.isNotEmpty(valueFromSystemProperty)) { - return Integer.parseInt(valueFromSystemProperty); + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); } String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); if (StringUtils.isNotEmpty(valueFromEnvVariable)) { - return Integer.parseInt(valueFromEnvVariable); + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT); } return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_COUNT; @@ -645,13 +671,13 @@ public static int getStaleCollectionCacheRefreshRetryIntervalInSeconds() { String valueFromSystemProperty = System.getProperty(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); if (StringUtils.isNotEmpty(valueFromSystemProperty)) { - return Integer.parseInt(valueFromSystemProperty); + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); } String valueFromEnvVariable = System.getenv(STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS); if (StringUtils.isNotEmpty(valueFromEnvVariable)) { - return Integer.parseInt(valueFromEnvVariable); + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); } return DEFAULT_STALE_COLLECTION_CACHE_REFRESH_RETRY_INTERVAL_IN_SECONDS; @@ -662,13 +688,13 @@ public static int getStalePartitionUnavailabilityRefreshIntervalInSeconds() { String valueFromSystemProperty = System.getProperty(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); if (StringUtils.isNotEmpty(valueFromSystemProperty)) { - return Integer.parseInt(valueFromSystemProperty); + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); } String valueFromEnvVariable = System.getenv(STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); if (StringUtils.isNotEmpty(valueFromEnvVariable)) { - return Integer.parseInt(valueFromEnvVariable); + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS); } return DEFAULT_STALE_PARTITION_UNAVAILABILITY_REFRESH_INTERVAL_IN_SECONDS; @@ -679,15 +705,32 @@ public static int getAllowedPartitionUnavailabilityDurationInSeconds() { String valueFromSystemProperty = System.getProperty(ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); if (StringUtils.isNotEmpty(valueFromSystemProperty)) { - return Integer.parseInt(valueFromSystemProperty); + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); } String valueFromEnvVariable = System.getenv(ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); if (StringUtils.isNotEmpty(valueFromEnvVariable)) { - return Integer.parseInt(valueFromEnvVariable); + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS); } return DEFAULT_ALLOWED_PARTITION_UNAVAILABILITY_DURATION_IN_SECONDS; } + + public static int getConnectionEstablishmentTimeoutForPartitionRecoveryInSeconds() { + + String valueFromSystemProperty = System.getProperty(CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromSystemProperty)) { + return Math.max(Integer.parseInt(valueFromSystemProperty), DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + } + + String valueFromEnvVariable = System.getenv(CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + + if (StringUtils.isNotEmpty(valueFromEnvVariable)) { + return Math.max(Integer.parseInt(valueFromEnvVariable), DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS); + } + + return DEFAULT_CONNECTION_ESTABLISHMENT_TIMEOUT_FOR_PARTITION_RECOVERY_IN_SECONDS; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 6241a08073bf..7b6c9b66afc1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -557,15 +557,6 @@ private RxDocumentClientImpl(URI serviceEndpoint, this.sessionContainer = new SessionContainer(this.serviceEndpoint.getHost(), disableSessionCapturing); - // todo: revert config before merge - System.setProperty( - "COSMOS.PARTITION_LEVEL_CIRCUIT_BREAKER_CONFIG", - "{\"isPartitionLevelCircuitBreakerEnabled\": true, " - + "\"circuitBreakerType\": \"CONSECUTIVE_EXCEPTION_COUNT_BASED\"," - + "\"consecutiveExceptionCountToleratedForReads\": 10," - + "\"consecutiveExceptionCountToleratedForWrites\": 5," - + "}"); - this.globalPartitionEndpointManagerForCircuitBreaker = new GlobalPartitionEndpointManagerForCircuitBreaker(this.globalEndpointManager); this.globalPartitionEndpointManagerForCircuitBreaker.init(); @@ -698,7 +689,6 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.apiType); this.globalEndpointManager.init(); - this.globalPartitionEndpointManagerForCircuitBreaker.setRxDocumentClientImplSnapshot(this); DatabaseAccount databaseAccountSnapshot = this.initializeGatewayConfigurationReader(); this.resetSessionContainerIfNeeded(databaseAccountSnapshot); @@ -797,6 +787,7 @@ private void initializeDirectConnectivity() { this.clientTelemetry, this.globalEndpointManager); + this.globalPartitionEndpointManagerForCircuitBreaker.setGlobalAddressResolver(this.addressResolver); this.createStoreModel(true); } @@ -5766,11 +5757,6 @@ public String getMasterKeyOrResourceToken() { return this.masterKeyOrResourceToken; } - @Override - public void cacheEnclosingCosmosAsyncClient(CosmosAsyncClient cosmosAsyncClient) { - this.cachedCosmosAsyncClientSnapshot.set(cosmosAsyncClient); - } - private static SqlQuerySpec createLogicalPartitionScanQuerySpec( PartitionKey partitionKey, List partitionKeySelectors) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index c3db07f2c427..0e9ef8907c5c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -3,26 +3,20 @@ package com.azure.cosmos.implementation.circuitBreaker; -import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; import com.azure.cosmos.implementation.Configs; -import com.azure.cosmos.implementation.CosmosPagedFluxOptions; import com.azure.cosmos.implementation.CosmosSchedulers; -import com.azure.cosmos.implementation.Document; import com.azure.cosmos.implementation.FeedOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; -import com.azure.cosmos.implementation.PathsHelper; import com.azure.cosmos.implementation.PointOperationContextForCircuitBreaker; -import com.azure.cosmos.implementation.QueryFeedOperationState; import com.azure.cosmos.implementation.ResourceType; -import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; -import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; -import com.azure.cosmos.models.CosmosQueryRequestOptions; +import com.azure.cosmos.implementation.directconnectivity.GatewayAddressCache; +import com.azure.cosmos.implementation.directconnectivity.GlobalAddressResolver; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; @@ -38,7 +32,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; -import java.util.stream.Collectors; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -53,7 +46,7 @@ public class GlobalPartitionEndpointManagerForCircuitBreaker { private final ConcurrentHashMap partitionKeyRangesWithPossibleUnavailableRegions; private final LocationSpecificHealthContextTransitionHandler locationSpecificHealthContextTransitionHandler; private final ConsecutiveExceptionBasedCircuitBreaker consecutiveExceptionBasedCircuitBreaker; - private final AtomicReference rxDocClientSnapshot; + private final AtomicReference globalAddressResolverSnapshot; private final ConcurrentHashMap locationToRegion; public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager globalEndpointManager) { @@ -65,7 +58,7 @@ public GlobalPartitionEndpointManagerForCircuitBreaker(GlobalEndpointManager glo this.consecutiveExceptionBasedCircuitBreaker = new ConsecutiveExceptionBasedCircuitBreaker(partitionLevelCircuitBreakerConfig); this.locationSpecificHealthContextTransitionHandler = new LocationSpecificHealthContextTransitionHandler(this.globalEndpointManager, this.consecutiveExceptionBasedCircuitBreaker); - this.rxDocClientSnapshot = new AtomicReference<>(); + this.globalAddressResolverSnapshot = new AtomicReference<>(); this.locationToRegion = new ConcurrentHashMap<>(); } @@ -250,69 +243,63 @@ private Flux updateStaleLocationInfo() { PartitionKeyRangeWrapper partitionKeyRangeWrapper = locationToLocationSpecificHealthContextPair.getLeft(); URI locationWithStaleUnavailabilityInfo = locationToLocationSpecificHealthContextPair.getRight().getLeft(); - LocationSpecificHealthContext locationSpecificHealthContext = locationToLocationSpecificHealthContextPair.getRight().getRight(); - - String collectionLink = locationSpecificHealthContext.getLastCollectionLinkSeen(); - CosmosQueryRequestOptions queryRequestOptions = new CosmosQueryRequestOptions(); - queryRequestOptions.setFeedRange(new FeedRangeEpkImpl(partitionKeyRangeWrapper.getPartitionKeyRange().toRange())); - queryRequestOptions.setCosmosEndToEndOperationLatencyPolicyConfig(new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)).build()); - - List applicableReadEndpoints = this.globalEndpointManager - .getApplicableReadEndpoints(Collections.emptyList()) - .stream() - .map(locationEndpoint -> this.globalEndpointManager.getRegionName(locationEndpoint, OperationType.Query)) - .collect(Collectors.toList()); - - applicableReadEndpoints.remove(this.globalEndpointManager.getRegionName(locationWithStaleUnavailabilityInfo, OperationType.Query)); - - queryRequestOptions.setExcludedRegions(applicableReadEndpoints); - queryRequestOptionsAccessor.disablePerPartitionCircuitBreaker(queryRequestOptions); - - String spanName = "queryItems." + collectionLink; - - QueryFeedOperationState queryFeedOperationState = new QueryFeedOperationState( - this.rxDocClientSnapshot.get().getCachedCosmosAsyncClientSnapshot(), - spanName, - PathsHelper.getDatabasePath(collectionLink), - collectionLink, - ResourceType.Document, - OperationType.Read, - spanName, - queryRequestOptions, - new CosmosPagedFluxOptions()); - - RxDocumentClientImpl rxDocumentClient = this.rxDocClientSnapshot.get(); PartitionLevelLocationUnavailabilityInfo partitionLevelLocationUnavailabilityInfo = this.partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); if (partitionLevelLocationUnavailabilityInfo != null) { - return rxDocumentClient - .queryDocuments(collectionLink, "SELECT * FROM C OFFSET 0 LIMIT 1", queryFeedOperationState, Document.class) - .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) - .doOnComplete(() -> { - - if (logger.isDebugEnabled()) { - logger.debug("Partition health recovery query for partition key range : {}-{} and " + - "collection rid : {} has succeeded...", - partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), - partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId()); - } - partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { - - if (locationSpecificContextAsVal != null) { - locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker - .this.locationSpecificHealthContextTransitionHandler.handleSuccess( - locationSpecificContextAsVal, - partitionKeyRangeWrapper, - locationWithStaleUnavailabilityInfoAsKey, - false, - true); - } - return locationSpecificContextAsVal; - }); + GlobalAddressResolver globalAddressResolver = this.globalAddressResolverSnapshot.get(); + + if (globalAddressResolver != null) { + + GatewayAddressCache gatewayAddressCache = globalAddressResolver.getGatewayAddressCache(locationWithStaleUnavailabilityInfo); + + if (gatewayAddressCache != null) { + + return gatewayAddressCache + .submitOpenConnectionTasks(partitionKeyRangeWrapper.getPartitionKeyRange(), partitionKeyRangeWrapper.getCollectionResourceId()) + .publishOn(CosmosSchedulers.PARTITION_AVAILABILITY_STALENESS_CHECK_SINGLE) + .timeout(Duration.ofSeconds(Configs.getConnectionEstablishmentTimeoutForPartitionRecoveryInSeconds())) + .doOnComplete(() -> { + + if (logger.isDebugEnabled()) { + logger.debug("Partition health recovery query for partition key range : {}-{} and " + + "collection rid : {} has succeeded...", + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), + partitionKeyRangeWrapper.getCollectionResourceId()); + } + + partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { + + if (locationSpecificContextAsVal != null) { + locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationSpecificHealthContextTransitionHandler.handleSuccess( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + locationWithStaleUnavailabilityInfoAsKey, + false, + true); + } + return locationSpecificContextAsVal; + }); + }); + } + } else { + partitionLevelLocationUnavailabilityInfo.locationEndpointToLocationSpecificContextForPartition.compute(locationWithStaleUnavailabilityInfo, (locationWithStaleUnavailabilityInfoAsKey, locationSpecificContextAsVal) -> { + + if (locationSpecificContextAsVal != null) { + locationSpecificContextAsVal = GlobalPartitionEndpointManagerForCircuitBreaker + .this.locationSpecificHealthContextTransitionHandler.handleSuccess( + locationSpecificContextAsVal, + partitionKeyRangeWrapper, + locationWithStaleUnavailabilityInfoAsKey, + false, + true); + } + return locationSpecificContextAsVal; }); + } } return Flux.empty(); @@ -354,8 +341,8 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques return applicableWriteEndpoints != null && applicableWriteEndpoints.size() > 1; } - public void setRxDocumentClientImplSnapshot(RxDocumentClientImpl rxDocumentClient) { - this.rxDocClientSnapshot.set(rxDocumentClient); + public void setGlobalAddressResolver(GlobalAddressResolver globalAddressResolver) { + this.globalAddressResolverSnapshot.set(globalAddressResolver); } private class PartitionLevelLocationUnavailabilityInfo { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index 59e100ae2f3c..381efb53023a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -65,7 +65,7 @@ public LocationSpecificHealthContext handleSuccess( logger.debug("Partition {}-{} of collection : {} marked as Healthy from HealthyTentative for region : {}", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId(), + partitionKeyRangeWrapper.getCollectionResourceId(), this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } @@ -85,7 +85,7 @@ public LocationSpecificHealthContext handleSuccess( logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId(), + partitionKeyRangeWrapper.getCollectionResourceId(), this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } @@ -98,7 +98,7 @@ public LocationSpecificHealthContext handleSuccess( logger.debug("Partition {}-{} of collection : {} marked as HealthyTentative from Unavailable for region : {}", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId(), + partitionKeyRangeWrapper.getCollectionResourceId(), this.globalEndpointManager .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } @@ -129,7 +129,7 @@ public LocationSpecificHealthContext handleException( logger.debug("Partition {}-{} of collection : {} marked as HealthyWithFailures from Healthy for region : {}", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId(), + partitionKeyRangeWrapper.getCollectionResourceId(), this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } @@ -145,7 +145,7 @@ public LocationSpecificHealthContext handleException( logger.debug("Partition {}-{} of collection : {} has exception count of {} for region : {}", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId(), + partitionKeyRangeWrapper.getCollectionResourceId(), isReadOnlyRequest ? locationSpecificHealthContextInner.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContextInner.getExceptionCountForWriteForCircuitBreaking(), this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); @@ -175,7 +175,7 @@ public LocationSpecificHealthContext handleException( logger.debug("Partition {}-{} of collection : {} marked as Unavailable from HealthyTentative for region : {}", partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), - partitionKeyRangeWrapper.getResourceId(), + partitionKeyRangeWrapper.getCollectionResourceId(), this.globalEndpointManager .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java index 9a6b770efa54..92218a2e2736 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/PartitionKeyRangeWrapper.java @@ -20,7 +20,7 @@ public PartitionKeyRange getPartitionKeyRange() { return partitionKeyRange; } - public String getResourceId() { + public String getCollectionResourceId() { return resourceId; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java index 47889bea95a8..c6b84d130efa 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java @@ -1141,6 +1141,29 @@ public Mono submitOpenConnectionTask( return Mono.fromFuture(openConnectionTask); } + public Flux submitOpenConnectionTasks( + PartitionKeyRange partitionKeyRange, + String collectionRid) { + + if (this.proactiveOpenConnectionsProcessor == null) { + return Flux.empty(); + } + + checkNotNull(partitionKeyRange, "Argument 'partitionKeyRange' cannot be null!"); + checkNotNull(collectionRid, "Argument 'collectionRid' cannot be null!"); + + PartitionKeyRangeIdentity partitionKeyRangeIdentity = new PartitionKeyRangeIdentity(collectionRid, partitionKeyRange.getId()); + + return this.serverPartitionAddressCache.getAsync(partitionKeyRangeIdentity, cachedAddresses -> Mono.just(cachedAddresses), cachedAddresses -> true) + .flatMapMany(cachedAddresses -> Flux.fromArray(cachedAddresses)) + .flatMap(addressInformation -> Mono.fromFuture( + this.proactiveOpenConnectionsProcessor.submitOpenConnectionTaskOutsideLoop( + collectionRid, + this.addressEndpoint, + addressInformation.getPhysicalUri(), + 1))); + } + private Mono> getServerAddressesViaGatewayWithRetry( RxDocumentServiceRequest request, String collectionRid, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index fed1cbde61a8..ecd0fa583d35 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -317,6 +317,16 @@ private EndpointCache getOrAddEndpoint(URI endpoint) { return endpointCache; } + public GatewayAddressCache getGatewayAddressCache(URI endpoint) { + EndpointCache endpointCache = this.addressCacheByEndpoint.get(endpoint); + + if (endpointCache != null) { + return endpointCache.addressCache; + } + + return null; + } + static class EndpointCache { GatewayAddressCache addressCache; AddressResolver addressResolver; From 271e25f5bf70890c7bfaab2a8e02224e4b4863a8 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 10 Jul 2024 11:08:02 -0400 Subject: [PATCH 124/140] Refactoring. --- ...onsecutiveExceptionBasedCircuitBreaker.java | 5 +---- ...titionEndpointManagerForCircuitBreaker.java | 2 -- .../LocationSpecificHealthContext.java | 18 ++---------------- ...SpecificHealthContextTransitionHandler.java | 3 +-- 4 files changed, 4 insertions(+), 24 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index ecb45f4d1ca4..102d6d6d6a8e 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -34,8 +34,7 @@ public LocationSpecificHealthContext handleException(LocationSpecificHealthConte LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()) - .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()); + .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); if (isReadOnlyRequest) { @@ -81,7 +80,6 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext LocationSpecificHealthContext.Builder builder = new LocationSpecificHealthContext.Builder() .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()) .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); if (isReadOnlyRequest) { @@ -109,7 +107,6 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext builder = new LocationSpecificHealthContext.Builder() .withUnavailableSince(locationSpecificHealthContext.getUnavailableSince()) .withLocationHealthStatus(locationSpecificHealthContext.getLocationHealthStatus()) - .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()) .withExceptionThresholdBreached(locationSpecificHealthContext.isExceptionThresholdBreached()); if (isReadOnlyRequest) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 0e9ef8907c5c..5f28e181d365 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -377,7 +377,6 @@ private boolean handleException( .withUnavailableSince(Instant.MAX) .withLocationHealthStatus(LocationHealthStatus.HealthyWithFailures) .withExceptionThresholdBreached(false) - .withLastCollectionLinkSeen(collectionLink) .build(); } @@ -428,7 +427,6 @@ private void handleSuccess( .withUnavailableSince(Instant.MAX) .withLocationHealthStatus(LocationHealthStatus.Healthy) .withExceptionThresholdBreached(false) - .withLastCollectionLinkSeen(collectionLink) .build(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java index 7e2f90674f5d..472778e50fcb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContext.java @@ -24,7 +24,6 @@ public class LocationSpecificHealthContext implements Serializable { private final Instant unavailableSince; private final LocationHealthStatus locationHealthStatus; private final boolean isExceptionThresholdBreached; - private final String lastCollectionLinkSeen; LocationSpecificHealthContext( int successCountForWriteForRecovery, @@ -33,8 +32,7 @@ public class LocationSpecificHealthContext implements Serializable { int exceptionCountForReadForCircuitBreaking, Instant unavailableSince, LocationHealthStatus locationHealthStatus, - boolean isExceptionThresholdBreached, - String lastCollectionLinkSeen) { + boolean isExceptionThresholdBreached) { this.successCountForWriteForRecovery = successCountForWriteForRecovery; this.exceptionCountForWriteForCircuitBreaking = exceptionCountForWriteForCircuitBreaking; @@ -43,7 +41,6 @@ public class LocationSpecificHealthContext implements Serializable { this.unavailableSince = unavailableSince; this.locationHealthStatus = locationHealthStatus; this.isExceptionThresholdBreached = isExceptionThresholdBreached; - this.lastCollectionLinkSeen = lastCollectionLinkSeen; } public boolean isExceptionThresholdBreached() { @@ -80,10 +77,6 @@ public LocationHealthStatus getLocationHealthStatus() { return this.locationHealthStatus; } - public String getLastCollectionLinkSeen() { - return this.lastCollectionLinkSeen; - } - static class Builder { private int exceptionCountForWriteForCircuitBreaking; @@ -93,7 +86,6 @@ static class Builder { private Instant unavailableSince; private LocationHealthStatus locationHealthStatus; private boolean isExceptionThresholdBreached; - private String lastCollectionLinkSeen; public Builder() {} @@ -132,11 +124,6 @@ public Builder withExceptionThresholdBreached(boolean exceptionThresholdBreached return this; } - public Builder withLastCollectionLinkSeen(String lastCollectionLinkSeen) { - this.lastCollectionLinkSeen = lastCollectionLinkSeen; - return this; - } - public LocationSpecificHealthContext build() { return new LocationSpecificHealthContext( @@ -146,8 +133,7 @@ public LocationSpecificHealthContext build() { this.exceptionCountForReadForCircuitBreaking, this.unavailableSince, this.locationHealthStatus, - this.isExceptionThresholdBreached, - this.lastCollectionLinkSeen); + this.isExceptionThresholdBreached); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index 381efb53023a..2848803a8bd2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -193,8 +193,7 @@ public LocationSpecificHealthContext transitionHealthStatus(LocationSpecificHeal .withSuccessCountForWriteForRecovery(0) .withExceptionCountForWriteForCircuitBreaking(0) .withSuccessCountForReadForRecovery(0) - .withExceptionCountForReadForCircuitBreaking(0) - .withLastCollectionLinkSeen(locationSpecificHealthContext.getLastCollectionLinkSeen()); + .withExceptionCountForReadForCircuitBreaking(0); switch (newStatus) { case Healthy: From 05e6c06a351baa8f6ee7dcb6e531dc7ac84c95e6 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 10 Jul 2024 11:56:37 -0400 Subject: [PATCH 125/140] Refactoring. --- .../PartitionLevelCircuitBreakerTests.java | 28 +++++++++---------- 1 file changed, 13 insertions(+), 15 deletions(-) rename sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/{implementation => }/PartitionLevelCircuitBreakerTests.java (99%) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java similarity index 99% rename from sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java rename to sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index ed7f561c439b..2469d936d40e 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -1,22 +1,20 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -package com.azure.cosmos.implementation; - -import com.azure.cosmos.ConnectionMode; -import com.azure.cosmos.CosmosAsyncClient; -import com.azure.cosmos.CosmosAsyncContainer; -import com.azure.cosmos.CosmosAsyncDatabase; -import com.azure.cosmos.CosmosClientBuilder; -import com.azure.cosmos.CosmosDiagnosticsContext; -import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfig; -import com.azure.cosmos.CosmosEndToEndOperationLatencyPolicyConfigBuilder; -import com.azure.cosmos.CosmosException; -import com.azure.cosmos.CosmosRegionSwitchHint; -import com.azure.cosmos.SessionRetryOptionsBuilder; -import com.azure.cosmos.TestObject; -import com.azure.cosmos.ThresholdBasedAvailabilityStrategy; +package com.azure.cosmos; + import com.azure.cosmos.faultinjection.FaultInjectionTestBase; +import com.azure.cosmos.implementation.ConnectionPolicy; +import com.azure.cosmos.implementation.DatabaseAccount; +import com.azure.cosmos.implementation.DatabaseAccountLocation; +import com.azure.cosmos.implementation.DocumentCollection; +import com.azure.cosmos.implementation.GlobalEndpointManager; +import com.azure.cosmos.implementation.HttpConstants; +import com.azure.cosmos.implementation.ImplementationBridgeHelpers; +import com.azure.cosmos.implementation.PartitionKeyRange; +import com.azure.cosmos.implementation.RxDocumentClientImpl; +import com.azure.cosmos.implementation.TestConfigurations; +import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; import com.azure.cosmos.implementation.circuitBreaker.ConsecutiveExceptionBasedCircuitBreaker; From a51ed160f152778466133a3c7059ce8856b6b625 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 10 Jul 2024 19:28:25 -0400 Subject: [PATCH 126/140] Refactoring. --- sdk/cosmos/azure-cosmos-benchmark/pom.xml | 63 ++ sdk/cosmos/azure-cosmos-tests/pom.xml | 63 ++ .../PartitionLevelCircuitBreakerTests.java | 647 +++++++++++++++--- .../com/azure/cosmos/rx/TestSuiteBase.java | 4 +- .../circuit-breaker-misc-direct-testng.xml | 35 + .../circuit-breaker-misc-gateway-testng.xml | 35 + ...cuit-breaker-read-all-read-many-testng.xml | 35 + sdk/cosmos/live-platform-matrix.json | 5 +- 8 files changed, 804 insertions(+), 83 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-direct-testng.xml create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-gateway-testng.xml create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-read-all-read-many-testng.xml diff --git a/sdk/cosmos/azure-cosmos-benchmark/pom.xml b/sdk/cosmos/azure-cosmos-benchmark/pom.xml index 2f8a5290e170..4b45e7cd2f48 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/pom.xml +++ b/sdk/cosmos/azure-cosmos-benchmark/pom.xml @@ -451,6 +451,69 @@ Licensed under the MIT License. + + + circuit-breaker-read-all-read-many + + circuit-breaker-read-all-read-many + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-read-all-read-many-testng.xml + + + + + + + + + circuit-breaker-misc-direct + + circuit-breaker-misc-direct + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-misc-direct-testng.xml + + + + + + + + + circuit-breaker-misc-gateway + + circuit-breaker-misc-gateway + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-misc-gateway-testng.xml + + + + + + flaky-multi-master diff --git a/sdk/cosmos/azure-cosmos-tests/pom.xml b/sdk/cosmos/azure-cosmos-tests/pom.xml index 2c181da7b109..6db98af64b35 100644 --- a/sdk/cosmos/azure-cosmos-tests/pom.xml +++ b/sdk/cosmos/azure-cosmos-tests/pom.xml @@ -465,6 +465,69 @@ Licensed under the MIT License. + + + circuit-breaker-read-all-read-many + + circuit-breaker-read-all-read-many + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-read-all-read-many-testng.xml + + + + + + + + + circuit-breaker-misc-direct + + circuit-breaker-misc-direct + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-misc-direct-testng.xml + + + + + + + + + circuit-breaker-misc-gateway + + circuit-breaker-misc-gateway + + + + + org.apache.maven.plugins + maven-failsafe-plugin + 3.2.5 + + + src/test/resources/circuit-breaker-misc-gateway-testng.xml + + + + + + flaky-multi-master diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 2469d936d40e..d1608f42a6f5 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -86,13 +86,13 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { private static final CosmosEndToEndOperationLatencyPolicyConfig NO_END_TO_END_TIMEOUT = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofDays(1)).build(); - private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY - = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) + private static final CosmosEndToEndOperationLatencyPolicyConfig THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(3)) .availabilityStrategy(new ThresholdBasedAvailabilityStrategy()) .build(); - private static final CosmosEndToEndOperationLatencyPolicyConfig TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY - = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(2)) + private static final CosmosEndToEndOperationLatencyPolicyConfig THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY + = new CosmosEndToEndOperationLatencyPolicyConfigBuilder(Duration.ofSeconds(3)) .build(); Consumer validateDiagnosticsContextHasFirstPreferredRegionOnly = (ctx) -> { @@ -225,7 +225,7 @@ public PartitionLevelCircuitBreakerTests(CosmosClientBuilder cosmosClientBuilder super(cosmosClientBuilder); } - @BeforeClass(groups = {"multi-master"}) + @BeforeClass(groups = {"circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}) public void beforeClass() { try (CosmosAsyncClient testClient = getClientBuilder().buildAsyncClient()) { RxDocumentClientImpl documentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(testClient); @@ -261,8 +261,8 @@ public void beforeClass() { } } - @DataProvider(name = "miscellaneousOpTestConfigs") - public Object[][] miscellaneousOpTestConfigs() { + @DataProvider(name = "miscellaneousOpTestConfigsDirect") + public Object[][] miscellaneousOpTestConfigsDirect() { // General testing flow: // Below tests choose a fault type to inject, regions to inject the fault in @@ -291,7 +291,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -313,7 +313,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -335,7 +335,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for DELETE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -357,7 +357,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for PATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -379,7 +379,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -401,7 +401,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -424,7 +424,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for BATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -446,7 +446,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -467,7 +467,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // Server-generated 410 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -480,7 +480,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildServerGeneratedGoneErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -502,7 +502,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildServerGeneratedGoneErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -524,7 +524,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildServerGeneratedGoneErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -546,7 +546,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildServerGeneratedGoneErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -568,7 +568,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildServerGeneratedGoneErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -590,7 +590,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildServerGeneratedGoneErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -613,7 +613,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildServerGeneratedGoneErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -623,11 +623,11 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ONLY_DIRECT_MODE }, -// // Response-delay injected into first preferred region for CREATE_ITEM operation -// // injected into all replicas of the faulty EPK range (although only the primary replica -// // is ever involved - effectively doesn't impact the assertions for this test). -// // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) -// // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. + // Response-delay injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException (since end-to-end timeout is configured) + // and only to succeed when moved over to the second preferred region when the first preferred region has been short-circuited. new Object[]{ String.format("Test with faulty %s with response delay in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), new FaultInjectionRuleParamsWrapper() @@ -636,7 +636,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), this.buildTransitTimeoutFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -659,7 +659,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)) .withResponseDelay(Duration.ofSeconds(6)), this.buildTransitTimeoutFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -728,7 +728,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), this.buildInternalServerErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, @@ -736,7 +736,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -750,7 +750,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(6), this.buildInternalServerErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, @@ -758,7 +758,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -771,7 +771,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), this.buildInternalServerErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, @@ -779,7 +779,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -794,7 +794,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withHitLimit(11), this.buildInternalServerErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasInternalServerError, @@ -802,9 +802,9 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, - // 429 injected into first preferred region for READ_ITEM operation + // 449 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. @@ -815,7 +815,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -823,9 +823,9 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, - // 429 injected into first preferred region for CREATE_ITEM operation + // 449 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica // is ever involved - effectively doesn't impact the assertions for this test). // Expectation is for the operation to hit OperationCancelledException and only to succeed when @@ -837,7 +837,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -845,9 +845,9 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, - // 429 injected into first preferred region for QUERY_ITEM operation + // 449 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to hit OperationCancelledException and only to succeed when // moved over to the second preferred region when the first preferred region has been short-circuited. @@ -859,7 +859,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -867,7 +867,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // 404/1002 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -880,7 +880,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildReadWriteSessionNotAvailableFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -902,7 +902,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildReadWriteSessionNotAvailableFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, CosmosRegionSwitchHint.LOCAL_REGION_PREFERRED, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -924,7 +924,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildRetryWithFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -946,7 +946,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildRetryWithFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasOperationCancelledException, @@ -975,7 +975,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, // 503 injected into all regions for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -997,12 +997,12 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, - //503 injected into all regions for QUERY_ITEM operation - //injected into all replicas of the faulty EPK range. - //Expectation is for the operation to hit 503 until fault injection has it its injection limits. - //After that, the operation should see a success from the first preferred region. + // 503 injected into all regions for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. new Object[] { String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), new FaultInjectionRuleParamsWrapper() @@ -1018,12 +1018,12 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ONLY_DIRECT_MODE }, - //429 injected into first preferred region for READ_ITEM operation - //injected into all replicas of the faulty EPK range. - //Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) - //and only from the second preferred region when short-circuiting has kicked in for the first preferred region. + // 449 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) + // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. new Object[]{ String.format("Test with faulty %s with too many requests error in first preferred region with threshold-based availability strategy enabled.", FaultInjectionOperationType.READ_ITEM), new FaultInjectionRuleParamsWrapper() @@ -1031,7 +1031,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, @@ -1041,7 +1041,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ONLY_DIRECT_MODE }, - // 429 injected into first preferred region for CREATE_ITEM operation + // 449 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) // and only from the second preferred region when short-circuiting has kicked in for the first preferred region. @@ -1052,7 +1052,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, @@ -1062,7 +1062,7 @@ public Object[][] miscellaneousOpTestConfigs() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ONLY_DIRECT_MODE }, - // 429 injected into first preferred region for QUERY_ITEM operation + // 449 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled & non-idempotent write retry policy enabled) // and will have two regions contacted post circuit breaking (one for QueryPlan and the other for the data plane request). @@ -1073,7 +1073,7 @@ public Object[][] miscellaneousOpTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, this.validateResponseHasSuccess, @@ -1086,6 +1086,433 @@ public Object[][] miscellaneousOpTestConfigs() { }; } + @DataProvider(name = "miscellaneousOpTestConfigsGateway") + public Object[][] miscellaneousOpTestConfigsGateway() { + + // General testing flow: + // Below tests choose a fault type to inject, regions to inject the fault in + // and the operation type for which the fault is injected. The idea is to assert + // what happens when faults are being injected - should an exception bubble up + // in the process [or] should the operation succeed, region contacted when circuit + // breaking has kicked in and region contacted when region + partition combination is + // being marked back as UnhealthyTentative (eligible to accept requests) + return new Object[][]{ + // Server-generated 503 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + validateResponseHasSuccess, + validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.REPLACE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.REPLACE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for DELETE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.DELETE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.DELETE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for PATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.PATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.PATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but include + // the second preferred region when the first preferred region has been short-circuited. + // For queries which require a QueryPlan, the first preferred region is contacted (not a data plane request + // which will hit a data partition so is not eligible for circuit breaking). + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for BATCH_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.BATCH_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.BATCH_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to succeed in all runs but to move over to + // the second preferred region when the first preferred region has been short-circuited. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasSuccess, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, +// 500 (internal server error) injected into first preferred region for READ_ITEM operation +// injected into all replicas of the faulty EPK range. +// Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation +// should see a success from the second preferred region. + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.READ_FEED_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_FEED_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to fail with 500 until short-circuiting kicks in where the operation + // should see a success from the second preferred region. Although, after short-circuiting, a query operation + // will see request for QueryPlan from the short-circuited region. + { + String.format("Test with faulty %s with internal server error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 429 injected into first preferred region for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 429 injected into first preferred region for CREATE_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.CREATE_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.CREATE_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasSecondPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 429 injected into first preferred region for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit OperationCancelledException and only to succeed when + // moved over to the second preferred region when the first preferred region has been short-circuited. + // QUERY_ITEM operation will see requests hit even for short-circuited region for fetching the QueryPlan. + { + String.format("Test with faulty %s with too many requests error in the first preferred region.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)) + .withFaultInjectionDuration(Duration.ofSeconds(60)), + this.buildTooManyRequestsErrorFaultInjectionRules, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasOperationCancelledException, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 503 injected into all regions for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + // 503 injected into all regions for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (although only the primary replica + // is ever involved - effectively doesn't impact the assertions for this test). + // Expectation is for the operation to hit 503 until fault injection has it its injection limits. + // After that, the operation should see a success from the first preferred region. + new Object[]{ + String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(6), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + //503 injected into all regions for QUERY_ITEM operation + //injected into all replicas of the faulty EPK range. + //Expectation is for the operation to hit 503 until fault injection has it its injection limits. + //After that, the operation should see a success from the first preferred region. + new Object[] { + String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildServiceUnavailableFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasServiceUnavailableError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE + }, + }; + } + @DataProvider(name = "readManyTestConfigs") public Object[][] readManyTestConfigs() { @@ -1169,7 +1596,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServerGeneratedGoneErrorFaultInjectionRules, executeReadManyOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, @@ -1190,7 +1617,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildTooManyRequestsErrorFaultInjectionRules, executeReadManyOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, @@ -1211,7 +1638,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildReadWriteSessionNotAvailableFaultInjectionRules, executeReadManyOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, @@ -1254,7 +1681,7 @@ public Object[][] readManyTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, executeReadManyOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasSuccess, this.validateResponseHasSuccess, @@ -1351,7 +1778,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildServerGeneratedGoneErrorFaultInjectionRules, executeReadAllOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, @@ -1372,7 +1799,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildTooManyRequestsErrorFaultInjectionRules, executeReadAllOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, @@ -1393,7 +1820,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionApplicableRegions(this.writeRegions.subList(0, 1)), this.buildReadWriteSessionNotAvailableFaultInjectionRules, executeReadAllOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITHOUT_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasOperationCancelledException, this.validateResponseHasSuccess, @@ -1436,7 +1863,7 @@ public Object[][] readAllTestConfigs() { .withFaultInjectionDuration(Duration.ofSeconds(60)), this.buildTooManyRequestsErrorFaultInjectionRules, executeReadAllOperation, - TWO_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, + THREE_SECOND_END_TO_END_TIMEOUT_WITH_THRESHOLD_BASED_AVAILABILITY_STRATEGY, NO_REGION_SWITCH_HINT, this.validateResponseHasSuccess, this.validateResponseHasSuccess, @@ -1448,8 +1875,67 @@ public Object[][] readAllTestConfigs() { }; } - @Test(groups = {"multi-master"}, dataProvider = "miscellaneousOpTestConfigs", timeOut = 80 * TIMEOUT) - public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegions( + @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "miscellaneousOpTestConfigsDirect", timeOut = 80 * TIMEOUT) + public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDirect( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes) { + + executeMiscOperationHitsTerminalExceptionAcrossKRegions( + testId, + faultInjectionRuleParamsWrapper, + generateFaultInjectionRules, + e2eLatencyPolicyCfg, + regionSwitchHint, + nonIdempotentWriteRetriesEnabled, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + allowedConnectionModes); + } + + @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "miscellaneousOpTestConfigsGateway", timeOut = 80 * TIMEOUT) + public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsGateway( + String testId, + FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, + Function> generateFaultInjectionRules, + CosmosEndToEndOperationLatencyPolicyConfig e2eLatencyPolicyCfg, + CosmosRegionSwitchHint regionSwitchHint, + Boolean nonIdempotentWriteRetriesEnabled, + Consumer> validateResponseInPresenceOfFaults, + Consumer> validateResponseInAbsenceOfFaults, + Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, + Consumer validateRegionsContactedWhenExceptionBubblesUp, + Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + Set allowedConnectionModes) { + + executeMiscOperationHitsTerminalExceptionAcrossKRegions( + testId, + faultInjectionRuleParamsWrapper, + generateFaultInjectionRules, + e2eLatencyPolicyCfg, + regionSwitchHint, + nonIdempotentWriteRetriesEnabled, + validateResponseInPresenceOfFaults, + validateResponseInAbsenceOfFaults, + validateRegionsContactedWhenShortCircuitingHasKickedIn, + validateRegionsContactedWhenExceptionBubblesUp, + validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + allowedConnectionModes); + } + + private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, Function> generateFaultInjectionRules, @@ -1556,7 +2042,8 @@ public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegions( 15); } - @Test(groups = {"multi-master"}, dataProvider = "readManyTestConfigs", timeOut = 80 * TIMEOUT) + + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readManyTestConfigs", timeOut = 4 * TIMEOUT) public void readManyOperationHitsTerminalExceptionAcrossKRegions( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -1660,7 +2147,7 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( 15); } - @Test(groups = {"multi-master"}, dataProvider = "readAllTestConfigs", timeOut = 80 * TIMEOUT) + @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readAllTestConfigs", timeOut = 4 * TIMEOUT) public void readAllOperationHitsTerminalExceptionAcrossKRegions( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -2297,7 +2784,7 @@ private String resolveContainerIdByFaultInjectionOperationType(FaultInjectionOpe } } - @AfterClass(groups = {"multi-master"}) + @AfterClass(groups = {"circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}) public void afterClass() { CosmosClientBuilder clientBuilder = new CosmosClientBuilder() .endpoint(TestConfigurations.HOST) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index 68875364b16c..c9cf5c5a9f6c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -201,7 +201,7 @@ public CosmosAsyncDatabase getDatabase(String id) { } } - @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SETUP_TIMEOUT) + @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many", "emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SETUP_TIMEOUT) public void beforeSuite() { logger.info("beforeSuite Started"); @@ -217,7 +217,7 @@ public void beforeSuite() { } } - @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) + @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many", "emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) public void afterSuite() { logger.info("afterSuite Started"); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-direct-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-direct-testng.xml new file mode 100644 index 000000000000..37adba46a374 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-direct-testng.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-gateway-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-gateway-testng.xml new file mode 100644 index 000000000000..b68cad70628e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-misc-gateway-testng.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-read-all-read-many-testng.xml b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-read-all-read-many-testng.xml new file mode 100644 index 000000000000..541cd8cbb867 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/resources/circuit-breaker-read-all-read-many-testng.xml @@ -0,0 +1,35 @@ + + + + + + + + + + + + + + diff --git a/sdk/cosmos/live-platform-matrix.json b/sdk/cosmos/live-platform-matrix.json index a564450f6fec..933a3eeb6322 100644 --- a/sdk/cosmos/live-platform-matrix.json +++ b/sdk/cosmos/live-platform-matrix.json @@ -8,6 +8,9 @@ "-Pdirect": "Direct", "-Pmulti-master": "MultiMaster", "-Pflaky-multi-master": "FlakyMultiMaster", + "-Pcircuit-breaker-misc-direct": "CircuitBreakerMiscDirect", + "-Pcircuit-breaker-misc-gateway": "CircuitBreakerMiscGateway", + "-Pcircuit-breaker-read-all-read-many": "CircuitBreakerReadAllAndReadMany", "-Pmulti-region": "MultiRegion", "-Plong": "Long", "-DargLine=\"-Dazure.cosmos.directModeProtocol=Tcp\"": "TCP", @@ -101,7 +104,7 @@ } }, "PROTOCOLS": "[\"Tcp\"]", - "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pmulti-master", "-Pflaky-multi-master", "-Pfast", "-Pdirect" ], + "ProfileFlag": [ "-Pcfp-split", "-Psplit", "-Pquery", "-Pmulti-master", "-Pflaky-multi-master", "-Pcircuit-breaker-misc-direct", "-Pcircuit-breaker-misc-gateway", "-Pcircuit-breaker-read-all-read-many", "-Pfast", "-Pdirect" ], "Agent": { "ubuntu": { "OSVmImage": "env:LINUXVMIMAGE", "Pool": "env:LINUXPOOL" } } From d39017b349cf28a7becb8805e0ce9bcad73494be Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 11 Jul 2024 11:15:01 -0400 Subject: [PATCH 127/140] Fixing test pipeline. --- sdk/cosmos/azure-cosmos-benchmark/pom.xml | 63 ----------------------- 1 file changed, 63 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-benchmark/pom.xml b/sdk/cosmos/azure-cosmos-benchmark/pom.xml index 4b45e7cd2f48..2f8a5290e170 100644 --- a/sdk/cosmos/azure-cosmos-benchmark/pom.xml +++ b/sdk/cosmos/azure-cosmos-benchmark/pom.xml @@ -451,69 +451,6 @@ Licensed under the MIT License. - - - circuit-breaker-read-all-read-many - - circuit-breaker-read-all-read-many - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/circuit-breaker-read-all-read-many-testng.xml - - - - - - - - - circuit-breaker-misc-direct - - circuit-breaker-misc-direct - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/circuit-breaker-misc-direct-testng.xml - - - - - - - - - circuit-breaker-misc-gateway - - circuit-breaker-misc-gateway - - - - - org.apache.maven.plugins - maven-failsafe-plugin - 3.2.5 - - - src/test/resources/circuit-breaker-misc-gateway-testng.xml - - - - - - flaky-multi-master From 63e4d764d646ae94d011282e08c5d342165c65e0 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 13 Jul 2024 21:24:22 -0400 Subject: [PATCH 128/140] Reacting to review comments. --- .../PartitionLevelCircuitBreakerTests.java | 1 - ...CollectionRoutingMapNotFoundException.java | 27 +++++++++++++++++ .../CosmosQueryRequestOptionsImpl.java | 11 ------- .../DocumentServiceRequestContext.java | 10 +------ ...FeedOperationContextForCircuitBreaker.java | 1 - .../cosmos/implementation/HttpConstants.java | 3 +- .../ImplementationBridgeHelpers.java | 4 --- .../implementation/RxDocumentClientImpl.java | 30 ++----------------- .../batch/BulkExecutorUtil.java | 23 +------------- ...nsecutiveExceptionBasedCircuitBreaker.java | 1 + ...itionEndpointManagerForCircuitBreaker.java | 5 +++- .../models/CosmosQueryRequestOptions.java | 19 ------------ 12 files changed, 38 insertions(+), 97 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CollectionRoutingMapNotFoundException.java diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index d1608f42a6f5..81cc64576995 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -2042,7 +2042,6 @@ private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( 15); } - @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readManyTestConfigs", timeOut = 4 * TIMEOUT) public void readManyOperationHitsTerminalExceptionAcrossKRegions( String testId, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CollectionRoutingMapNotFoundException.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CollectionRoutingMapNotFoundException.java new file mode 100644 index 000000000000..ad0a9c82fed1 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CollectionRoutingMapNotFoundException.java @@ -0,0 +1,27 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos.implementation; + +import com.azure.cosmos.CosmosException; +import com.azure.cosmos.implementation.directconnectivity.WFConstants; + +public class CollectionRoutingMapNotFoundException extends CosmosException { + private static final long serialVersionUID = 1L; + + /** + * Instantiates a new Invalid partition exception. + * + * @param msg the msg + */ + public CollectionRoutingMapNotFoundException(String msg) { + super(HttpConstants.StatusCodes.NOTFOUND, msg); + setSubStatus(); + } + + private void setSubStatus() { + this.getResponseHeaders().put( + WFConstants.BackendHeaders.SUB_STATUS, + Integer.toString(HttpConstants.SubStatusCodes.INCORRECT_CONTAINER_RID_SUB_STATUS)); + } +} diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java index 69f3ea98d513..b7b7b3f50b0b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/CosmosQueryRequestOptionsImpl.java @@ -31,7 +31,6 @@ public final class CosmosQueryRequestOptionsImpl extends CosmosQueryRequestOptio private Integer maxItemCountForVectorSearch; private List cancelledRequestDiagnosticsTracker = new ArrayList<>(); private String collectionRid; - private boolean isPerPartitionCircuitBreakingDisabled = false; /** * Instantiates a new query request options. @@ -71,7 +70,6 @@ public CosmosQueryRequestOptionsImpl(CosmosQueryRequestOptionsImpl options) { this.cancelledRequestDiagnosticsTracker = options.cancelledRequestDiagnosticsTracker; this.maxItemCountForVectorSearch = options.maxItemCountForVectorSearch; this.collectionRid = options.collectionRid; - this.isPerPartitionCircuitBreakingDisabled = options.isPerPartitionCircuitBreakingDisabled; } /** @@ -399,13 +397,4 @@ public String getCollectionRid() { public void setCollectionRid(String collectionRid) { this.collectionRid = collectionRid; } - - public boolean isPerPartitionCircuitBreakingDisabled() { - return this.isPerPartitionCircuitBreakingDisabled; - } - - public CosmosQueryRequestOptionsImpl setPerPartitionCircuitBreakingDisabled(boolean isPartitionCircuitBreakingDisabled) { - this.isPerPartitionCircuitBreakingDisabled = isPartitionCircuitBreakingDisabled; - return this; - } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index 6b407eb45f18..953d8e870413 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -66,7 +66,6 @@ public class DocumentServiceRequestContext implements Cloneable { private volatile Supplier clientRetryPolicySupplier; private volatile Utils.ValueHolder> regionToLocationSpecificHealthContext = new Utils.ValueHolder<>(); - private volatile boolean isPerPartitionCircuitBreakerDisabledForRequest = false; public DocumentServiceRequestContext() {} @@ -151,6 +150,7 @@ public DocumentServiceRequestContext clone() { context.endToEndOperationLatencyPolicyConfig = this.endToEndOperationLatencyPolicyConfig; context.unavailableRegionsForPartition = this.unavailableRegionsForPartition; context.feedOperationContextForCircuitBreaker = this.feedOperationContextForCircuitBreaker; + context.pointOperationContextForCircuitBreaker = this.pointOperationContextForCircuitBreaker; return context; } @@ -237,13 +237,5 @@ public Utils.ValueHolder> getLocation public void setLocationToLocationSpecificHealthContext(Map regionToLocationSpecificHealthContext) { this.regionToLocationSpecificHealthContext.v = regionToLocationSpecificHealthContext; } - - public boolean isPerPartitionCircuitBreakerDisabledForRequest() { - return this.isPerPartitionCircuitBreakerDisabledForRequest; - } - - public void setPerPartitionCircuitBreakerDisabledForRequest(boolean perPartitionCircuitBreakerDisabledForRequest) { - this.isPerPartitionCircuitBreakerDisabledForRequest = perPartitionCircuitBreakerDisabledForRequest; - } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java index b58f4ab6e89c..0858b766f182 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/FeedOperationContextForCircuitBreaker.java @@ -3,7 +3,6 @@ package com.azure.cosmos.implementation; -import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.PartitionKeyRangeWrapper; import java.util.Map; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java index a1ae403a64f5..795b32a57015 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java @@ -434,6 +434,8 @@ public static class SubStatusCodes { public static final int TIMEOUT_GENERATED_410 = 20002; // Client generated operation timeout exception public static final int CLIENT_OPERATION_TIMEOUT = 20008; + // Sub-status code paired with 408 status code + public static final int TRANSIT_TIMEOUT = 20911; // IMPORTANT - below sub status codes have no corresponding .Net // version, because they are only applicable in Java @@ -449,7 +451,6 @@ public static class SubStatusCodes { public static final int INVALID_BACKEND_RESPONSE = 20908; public static final int UNKNOWN_QUORUM_RESULT = 20909; public static final int INVALID_RESULT = 20910; - public static final int TRANSIT_TIMEOUT = 20911; //SDK Codes (Server) // IMPORTANT - whenever possible use consistency substatus codes that .Net SDK also uses diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 2bf1e23307e5..74a965e03229 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -304,10 +304,6 @@ void setCancelledRequestDiagnosticsTracker( void setCollectionRid(CosmosQueryRequestOptions options, String collectionRid); String getCollectionRid(CosmosQueryRequestOptions options); - - boolean isPerPartitionCircuitBreakerDisabled(CosmosQueryRequestOptions options); - - void disablePerPartitionCircuitBreaker(CosmosQueryRequestOptions options); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index a9f83734be24..c5d05dd73af2 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -3959,14 +3959,9 @@ public Mono populateFeedRangeHeader(RxDocumentServiceR @Override public Mono addPartitionLevelUnavailableRegionsOnRequest(RxDocumentServiceRequest request, CosmosQueryRequestOptions queryRequestOptions) { - ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.CosmosQueryRequestOptionsAccessor queryRequestOptionsAccessor - = ImplementationBridgeHelpers.CosmosQueryRequestOptionsHelper.getCosmosQueryRequestOptionsAccessor(); - - request.requestContext.setPerPartitionCircuitBreakerDisabledForRequest(queryRequestOptionsAccessor.isPerPartitionCircuitBreakerDisabled(queryRequestOptions)); - if (RxDocumentClientImpl.this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(request)) { - String collectionRid = queryRequestOptionsAccessor.getCollectionRid(queryRequestOptions); + String collectionRid = RxDocumentClientImpl.qryOptAccessor.getCollectionRid(queryRequestOptions); checkNotNull(collectionRid, "Argument 'collectionRid' cannot be null!"); @@ -3974,7 +3969,7 @@ public Mono addPartitionLevelUnavailableRegionsOnReque .flatMap(collectionRoutingMapValueHolder -> { if (collectionRoutingMapValueHolder.v == null) { - return Mono.error(new NotFoundException("collectionRoutingMap could not be found!")); + return Mono.error(new CollectionRoutingMapNotFoundException("Argument 'collectionRoutingMapValueHolder.v' cannot be null!")); } RxDocumentClientImpl.this.addPartitionLevelUnavailableRegionsForFeedRequest(request, queryRequestOptions, collectionRoutingMapValueHolder.v); @@ -6756,25 +6751,4 @@ public void reset() { this.isMerged.set(false); } } - - private static class CollectionRoutingMapNotFoundException extends CosmosException { - - private static final long serialVersionUID = 1L; - - /** - * Instantiates a new Invalid partition exception. - * - * @param msg the msg - */ - public CollectionRoutingMapNotFoundException(String msg) { - super(HttpConstants.StatusCodes.NOTFOUND, msg); - setSubStatus(); - } - - private void setSubStatus() { - this.getResponseHeaders().put( - WFConstants.BackendHeaders.SUB_STATUS, - Integer.toString(HttpConstants.SubStatusCodes.INCORRECT_CONTAINER_RID_SUB_STATUS)); - } - } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java index c695378e6720..0537a225e248 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/batch/BulkExecutorUtil.java @@ -9,6 +9,7 @@ import com.azure.cosmos.CosmosItemSerializer; import com.azure.cosmos.ThrottlingRetryOptions; import com.azure.cosmos.implementation.AsyncDocumentClient; +import com.azure.cosmos.implementation.CollectionRoutingMapNotFoundException; import com.azure.cosmos.implementation.DocumentCollection; import com.azure.cosmos.implementation.HttpConstants; import com.azure.cosmos.implementation.ResourceThrottleRetryPolicy; @@ -200,26 +201,4 @@ static boolean isWriteOperation(CosmosItemOperationType cosmosItemOperationType) cosmosItemOperationType == CosmosItemOperationType.DELETE || cosmosItemOperationType == CosmosItemOperationType.PATCH; } - - static class CollectionRoutingMapNotFoundException extends CosmosException { - - private static final long serialVersionUID = 1L; - - /** - * Instantiates a new Invalid partition exception. - * - * @param msg the msg - */ - public CollectionRoutingMapNotFoundException(String msg) { - super(HttpConstants.StatusCodes.NOTFOUND, msg); - setSubStatus(); - } - - private void setSubStatus() { - this.getResponseHeaders().put( - WFConstants.BackendHeaders.SUB_STATUS, - Integer.toString(HttpConstants.SubStatusCodes.INCORRECT_CONTAINER_RID_SUB_STATUS)); - } - } - } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 102d6d6d6a8e..02d882e09076 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -174,6 +174,7 @@ public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, case HealthyTentative: return this.partitionLevelCircuitBreakerConfig.getConsecutiveExceptionCountToleratedForWrites() / 2; case Healthy: + case Unavailable: return 0; default: throw new IllegalStateException("Unsupported health status: " + status); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 5f28e181d365..ad970a60b1ef 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -318,7 +318,10 @@ public boolean isPartitionLevelCircuitBreakingApplicable(RxDocumentServiceReques return false; } - if (request.requestContext.isPerPartitionCircuitBreakerDisabledForRequest()) { + // could be a possible scenario when end-to-end timeout set on the operation is negative + // failing the operation with a NullPointerException would suppress the real issue in this case + // so when request is null - circuit breaking is effectively disabled + if (request == null) { return false; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java index 214535b0eb2d..c252ebfb5d3b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/models/CosmosQueryRequestOptions.java @@ -522,15 +522,6 @@ CosmosQueryRequestOptions setPartitionKeyRangeIdInternal(String partitionKeyRang return this; } - CosmosQueryRequestOptions disablePerPartitionCircuitBreaking() { - this.actualRequestOptions.setPerPartitionCircuitBreakingDisabled(true); - return this; - } - - boolean isPerPartitionCircuitBreakingDisabled() { - return this.actualRequestOptions.isPerPartitionCircuitBreakingDisabled(); - } - /** * Sets the custom ids. * @@ -668,16 +659,6 @@ public void setCollectionRid(CosmosQueryRequestOptions options, String collectio public String getCollectionRid(CosmosQueryRequestOptions options) { return options.actualRequestOptions.getCollectionRid(); } - - @Override - public boolean isPerPartitionCircuitBreakerDisabled(CosmosQueryRequestOptions options) { - return options.actualRequestOptions.isPerPartitionCircuitBreakingDisabled(); - } - - @Override - public void disablePerPartitionCircuitBreaker(CosmosQueryRequestOptions options) { - options.actualRequestOptions.setPerPartitionCircuitBreakingDisabled(true); - } }); } From 3120504941c37b6f5dea31344b15bff7e938e042 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 13 Jul 2024 21:56:50 -0400 Subject: [PATCH 129/140] Modify test pipeline timeout. --- sdk/cosmos/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/tests.yml b/sdk/cosmos/tests.yml index ffa8784d6ce9..e98292e25c7c 100644 --- a/sdk/cosmos/tests.yml +++ b/sdk/cosmos/tests.yml @@ -29,7 +29,7 @@ extends: groupId: com.azure - name: azure-cosmos-benchmark groupId: com.azure - TimeoutInMinutes: 120 + TimeoutInMinutes: 180 MaxParallel: 20 PreSteps: - template: /eng/pipelines/templates/steps/install-reporting-tools.yml From 8194c10bbd3effe22eaf765537921ed91015e0c8 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 15 Jul 2024 19:15:30 -0400 Subject: [PATCH 130/140] Reacting to review comments. --- .../com/azure/cosmos/MaxRetryCountTests.java | 6 +- .../implementation/ClientRetryPolicy.java | 4 +- .../implementation/RxDocumentClientImpl.java | 56 +++++++++---------- ...itionEndpointManagerForCircuitBreaker.java | 2 - 4 files changed, 33 insertions(+), 35 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java index 44d0a06f2c2c..7cefbadb33b4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/MaxRetryCountTests.java @@ -131,10 +131,10 @@ public class MaxRetryCountTests extends TestSuiteBase { assertThat(subStatusCode).isEqualTo(HttpConstants.SubStatusCodes.SERVER_GENERATED_408); }; - private final static BiConsumer validateStatusCodeIsTimeout = + private final static BiConsumer validateStatusCodeIsTransitTimeout = (statusCode, subStatusCode) -> { assertThat(statusCode).isEqualTo(HttpConstants.StatusCodes.REQUEST_TIMEOUT); - assertThat(subStatusCode).isEqualTo(HttpConstants.SubStatusCodes.UNKNOWN); + assertThat(subStatusCode).isEqualTo(HttpConstants.SubStatusCodes.TRANSIT_TIMEOUT); }; private final static BiConsumer validateStatusCodeIsTransitTimeoutGenerated503ForWrite = @@ -944,7 +944,7 @@ public Object[][] testConfigs_readMaxRetryCount_transitTimeout() { notSpecifiedWhetherIdempotentWriteRetriesAreEnabled, sameDocumentIdJustCreated, injectTransitTimeoutIntoAllRegions.apply(minNetworkRequestTimeoutDuration), - validateStatusCodeIsTimeout, // when idempotent write is disabled, SDK will not retry for write operation, 408 will be bubbled up + validateStatusCodeIsTransitTimeout, // when idempotent write is disabled, SDK will not retry for write operation, 408 will be bubbled up (TriConsumer)(requestCount, consistencyLevel, operationType) -> assertThat(requestCount).isLessThanOrEqualTo( expectedMaxNumberOfRetriesForTransientTimeout( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 329c9f1a8f05..e1ecdc6bcc01 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -385,7 +385,9 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( return Mono.just(ShouldRetryResult.noRetry()); } - if (this.serviceUnavailableRetryCount++ > MaxServiceUnavailableRetryCount) { + this.serviceUnavailableRetryCount++; + + if (this.serviceUnavailableRetryCount > MaxServiceUnavailableRetryCount) { logger.warn("shouldRetryOnBackendServiceUnavailableAsync() Not retrying. Retry count = {}", this.serviceUnavailableRetryCount); return Mono.just(ShouldRetryResult.noRetry()); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index c5d05dd73af2..57127a8e3c2d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1872,11 +1872,6 @@ private Mono getCreateDocumentRequest(DocumentClientRe serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } - Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); return addPartitionKeyInformation(request, content, document, options, collectionObs); } @@ -2615,6 +2610,11 @@ private Mono> upsertDocumentInternal( retryPolicyInstance.onBeforeSendRequest(request); } + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -2859,18 +2859,9 @@ private Mono> replaceDocumentInternal( request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } - if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } @@ -2894,6 +2885,11 @@ private Mono> replaceDocumentInternal( retryPolicyInstance.onBeforeSendRequest(request); } + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + return replace(request, retryPolicyInstance); }) .map(resp -> toResourceResponse(resp, Document.class)); @@ -3046,11 +3042,6 @@ private Mono> patchDocumentInternal( serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } - Mono> collectionObs = collectionCache.resolveCollectionAsync( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); @@ -3074,6 +3065,11 @@ private Mono> patchDocumentInternal( retryPolicyInstance.onBeforeSendRequest(request); } + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + return patch(request, retryPolicyInstance); }) .map(resp -> toResourceResponse(resp, Document.class)); @@ -3189,15 +3185,6 @@ private Mono> deleteDocumentInternal( request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - if (retryPolicyInstance != null) { - retryPolicyInstance.onBeforeSendRequest(request); - } - - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } - Mono> collectionObs = collectionCache.resolveCollectionAsync( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); @@ -3205,7 +3192,6 @@ private Mono> deleteDocumentInternal( Mono requestObs = addPartitionKeyInformation( request, null, internalObjectNode, options, collectionObs); - return requestObs .flatMap(req -> { @@ -3218,6 +3204,11 @@ private Mono> deleteDocumentInternal( retryPolicyInstance.onBeforeSendRequest(request); } + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -3360,6 +3351,13 @@ private Mono> readDocumentInternal( retryPolicyInstance.onBeforeSendRequest(request); } + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext + = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + } + return this.read(req, retryPolicyInstance) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); }); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index ad970a60b1ef..409cbfb0638c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -95,7 +95,6 @@ public void handleLocationExceptionForPartitionKeyRange(RxDocumentServiceRequest isFailureThresholdBreached.set(partitionLevelLocationUnavailabilityInfoAsVal.handleException( partitionKeyRangeWrapperAsKey, - collectionLink, failedLocation, request.isReadOnlyRequest())); @@ -362,7 +361,6 @@ private PartitionLevelLocationUnavailabilityInfo() { private boolean handleException( PartitionKeyRangeWrapper partitionKeyRangeWrapper, - String collectionLink, URI locationWithException, boolean isReadOnlyRequest) { From 929196f9a0860a2a5d6dc7c16319f0229d56ab00 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 15 Jul 2024 22:40:41 -0400 Subject: [PATCH 131/140] Reacting to review comments. --- .../PartitionLevelCircuitBreakerTests.java | 130 +----------------- .../implementation/ClientRetryPolicy.java | 4 +- 2 files changed, 2 insertions(+), 132 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 81cc64576995..7ef79bfd2d42 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -956,70 +956,6 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ONLY_DIRECT_MODE }, - // 503 injected into all regions for READ_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[]{ - String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - }, - // 503 injected into all regions for UPSERT_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[]{ - String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(6), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - }, - // 503 injected into all regions for QUERY_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[] { - String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE - }, // 449 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to see a success for all runs (due to threshold-based availability strategy enabled) @@ -1445,71 +1381,7 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, ONLY_GATEWAY_MODE - }, - // 503 injected into all regions for READ_ITEM operation - // injected into all replicas of the faulty EPK range. - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[]{ - String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.READ_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE - }, - // 503 injected into all regions for UPSERT_ITEM operation - // injected into all replicas of the faulty EPK range (although only the primary replica - // is ever involved - effectively doesn't impact the assertions for this test). - // Expectation is for the operation to hit 503 until fault injection has it its injection limits. - // After that, the operation should see a success from the first preferred region. - new Object[]{ - String.format("Test with faulty %s with service unavailable error in in all regions.", FaultInjectionOperationType.UPSERT_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(6), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE - }, - //503 injected into all regions for QUERY_ITEM operation - //injected into all replicas of the faulty EPK range. - //Expectation is for the operation to hit 503 until fault injection has it its injection limits. - //After that, the operation should see a success from the first preferred region. - new Object[] { - String.format("Test with faulty %s with service unavailable error in all regions.", FaultInjectionOperationType.QUERY_ITEM), - new FaultInjectionRuleParamsWrapper() - .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) - .withFaultInjectionApplicableRegions(this.writeRegions) - .withHitLimit(11), - this.buildServiceUnavailableFaultInjectionRules, - NO_END_TO_END_TIMEOUT, - NO_REGION_SWITCH_HINT, - !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, - this.validateResponseHasServiceUnavailableError, - this.validateResponseHasSuccess, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, - this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE - }, + } }; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index e1ecdc6bcc01..329c9f1a8f05 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -385,9 +385,7 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( return Mono.just(ShouldRetryResult.noRetry()); } - this.serviceUnavailableRetryCount++; - - if (this.serviceUnavailableRetryCount > MaxServiceUnavailableRetryCount) { + if (this.serviceUnavailableRetryCount++ > MaxServiceUnavailableRetryCount) { logger.warn("shouldRetryOnBackendServiceUnavailableAsync() Not retrying. Retry count = {}", this.serviceUnavailableRetryCount); return Mono.just(ShouldRetryResult.noRetry()); } From c405061854d9722b71f3ec1f82492d1ff3ca2235 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 16 Jul 2024 19:56:11 -0400 Subject: [PATCH 132/140] Reacting to review comments. --- .../PartitionLevelCircuitBreakerTests.java | 589 +++++++++++++++--- 1 file changed, 489 insertions(+), 100 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java index 7ef79bfd2d42..1015adf98ffd 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/PartitionLevelCircuitBreakerTests.java @@ -117,6 +117,24 @@ public class PartitionLevelCircuitBreakerTests extends FaultInjectionTestBase { assertThat(ctx.getContactedRegionNames()).contains(this.secondPreferredRegion.toLowerCase(Locale.ROOT)); }; + Consumer validateDiagnosticsContextHasAnyTwoPreferredRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isEqualTo(2); + }; + + Consumer validateDiagnosticsContextHasAtMostTwoPreferredRegions = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isLessThanOrEqualTo(2); + }; + + Consumer validateDiagnosticsContextHasOnePreferredRegion = (ctx) -> { + assertThat(ctx).isNotNull(); + assertThat(ctx.getContactedRegionNames()).isNotNull(); + assertThat(ctx.getContactedRegionNames().size()).isLessThanOrEqualTo(1); + }; + Consumer validateDiagnosticsContextHasAllRegions = (ctx) -> { assertThat(ctx).isNotNull(); assertThat(ctx.getContactedRegionNames()).isNotNull(); @@ -291,7 +309,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -313,7 +334,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -335,7 +359,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for DELETE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -357,7 +384,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for PATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -379,7 +409,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -401,7 +434,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -424,7 +460,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for BATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -446,7 +485,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -467,7 +509,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -488,7 +533,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -510,7 +558,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -532,7 +583,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for DELETE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -554,7 +608,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for PATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -576,7 +633,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -598,7 +658,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -621,7 +684,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Response-delay injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -644,7 +710,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Response-delay injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -667,7 +736,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Response-delay injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -691,7 +763,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // Response-delay injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -715,7 +790,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -736,7 +814,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -758,7 +839,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -779,7 +863,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -802,7 +889,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -823,7 +913,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -845,7 +938,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -867,7 +963,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 404/1002 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -888,7 +987,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 404/1002 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -910,7 +1012,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -932,7 +1037,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -954,7 +1062,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -975,7 +1086,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range. @@ -996,7 +1110,10 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 449 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1017,7 +1134,82 @@ public Object[][] miscellaneousOpTestConfigsDirect() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 + }, + // 500 injected into all regions for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will have one region contacted post circuit breaking (one for QueryPlan and the other for the data plane request). + new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAtMostTwoPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region contacted post circuit breaking. + new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (effectively the primary since it is an upsert (write) operation). + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region contacted post circuit breaking. + new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_DIRECT_MODE, + this.writeRegions.size(), + 25, + 15 } }; } @@ -1052,7 +1244,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for UPSERT_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1074,7 +1269,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for REPLACE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1096,7 +1294,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for DELETE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1118,7 +1319,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for PATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1140,7 +1344,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1162,7 +1369,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1185,7 +1395,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for BATCH_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1207,7 +1420,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // Server-generated 503 injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1228,7 +1444,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1249,7 +1468,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1271,7 +1493,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for READ_FEED_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1292,7 +1517,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // 500 (internal server error) injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1315,7 +1543,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // 429 injected into first preferred region for READ_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1336,7 +1567,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // 429 injected into first preferred region for CREATE_ITEM operation // injected into all replicas of the faulty EPK range (although only the primary replica @@ -1358,7 +1592,10 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 }, // 429 injected into first preferred region for QUERY_ITEM operation // injected into all replicas of the faulty EPK range. @@ -1380,7 +1617,82 @@ public Object[][] miscellaneousOpTestConfigsGateway() { this.validateDiagnosticsContextHasFirstAndSecondPreferredRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_GATEWAY_MODE + ONLY_GATEWAY_MODE, + 1, + 15, + 15 + }, + // 500 injected into all regions for QUERY_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will have one region contacted post circuit breaking (one for QueryPlan and the other for the data plane request). + new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.QUERY_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + this.validateDiagnosticsContextHasAtMostTwoPreferredRegions, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for READ_ITEM operation + // injected into all replicas of the faulty EPK range. + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region contacted post circuit breaking. + new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.READ_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.READ_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(11), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + this.writeRegions.size(), + 40, + 15 + }, + // 500 injected into all regions for UPSERT_ITEM operation + // injected into all replicas of the faulty EPK range (effectively the primary since it is an upsert (write) operation). + // Expectation is for the operation to see InternalServerError in all regions + // and will contact one region contacted post circuit breaking. + new Object[]{ + String.format("Test with faulty %s with internal server error in all preferred regions.", FaultInjectionOperationType.UPSERT_ITEM), + new FaultInjectionRuleParamsWrapper() + .withFaultInjectionOperationType(FaultInjectionOperationType.UPSERT_ITEM) + .withFaultInjectionApplicableRegions(this.writeRegions) + .withHitLimit(6), + this.buildInternalServerErrorFaultInjectionRules, + NO_END_TO_END_TIMEOUT, + NO_REGION_SWITCH_HINT, + !NON_IDEMPOTENT_WRITE_RETRIES_ENABLED, + this.validateResponseHasInternalServerError, + this.validateResponseHasSuccess, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasOnePreferredRegion, + this.validateDiagnosticsContextHasFirstPreferredRegionOnly, + ONLY_GATEWAY_MODE, + this.writeRegions.size(), + 25, + 15 } }; } @@ -1433,7 +1745,10 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 }, // Internal server error injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1454,7 +1769,10 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 }, // Server-generated 410 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1475,7 +1793,10 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 429 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1496,7 +1817,10 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 }, // 404/1002 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1517,29 +1841,35 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, - // 503 injected into all region for read many operation + // 500 injected into all region for read many operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to hit 503 and only to succeed when // fault injection has hit its injection limits. Also, the success is // from the first preferred region. { - "Test read many operation injected with service unavailable error in all regions.", + "Test read many operation injected with internal server error in all preferred regions.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions), - this.buildServiceUnavailableFaultInjectionRules, + this.buildInternalServerErrorFaultInjectionRules, executeReadManyOperation, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, - this.validateResponseHasServiceUnavailableError, + this.validateResponseHasInternalServerError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasOnePreferredRegion, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + this.writeRegions.size(), + 40, + 15 }, // 429 injected into first preferred region for read many operation // injected into all replicas of the faulty EPK range. @@ -1560,7 +1890,10 @@ public Object[][] readManyTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 } }; } @@ -1615,7 +1948,10 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 }, // Internal server error injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1636,7 +1972,10 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 }, // 410 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1657,7 +1996,10 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1678,7 +2020,10 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 }, // 404/1002 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1699,29 +2044,35 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ONLY_DIRECT_MODE + ONLY_DIRECT_MODE, + 1, + 15, + 15 }, - // 503 injected into all region for read all operation + // 500 injected into all region for read all operation // injected into all replicas of the faulty EPK range. // Expectation is for the operation to hit 503 and only to succeed when // fault injection has hit its injection limits. Also, the success is // from the first preferred region. { - "Test read all operation injected with service unavailable error in all regions.", + "Test read all operation injected with internal server error in all preferred regions.", new FaultInjectionRuleParamsWrapper() .withFaultInjectionOperationType(FaultInjectionOperationType.QUERY_ITEM) .withHitLimit(11) .withFaultInjectionApplicableRegions(this.writeRegions), - this.buildServiceUnavailableFaultInjectionRules, + this.buildInternalServerErrorFaultInjectionRules, executeReadAllOperation, NO_END_TO_END_TIMEOUT, NO_REGION_SWITCH_HINT, - this.validateResponseHasServiceUnavailableError, + this.validateResponseHasInternalServerError, this.validateResponseHasSuccess, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - this.validateDiagnosticsContextHasAllRegions, + this.validateDiagnosticsContextHasOnePreferredRegion, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + this.writeRegions.size(), + 40, + 15 }, // 429 injected into first preferred region for read all operation // injected into all replicas of the faulty EPK range. @@ -1742,12 +2093,15 @@ public Object[][] readAllTestConfigs() { this.validateDiagnosticsContextHasSecondPreferredRegionOnly, this.validateDiagnosticsContextHasAllRegions, this.validateDiagnosticsContextHasFirstPreferredRegionOnly, - ALL_CONNECTION_MODES_INCLUDED + ALL_CONNECTION_MODES_INCLUDED, + 1, + 15, + 15 } }; } - @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "miscellaneousOpTestConfigsDirect", timeOut = 80 * TIMEOUT) + @Test(groups = {"circuit-breaker-misc-direct"}, dataProvider = "miscellaneousOpTestConfigsDirect", timeOut = 4 * TIMEOUT) public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDirect( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -1760,7 +2114,10 @@ public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDir Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes) { + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { executeMiscOperationHitsTerminalExceptionAcrossKRegions( testId, @@ -1774,10 +2131,13 @@ public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsDir validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - allowedConnectionModes); + allowedConnectionModes, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); } - @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "miscellaneousOpTestConfigsGateway", timeOut = 80 * TIMEOUT) + @Test(groups = {"circuit-breaker-misc-gateway"}, dataProvider = "miscellaneousOpTestConfigsGateway", timeOut = 4 * TIMEOUT) public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsGateway( String testId, FaultInjectionRuleParamsWrapper faultInjectionRuleParamsWrapper, @@ -1790,7 +2150,10 @@ public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsGat Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes) { + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { executeMiscOperationHitsTerminalExceptionAcrossKRegions( testId, @@ -1804,7 +2167,10 @@ public void miscellaneousDocumentOperationHitsTerminalExceptionAcrossKRegionsGat validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - allowedConnectionModes); + allowedConnectionModes, + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); } private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( @@ -1819,7 +2185,10 @@ private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes) { + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { List preferredRegions = this.writeRegions; @@ -1910,8 +2279,9 @@ private void executeMiscOperationHitsTerminalExceptionAcrossKRegions( validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - 15, - 15); + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); } @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readManyTestConfigs", timeOut = 4 * TIMEOUT) @@ -1927,7 +2297,10 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes) { + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { List preferredRegions = this.writeRegions; @@ -2014,8 +2387,9 @@ public void readManyOperationHitsTerminalExceptionAcrossKRegions( validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - 15, - 15); + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); } @Test(groups = {"circuit-breaker-read-all-read-many"}, dataProvider = "readAllTestConfigs", timeOut = 4 * TIMEOUT) @@ -2031,7 +2405,10 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - Set allowedConnectionModes) { + Set allowedConnectionModes, + int expectedRegionCountWithFailures, + int operationIterationCountInFailureFlow, + int operationIterationCountInRecoveryFlow) { CosmosAsyncClient asyncClient = null; @@ -2119,8 +2496,9 @@ public void readAllOperationHitsTerminalExceptionAcrossKRegions( validateRegionsContactedWhenShortCircuitingHasKickedIn, validateRegionsContactedWhenExceptionBubblesUp, validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, - 15, - 15); + expectedRegionCountWithFailures, + operationIterationCountInFailureFlow, + operationIterationCountInRecoveryFlow); } private void execute( @@ -2135,6 +2513,7 @@ private void execute( Consumer validateRegionsContactedWhenShortCircuitingHasKickedIn, Consumer validateRegionsContactedWhenExceptionBubblesUp, Consumer validateRegionsContactedWhenShortCircuitRegionMarkedAsHealthyOrHealthyTentative, + int expectedRegionCountWithFailures, int operationIterationCountInFailureFlow, int operationIterationCountInRecoveryFlow) { @@ -2277,12 +2656,14 @@ private void execute( consecutiveExceptionBasedCircuitBreaker.getAllowedExceptionCountToMaintainStatus(LocationHealthStatus.HealthyWithFailures, true); if (!hasReachedCircuitBreakingThreshold) { + hasReachedCircuitBreakingThreshold = expectedCircuitBreakingThreshold == - getAverageExceptionCountByPartitionKeyRangeByRegion( + getEstimatedFailureCountSeenPerRegionPerPartitionKeyRange( partitionKeyRangeWrapper, partitionKeyRangeToLocationSpecificUnavailabilityInfo, - locationEndpointToLocationSpecificContextForPartitionField - ); + locationEndpointToLocationSpecificContextForPartitionField, + expectedCircuitBreakingThreshold, + expectedRegionCountWithFailures); validateResponseInPresenceOfFailures.accept(response); } else { executionCountAfterCircuitBreakingThresholdBreached++; @@ -3085,36 +3466,44 @@ private static Class getClassBySimpleName(Class[] classes, String classSim return null; } - private static int getAverageExceptionCountByPartitionKeyRangeByRegion( + private static double getEstimatedFailureCountSeenPerRegionPerPartitionKeyRange( PartitionKeyRangeWrapper partitionKeyRangeWrapper, ConcurrentHashMap partitionKeyRangeToLocationSpecificUnavailabilityInfo, - Field locationEndpointToLocationSpecificContextForPartitionField) throws IllegalAccessException { + Field locationEndpointToLocationSpecificContextForPartitionField, + int allowedExceptionCountToMaintainHealthyWithFailuresStatus, + int expectedRegionCountWithFailures) throws IllegalAccessException { Object partitionAndLocationSpecificUnavailabilityInfo = partitionKeyRangeToLocationSpecificUnavailabilityInfo.get(partitionKeyRangeWrapper); + if (partitionAndLocationSpecificUnavailabilityInfo == null) { + return 0d; + } + ConcurrentHashMap locationEndpointToLocationSpecificContextForPartition = (ConcurrentHashMap) locationEndpointToLocationSpecificContextForPartitionField.get(partitionAndLocationSpecificUnavailabilityInfo); int count = 0; - int regionCountWithFailures = 0; boolean failuresExist = false; for (LocationSpecificHealthContext locationSpecificHealthContext : locationEndpointToLocationSpecificContextForPartition.values()) { - count += locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() + locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); + + if (locationSpecificHealthContext.getLocationHealthStatus() == LocationHealthStatus.Unavailable) { + count += allowedExceptionCountToMaintainHealthyWithFailuresStatus; + } else { + count += locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() + locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); + } if (locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() + locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking() > 0) { failuresExist = true; - regionCountWithFailures++; } } if (failuresExist) { - return count / regionCountWithFailures; + return (count * 1.0d) / (expectedRegionCountWithFailures * 1.0d); } - return 0; - + return 0d; } private static FaultInjectionConnectionType evaluateFaultInjectionConnectionType(ConnectionMode connectionMode) { From 13a710fd81b5fe4ae68cf512a415c71811d2645b Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 17 Jul 2024 12:15:06 -0400 Subject: [PATCH 133/140] Reacting to review comments. --- ...EndpointManagerForCircuitBreakerTests.java | 12 +- .../com/azure/cosmos/CosmosDiagnostics.java | 14 ++ .../ClientSideRequestStatistics.java | 10 ++ .../ImplementationBridgeHelpers.java | 2 + ...ointOperationContextForCircuitBreaker.java | 9 +- .../implementation/RxDocumentClientImpl.java | 121 ++++++++++++------ 6 files changed, 123 insertions(+), 45 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java index 8be7c9ecf2fc..d76c5449fd14 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/GlobalPartitionEndpointManagerForCircuitBreakerTests.java @@ -3,15 +3,14 @@ package com.azure.cosmos; -import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import com.azure.cosmos.implementation.OperationType; import com.azure.cosmos.implementation.PartitionKeyRange; import com.azure.cosmos.implementation.PointOperationContextForCircuitBreaker; import com.azure.cosmos.implementation.ResourceType; -import com.azure.cosmos.implementation.RxDocumentClientImpl; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.SerializationDiagnosticsContext; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.circuitBreaker.GlobalPartitionEndpointManagerForCircuitBreaker; import com.azure.cosmos.implementation.circuitBreaker.LocationHealthStatus; @@ -25,7 +24,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; -import reactor.core.publisher.Flux; import java.lang.reflect.Field; import java.net.URI; @@ -917,7 +915,13 @@ private RxDocumentServiceRequest constructRxDocumentServiceRequestInstance( request.requestContext.resolvedPartitionKeyRange = new PartitionKeyRange(partitionKeyRangeId, minInclusive, maxExclusive); request.requestContext.locationEndpointToRoute = locationEndpointToRoute; request.requestContext.setExcludeRegions(Collections.emptyList()); - request.requestContext.setPointOperationContext(new PointOperationContextForCircuitBreaker(new AtomicBoolean(false), false, collectionLink, new MetadataDiagnosticsContext())); + request.requestContext.setPointOperationContext( + new PointOperationContextForCircuitBreaker( + new AtomicBoolean(false), + false, + collectionLink, + new MetadataDiagnosticsContext(), + new SerializationDiagnosticsContext())); return request; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java index 9f76557549ee..0e964bd550cf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosDiagnostics.java @@ -8,6 +8,7 @@ import com.azure.cosmos.implementation.ImplementationBridgeHelpers; import com.azure.cosmos.implementation.MetadataDiagnosticsContext; import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.SerializationDiagnosticsContext; import com.azure.cosmos.implementation.guava25.collect.ImmutableList; import com.azure.cosmos.util.Beta; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -499,6 +500,19 @@ public void mergeMetadataDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, clientSideRequestStatistics.mergeMetadataDiagnosticsContext(otherMetadataDiagnosticsContext); } } + + @Override + public void mergeSerializationDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, SerializationDiagnosticsContext otherSerializationDiagnosticsContext) { + if (cosmosDiagnostics == null) { + return; + } + + ClientSideRequestStatistics clientSideRequestStatistics = cosmosDiagnostics.clientSideRequestStatistics; + + if (clientSideRequestStatistics != null) { + clientSideRequestStatistics.mergeSerializationDiagnosticsContext(otherSerializationDiagnosticsContext); + } + } }); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java index 58d8b1063015..eb1eb492de22 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientSideRequestStatistics.java @@ -280,6 +280,16 @@ public void mergeMetadataDiagnosticsContext(MetadataDiagnosticsContext other) { } } + public void mergeSerializationDiagnosticsContext(SerializationDiagnosticsContext other) { + if (other == null || other.serializationDiagnosticsList == null || other.serializationDiagnosticsList.isEmpty()) { + return; + } + + for (SerializationDiagnosticsContext.SerializationDiagnostics serializationDiagnostics : other.serializationDiagnosticsList) { + this.serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); + } + } + public String recordAddressResolutionStart( URI targetEndpoint, boolean forceRefresh, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java index 74a965e03229..a8624059aa20 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ImplementationBridgeHelpers.java @@ -887,6 +887,8 @@ void recordAddressResolutionEnd( URI getFirstContactedLocationEndpoint(CosmosDiagnostics cosmosDiagnostics); void mergeMetadataDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, MetadataDiagnosticsContext otherMetadataDiagnosticsContext); + + void mergeSerializationDiagnosticContext(CosmosDiagnostics cosmosDiagnostics, SerializationDiagnosticsContext otherSerializationDiagnosticsContext); } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java index a5ad4adb767a..8f5bdc1ccd91 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/PointOperationContextForCircuitBreaker.java @@ -12,17 +12,20 @@ public class PointOperationContextForCircuitBreaker { private boolean isRequestHedged; private final String collectionLink; private final MetadataDiagnosticsContext metadataDiagnosticsContext; + private final SerializationDiagnosticsContext serializationDiagnosticsContext; public PointOperationContextForCircuitBreaker( AtomicBoolean hasOperationSeenSuccess, boolean isThresholdBasedAvailabilityStrategyEnabled, String collectionLink, - MetadataDiagnosticsContext metadataDiagnosticsContext) { + MetadataDiagnosticsContext metadataDiagnosticsContext, + SerializationDiagnosticsContext serializationDiagnosticsContext) { this.hasOperationSeenSuccess = hasOperationSeenSuccess; this.isThresholdBasedAvailabilityStrategyEnabled = isThresholdBasedAvailabilityStrategyEnabled; this.collectionLink = collectionLink; this.metadataDiagnosticsContext = metadataDiagnosticsContext; + this.serializationDiagnosticsContext = serializationDiagnosticsContext; } public void setIsRequestHedged(boolean isRequestHedged) { @@ -52,4 +55,8 @@ public String getCollectionLink() { public MetadataDiagnosticsContext getMetadataDiagnosticsContext() { return this.metadataDiagnosticsContext; } + + public SerializationDiagnosticsContext getSerializationDiagnosticsContext() { + return serializationDiagnosticsContext; + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 57127a8e3c2d..6fe7bfce8484 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -1747,7 +1747,7 @@ private Mono addPartitionKeyInformation(RxDocumentServ Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); return collectionObs .map(collectionValueHolder -> { - addPartitionKeyInformation(request, contentAsByteBuffer, document, options, collectionValueHolder.v); + addPartitionKeyInformation(request, contentAsByteBuffer, document, options, collectionValueHolder.v, null); return request; }); } @@ -1756,10 +1756,11 @@ private Mono addPartitionKeyInformation(RxDocumentServ ByteBuffer contentAsByteBuffer, Object document, RequestOptions options, - Mono> collectionObs) { + Mono> collectionObs, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { return collectionObs.map(collectionValueHolder -> { - addPartitionKeyInformation(request, contentAsByteBuffer, document, options, collectionValueHolder.v); + addPartitionKeyInformation(request, contentAsByteBuffer, document, options, collectionValueHolder.v, pointOperationContextForCircuitBreaker); return request; }); } @@ -1767,7 +1768,9 @@ private Mono addPartitionKeyInformation(RxDocumentServ private void addPartitionKeyInformation(RxDocumentServiceRequest request, ByteBuffer contentAsByteBuffer, Object objectDoc, RequestOptions options, - DocumentCollection collection) { + DocumentCollection collection, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { + PartitionKeyDefinition partitionKeyDefinition = collection.getPartitionKey(); PartitionKeyInternal partitionKeyInternal = null; @@ -1802,9 +1805,16 @@ private void addPartitionKeyInformation(RxDocumentServiceRequest request, serializationEndTime, SerializationDiagnosticsContext.SerializationType.PARTITION_KEY_FETCH_SERIALIZATION ); + SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); + } else if (pointOperationContextForCircuitBreaker != null) { + serializationDiagnosticsContext = pointOperationContextForCircuitBreaker.getSerializationDiagnosticsContext(); + + if (serializationDiagnosticsContext != null) { + serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); + } } } else { @@ -1863,17 +1873,13 @@ private Mono getCreateDocumentRequest(DocumentClientRe request.requestContext.setKeywordIdentifiers(options.getKeywordIdentifiers()); } - if (requestRetryPolicy != null) { - requestRetryPolicy.onBeforeSendRequest(request); - } - SerializationDiagnosticsContext serializationDiagnosticsContext = BridgeInternal.getSerializationDiagnosticsContext(request.requestContext.cosmosDiagnostics); if (serializationDiagnosticsContext != null) { serializationDiagnosticsContext.addSerializationDiagnostics(serializationDiagnostics); } Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); - return addPartitionKeyInformation(request, content, document, options, collectionObs); + return addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker); } private Mono getBatchDocumentRequest(DocumentClientRetryPolicy requestRetryPolicy, @@ -1938,7 +1944,8 @@ private Mono getBatchDocumentRequest(DocumentClientRet new AtomicBoolean(false), false, documentCollectionLink, - metadataDiagnosticsContext)); + metadataDiagnosticsContext, + serializationDiagnosticsContext)); return this.collectionCache.resolveCollectionAsync(metadataDiagnosticsContext, request) .flatMap(documentCollectionValueHolder -> this.partitionKeyRangeCache.tryLookupAsync(metadataDiagnosticsContext, documentCollectionValueHolder.v.getResourceId(), null, null) @@ -2324,10 +2331,17 @@ private Mono> createDocumentInternal( documentServiceRequestReference.set(request); request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request if (requestRetryPolicy != null) { requestRetryPolicy.onBeforeSendRequest(request); } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); + return create(request, requestRetryPolicy, getOperationContextAndListenerTuple(options)); }) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -2606,14 +2620,16 @@ private Mono> upsertDocumentInternal( request.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(request); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request if (retryPolicyInstance != null) { retryPolicyInstance.onBeforeSendRequest(request); } - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); return upsert(request, retryPolicyInstance, getOperationContextAndListenerTuple(options)); }) @@ -2871,7 +2887,7 @@ private Mono> replaceDocumentInternal( BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); Mono requestObs = - addPartitionKeyInformation(request, content, document, options, collectionObs); + addPartitionKeyInformation(request, content, document, options, collectionObs, pointOperationContextForCircuitBreaker); return requestObs .flatMap(req -> { @@ -2881,14 +2897,16 @@ private Mono> replaceDocumentInternal( req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request if (retryPolicyInstance != null) { retryPolicyInstance.onBeforeSendRequest(request); } - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); return replace(request, retryPolicyInstance); }) @@ -3051,7 +3069,8 @@ private Mono> patchDocumentInternal( null, null, options, - collectionObs); + collectionObs, + pointOperationContextForCircuitBreaker); return requestObs .flatMap(req -> { @@ -3061,14 +3080,16 @@ private Mono> patchDocumentInternal( req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request if (retryPolicyInstance != null) { retryPolicyInstance.onBeforeSendRequest(request); } - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); return patch(request, retryPolicyInstance); }) @@ -3190,7 +3211,7 @@ private Mono> deleteDocumentInternal( request); Mono requestObs = addPartitionKeyInformation( - request, null, internalObjectNode, options, collectionObs); + request, null, internalObjectNode, options, collectionObs, pointOperationContextForCircuitBreaker); return requestObs .flatMap(req -> { @@ -3200,14 +3221,16 @@ private Mono> deleteDocumentInternal( req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request if (retryPolicyInstance != null) { retryPolicyInstance.onBeforeSendRequest(request); } - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); return this.delete(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)); }) @@ -3245,7 +3268,7 @@ private Mono> deleteAllDocumentsByPartitionKeyInterna Mono> collectionObs = collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); - Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs); + Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, null); return requestObs.flatMap(req -> this .deleteAllItemsByPartitionKey(req, retryPolicyInstance, getOperationContextAndListenerTuple(options)) @@ -3338,7 +3361,7 @@ private Mono> readDocumentInternal( Mono> collectionObs = this.collectionCache.resolveCollectionAsync(BridgeInternal.getMetaDataDiagnosticContext(request.requestContext.cosmosDiagnostics), request); - Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs); + Mono requestObs = addPartitionKeyInformation(request, null, null, options, collectionObs, pointOperationContextForCircuitBreaker); return requestObs.flatMap(req -> { @@ -3347,16 +3370,16 @@ private Mono> readDocumentInternal( req.requestContext.setPointOperationContext(pointOperationContextForCircuitBreaker); requestReference.set(req); + // needs to be after addPartitionLevelUnavailableRegionsForRequest since onBeforeSendRequest uses + // excluded regions to know the next location endpoint to route the request to + // unavailable regions are effectively excluded regions for this request if (retryPolicyInstance != null) { retryPolicyInstance.onBeforeSendRequest(request); } - if (pointOperationContextForCircuitBreaker != null) { - MetadataDiagnosticsContext metadataDiagnosticsContext - = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); - - diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); - } + // needs to be after onBeforeSendRequest since CosmosDiagnostics instance needs to be wired + // to the RxDocumentServiceRequest instance + mergeContextInformationIntoDiagnosticsForPointRequest(request, pointOperationContextForCircuitBreaker); return this.read(req, retryPolicyInstance) .map(serviceResponse -> toResourceResponse(serviceResponse, Document.class)); @@ -5917,6 +5940,21 @@ public void addPartitionLevelUnavailableRegionsForRequest( } } + public void mergeContextInformationIntoDiagnosticsForPointRequest( + RxDocumentServiceRequest request, + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker) { + + if (pointOperationContextForCircuitBreaker != null) { + MetadataDiagnosticsContext metadataDiagnosticsContext + = pointOperationContextForCircuitBreaker.getMetadataDiagnosticsContext(); + SerializationDiagnosticsContext serializationDiagnosticsContext + = pointOperationContextForCircuitBreaker.getSerializationDiagnosticsContext(); + + diagnosticsAccessor.mergeMetadataDiagnosticContext(request.requestContext.cosmosDiagnostics, metadataDiagnosticsContext); + diagnosticsAccessor.mergeSerializationDiagnosticContext(request.requestContext.cosmosDiagnostics, serializationDiagnosticsContext); + } + } + public void addPartitionLevelUnavailableRegionsForFeedRequest( RxDocumentServiceRequest request, CosmosQueryRequestOptions options, @@ -6047,7 +6085,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat isOperationSuccessful, false, collectionLink, - metadataDiagnosticsContext); + metadataDiagnosticsContext, + new SerializationDiagnosticsContext()); pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); return callback.apply(nonNullRequestOptions, endToEndPolicyConfig, innerDiagnosticsFactory, pointOperationContextForCircuitBreakerForMainRequest, collectionRoutingMapValueHolder); @@ -6074,7 +6113,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat isOperationSuccessful, true, collectionLink, - metadataDiagnosticsContext); + metadataDiagnosticsContext, + new SerializationDiagnosticsContext()); pointOperationContextForCircuitBreakerForMainRequest.setIsRequestHedged(false); Mono initialMonoAcrossAllRegions = @@ -6110,7 +6150,8 @@ private Mono> wrapPointOperationWithAvailabilityStrat isOperationSuccessful, true, collectionLink, - metadataDiagnosticsContext); + metadataDiagnosticsContext, + new SerializationDiagnosticsContext()); pointOperationContextForCircuitBreakerForHedgedRequest.setIsRequestHedged(true); Mono regionalCrossRegionRetryMono = From 15fe11b784bcd04861f1ce239c17168976a6bf99 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 17 Jul 2024 15:17:11 -0400 Subject: [PATCH 134/140] Reacting to review comments. --- .../implementation/RxDocumentClientImpl.java | 54 ++++++++----------- ...nsecutiveExceptionBasedCircuitBreaker.java | 37 +++++++++---- ...itionEndpointManagerForCircuitBreaker.java | 9 ++-- ...pecificHealthContextTransitionHandler.java | 52 +++++++++++------- 4 files changed, 86 insertions(+), 66 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 6fe7bfce8484..e1a533e30996 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2401,45 +2401,15 @@ private Mono handleCircuitBreakingFeedbackForPointOperation( }) .doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { - - if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { - RxDocumentServiceRequest failedRequest = requestReference.get(); - checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); - - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); - checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); - - if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - - if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { - this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); - } - } else { - this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); - } - } + handleErroneousCasesForPointOperationsForCircuitBreaker(requestReference); } }) .doFinally(signalType -> { - if (signalType != SignalType.CANCEL) { return; } - if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { - RxDocumentServiceRequest potentiallyFailedRequest = requestReference.get(); - checkNotNull(potentiallyFailedRequest.requestContext, "Argument 'potentiallyFailedRequest.requestContext' must not be null!"); - - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = potentiallyFailedRequest.requestContext.getPointOperationContextForCircuitBreaker(); - checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); - - if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - - if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { - this.handleLocationCancellationExceptionForPartitionKeyRange(potentiallyFailedRequest); - } - } - } + handleErroneousCasesForPointOperationsForCircuitBreaker(requestReference); }); } @@ -6617,6 +6587,26 @@ private Mono executeFeedOperationWithAvailabilityStrategy( }); } + private void handleErroneousCasesForPointOperationsForCircuitBreaker(AtomicReference requestReference) { + + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest failedRequest = requestReference.get(); + checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); + + if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + + if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } else { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } + } + private void handleLocationCancellationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { URI firstContactedLocationEndpoint = diagnosticsAccessor diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 02d882e09076..1e0ca6ee8cb5 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -15,7 +15,11 @@ public ConsecutiveExceptionBasedCircuitBreaker(PartitionLevelCircuitBreakerConfi this.partitionLevelCircuitBreakerConfig = partitionLevelCircuitBreakerConfig; } - public LocationSpecificHealthContext handleException(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { + public LocationSpecificHealthContext handleException( + LocationSpecificHealthContext locationSpecificHealthContext, + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + String regionWithException, + boolean isReadOnlyRequest) { int exceptionCountAfterHandling = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); @@ -55,13 +59,22 @@ public LocationSpecificHealthContext handleException(LocationSpecificHealthConte .build(); } case Unavailable: - throw new IllegalStateException(); + logger.warn("Region {} should not be handling failures in {} health status for partition key range : {} and collection RID : {}", + regionWithException, + locationHealthStatus.getStringifiedLocationHealthStatus(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive() + "-" + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getCollectionResourceId()); default: - throw new IllegalArgumentException(); + throw new IllegalArgumentException("Unsupported health status : " + locationHealthStatus); } } - public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext locationSpecificHealthContext, boolean isReadOnlyRequest) { + public LocationSpecificHealthContext handleSuccess( + LocationSpecificHealthContext locationSpecificHealthContext, + PartitionKeyRangeWrapper partitionKeyRangeWrapper, + String regionWithSuccess, + boolean isReadOnlyRequest) { + int exceptionCountAfterHandling = (isReadOnlyRequest) ? locationSpecificHealthContext.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContext.getExceptionCountForWriteForCircuitBreaking(); @@ -129,9 +142,13 @@ public LocationSpecificHealthContext handleSuccess(LocationSpecificHealthContext } case Unavailable: - throw new IllegalStateException(); + logger.warn("Region {} should not be handling successes in {} health status for partition key range : {} and collection RID : {}", + regionWithSuccess, + locationHealthStatus.getStringifiedLocationHealthStatus(), + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive() + "-" + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), + partitionKeyRangeWrapper.getCollectionResourceId()); default: - throw new IllegalArgumentException(); + throw new IllegalArgumentException("Unsupported health status : " + locationHealthStatus); } } @@ -165,7 +182,7 @@ public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, case Unavailable: return 0; default: - throw new IllegalStateException("Unsupported health status: " + status); + throw new IllegalArgumentException("Unsupported health status: " + status); } } else { switch (status) { @@ -177,7 +194,7 @@ public int getAllowedExceptionCountToMaintainStatus(LocationHealthStatus status, case Unavailable: return 0; default: - throw new IllegalStateException("Unsupported health status: " + status); + throw new IllegalArgumentException("Unsupported health status: " + status); } } } @@ -192,7 +209,7 @@ public int getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus status, b case Healthy: return 0; default: - throw new IllegalStateException("Unsupported health status: " + status); + throw new IllegalArgumentException("Unsupported health status: " + status); } } else { switch (status) { @@ -203,7 +220,7 @@ public int getMinimumSuccessCountForStatusUpgrade(LocationHealthStatus status, b case Healthy: return 0; default: - throw new IllegalStateException("Unsupported health status: " + status); + throw new IllegalArgumentException("Unsupported health status: " + status); } } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java index 409cbfb0638c..3b4ca3a429b6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/GlobalPartitionEndpointManagerForCircuitBreaker.java @@ -14,6 +14,7 @@ import com.azure.cosmos.implementation.ResourceType; import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; +import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.tuple.Pair; import com.azure.cosmos.implementation.directconnectivity.GatewayAddressCache; import com.azure.cosmos.implementation.directconnectivity.GlobalAddressResolver; @@ -276,7 +277,7 @@ private Flux updateStaleLocationInfo() { .this.locationSpecificHealthContextTransitionHandler.handleSuccess( locationSpecificContextAsVal, partitionKeyRangeWrapper, - locationWithStaleUnavailabilityInfoAsKey, + this.locationToRegion.getOrDefault(locationWithStaleUnavailabilityInfoAsKey, StringUtils.EMPTY), false, true); } @@ -292,7 +293,7 @@ private Flux updateStaleLocationInfo() { .this.locationSpecificHealthContextTransitionHandler.handleSuccess( locationSpecificContextAsVal, partitionKeyRangeWrapper, - locationWithStaleUnavailabilityInfoAsKey, + GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.getOrDefault(locationWithStaleUnavailabilityInfoAsKey, StringUtils.EMPTY), false, true); } @@ -385,7 +386,7 @@ private boolean handleException( locationSpecificContextAsVal, partitionKeyRangeWrapper, GlobalPartitionEndpointManagerForCircuitBreaker.this.partitionKeyRangesWithPossibleUnavailableRegions, - locationWithException, + GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.getOrDefault(locationWithException, StringUtils.EMPTY), isReadOnlyRequest); @@ -434,7 +435,7 @@ private void handleSuccess( locationSpecificHealthContextAfterTransition = this.locationSpecificHealthContextTransitionHandler.handleSuccess( locationSpecificContextAsVal, partitionKeyRangeWrapper, - succeededLocation, + GlobalPartitionEndpointManagerForCircuitBreaker.this.locationToRegion.getOrDefault(succeededLocation, StringUtils.EMPTY), false, isReadOnlyRequest); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java index 2848803a8bd2..a3a34539df7f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/LocationSpecificHealthContextTransitionHandler.java @@ -32,7 +32,7 @@ public LocationSpecificHealthContextTransitionHandler( public LocationSpecificHealthContext handleSuccess( LocationSpecificHealthContext locationSpecificHealthContext, PartitionKeyRangeWrapper partitionKeyRangeWrapper, - URI locationWithSuccess, + String regionWithSuccess, boolean forceStatusChange, boolean isReadOnlyRequest) { @@ -48,7 +48,11 @@ public LocationSpecificHealthContext handleSuccess( if (!forceStatusChange) { if (exceptionCountActual > 0) { return this.consecutiveExceptionBasedCircuitBreaker - .handleSuccess(locationSpecificHealthContext, isReadOnlyRequest); + .handleSuccess( + locationSpecificHealthContext, + partitionKeyRangeWrapper, + regionWithSuccess, + isReadOnlyRequest); } } break; @@ -57,7 +61,12 @@ public LocationSpecificHealthContext handleSuccess( if (!forceStatusChange) { LocationSpecificHealthContext locationSpecificHealthContextInner - = this.consecutiveExceptionBasedCircuitBreaker.handleSuccess(locationSpecificHealthContext, isReadOnlyRequest); + = this.consecutiveExceptionBasedCircuitBreaker + .handleSuccess( + locationSpecificHealthContext, + partitionKeyRangeWrapper, + regionWithSuccess, + isReadOnlyRequest); if (this.consecutiveExceptionBasedCircuitBreaker.canHealthStatusBeUpgraded(locationSpecificHealthContextInner, isReadOnlyRequest)) { @@ -66,8 +75,7 @@ public LocationSpecificHealthContext handleSuccess( partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getCollectionResourceId(), - this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + regionWithSuccess); } return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Healthy); @@ -86,8 +94,7 @@ public LocationSpecificHealthContext handleSuccess( partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getCollectionResourceId(), - this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + regionWithSuccess); } return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative); @@ -99,8 +106,7 @@ public LocationSpecificHealthContext handleSuccess( partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getCollectionResourceId(), - this.globalEndpointManager - .getRegionName(locationWithSuccess, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + regionWithSuccess); } return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyTentative); @@ -117,7 +123,7 @@ public LocationSpecificHealthContext handleException( LocationSpecificHealthContext locationSpecificHealthContext, PartitionKeyRangeWrapper partitionKeyRangeWrapper, ConcurrentHashMap partitionKeyRangesWithPossibleUnavailableRegions, - URI locationWithException, + String regionWithException, boolean isReadOnlyRequest) { LocationHealthStatus currentLocationHealthStatusSnapshot = locationSpecificHealthContext.getLocationHealthStatus(); @@ -130,8 +136,7 @@ public LocationSpecificHealthContext handleException( partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getCollectionResourceId(), - this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + regionWithException); } return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.HealthyWithFailures); @@ -139,7 +144,12 @@ public LocationSpecificHealthContext handleException( if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { LocationSpecificHealthContext locationSpecificHealthContextInner - = this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificHealthContext, isReadOnlyRequest); + = this.consecutiveExceptionBasedCircuitBreaker + .handleException( + locationSpecificHealthContext, + partitionKeyRangeWrapper, + regionWithException, + isReadOnlyRequest); if (logger.isDebugEnabled()) { logger.debug("Partition {}-{} of collection : {} has exception count of {} for region : {}", @@ -147,8 +157,7 @@ public LocationSpecificHealthContext handleException( partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getCollectionResourceId(), isReadOnlyRequest ? locationSpecificHealthContextInner.getExceptionCountForReadForCircuitBreaking() : locationSpecificHealthContextInner.getExceptionCountForWriteForCircuitBreaking(), - this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + regionWithException); } return locationSpecificHealthContextInner; @@ -160,15 +169,19 @@ public LocationSpecificHealthContext handleException( partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getPartitionKeyRange(), - this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + regionWithException); } return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable); } case HealthyTentative: if (!this.consecutiveExceptionBasedCircuitBreaker.shouldHealthStatusBeDowngraded(locationSpecificHealthContext, isReadOnlyRequest)) { - return this.consecutiveExceptionBasedCircuitBreaker.handleException(locationSpecificHealthContext, isReadOnlyRequest); + return this.consecutiveExceptionBasedCircuitBreaker + .handleException( + locationSpecificHealthContext, + partitionKeyRangeWrapper, + regionWithException, + isReadOnlyRequest); } else { if (logger.isDebugEnabled()) { @@ -176,8 +189,7 @@ public LocationSpecificHealthContext handleException( partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getPartitionKeyRange().getMaxExclusive(), partitionKeyRangeWrapper.getCollectionResourceId(), - this.globalEndpointManager - .getRegionName(locationWithException, (isReadOnlyRequest) ? OperationType.Read : OperationType.Create)); + regionWithException); } return this.transitionHealthStatus(locationSpecificHealthContext, LocationHealthStatus.Unavailable); From 36109f18455c7e2bd4c4d24765f47284ffe889d3 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Wed, 17 Jul 2024 15:58:16 -0400 Subject: [PATCH 135/140] Fixing CI pipeline. --- .../circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index 1e0ca6ee8cb5..dade8404f7fd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -64,6 +64,7 @@ public LocationSpecificHealthContext handleException( locationHealthStatus.getStringifiedLocationHealthStatus(), partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive() + "-" + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getCollectionResourceId()); + return locationSpecificHealthContext; default: throw new IllegalArgumentException("Unsupported health status : " + locationHealthStatus); } @@ -147,6 +148,7 @@ public LocationSpecificHealthContext handleSuccess( locationHealthStatus.getStringifiedLocationHealthStatus(), partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive() + "-" + partitionKeyRangeWrapper.getPartitionKeyRange().getMinInclusive(), partitionKeyRangeWrapper.getCollectionResourceId()); + return locationSpecificHealthContext; default: throw new IllegalArgumentException("Unsupported health status : " + locationHealthStatus); } From ca0313b5c0e4bb108bcfa4efc57a092cc23abaae Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Thu, 18 Jul 2024 19:52:11 -0400 Subject: [PATCH 136/140] Fixing CI pipeline. --- .../implementation/RxDocumentClientImpl.java | 52 +++++++++++-------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index e1a533e30996..9d2e98fb6acd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2401,7 +2401,22 @@ private Mono handleCircuitBreakingFeedbackForPointOperation( }) .doOnError(throwable -> { if (throwable instanceof OperationCancelledException) { - handleErroneousCasesForPointOperationsForCircuitBreaker(requestReference); + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest failedRequest = requestReference.get(); + checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); + + if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + + if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } else { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } } }) .doFinally(signalType -> { @@ -2409,7 +2424,20 @@ private Mono handleCircuitBreakingFeedbackForPointOperation( return; } - handleErroneousCasesForPointOperationsForCircuitBreaker(requestReference); + if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { + RxDocumentServiceRequest failedRequest = requestReference.get(); + checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); + + PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); + checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); + + if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { + + if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { + this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); + } + } + } }); } @@ -6587,26 +6615,6 @@ private Mono executeFeedOperationWithAvailabilityStrategy( }); } - private void handleErroneousCasesForPointOperationsForCircuitBreaker(AtomicReference requestReference) { - - if (this.globalPartitionEndpointManagerForCircuitBreaker.isPartitionLevelCircuitBreakingApplicable(requestReference.get())) { - RxDocumentServiceRequest failedRequest = requestReference.get(); - checkNotNull(failedRequest.requestContext, "Argument 'failedRequest.requestContext' must not be null!"); - - PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); - checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); - - if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { - - if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { - this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); - } - } else { - this.handleLocationCancellationExceptionForPartitionKeyRange(failedRequest); - } - } - } - private void handleLocationCancellationExceptionForPartitionKeyRange(RxDocumentServiceRequest failedRequest) { URI firstContactedLocationEndpoint = diagnosticsAccessor From e45f1e94ab3f214d4b4fa4d741256b7b9e8b09da Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 19 Jul 2024 10:53:37 -0400 Subject: [PATCH 137/140] Fixing CI pipeline. --- .../src/test/java/com/azure/cosmos/rx/TestSuiteBase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java index b015f15893e1..ee7ad051eb78 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/rx/TestSuiteBase.java @@ -204,7 +204,7 @@ public CosmosAsyncDatabase getDatabase(String id) { } } - @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SETUP_TIMEOUT) + @BeforeSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SETUP_TIMEOUT) public void beforeSuite() { logger.info("beforeSuite Started"); @@ -226,7 +226,7 @@ public static void parallelizeUnitTests(ITestContext context) { context.getSuite().getXmlSuite().setThreadCount(Runtime.getRuntime().availableProcessors()); } - @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) + @AfterSuite(groups = {"fast", "long", "direct", "multi-region", "multi-master", "flaky-multi-master", "emulator", "split", "query", "cfp-split", "circuit-breaker-misc-gateway", "circuit-breaker-misc-direct", "circuit-breaker-read-all-read-many"}, timeOut = SUITE_SHUTDOWN_TIMEOUT) public void afterSuite() { logger.info("afterSuite Started"); From f5a6218778a0e60f92b5d67e766a55018f707153 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Fri, 19 Jul 2024 11:10:45 -0400 Subject: [PATCH 138/140] Fixing CI pipeline. --- .../src/main/java/com/azure/cosmos/CosmosAsyncContainer.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java index 21fcc64bfa5a..e15a88e12618 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncContainer.java @@ -1488,7 +1488,7 @@ public Mono> readMany( CosmosReadManyRequestOptions options = new CosmosReadManyRequestOptions(); - if (!StringUtils.isNotEmpty(sessionToken)) { + if (StringUtils.isNotEmpty(sessionToken)) { options = options.setSessionToken(sessionToken); } From ac8d848996c77b22bd19e592b6a75cdfe06a742e Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 22 Jul 2024 12:07:09 -0400 Subject: [PATCH 139/140] Updated CHANGELOG.md. --- sdk/cosmos/azure-cosmos/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/CHANGELOG.md b/sdk/cosmos/azure-cosmos/CHANGELOG.md index 30ea01807327..b3d475cdc9c5 100644 --- a/sdk/cosmos/azure-cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure-cosmos/CHANGELOG.md @@ -5,8 +5,10 @@ #### Features Added * Added optional id validation to prevent documents with invalid char '/' in id property to be created. - See [PR 41108](https://github.com/Azure/azure-sdk-for-java/pull/41108) * Added support for specifying a set of custom diagnostic correlation ids in the request options. - See [PR 40835](https://github.com/Azure/azure-sdk-for-java/pull/40835) +* Added support for client-driven partition-level failover for multi-write CosmosDB accounts. - See[PR 39265](https://github.com/Azure/azure-sdk-for-java/pull/39265) #### Breaking Changes +* Fixed an issue where customer provided session token is not honored for the `readMany` operation. - See[PR 39265](https://github.com/Azure/azure-sdk-for-java/pull/39265) #### Bugs Fixed * Fixed an issue where `contactedRegions` shows the wrong region in a multi region account if no preferred regions are specified. - See [PR 41045](https://github.com/Azure/azure-sdk-for-java/pull/41045) From fb59af5f4e8741a796cc06002a489e70246a7a45 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Mon, 22 Jul 2024 13:08:30 -0400 Subject: [PATCH 140/140] Added code comments. --- .../com/azure/cosmos/implementation/RxDocumentClientImpl.java | 2 ++ .../ConsecutiveExceptionBasedCircuitBreaker.java | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 9d2e98fb6acd..2a8027d4f313 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -2431,6 +2431,8 @@ private Mono handleCircuitBreakingFeedbackForPointOperation( PointOperationContextForCircuitBreaker pointOperationContextForCircuitBreaker = failedRequest.requestContext.getPointOperationContextForCircuitBreaker(); checkNotNull(pointOperationContextForCircuitBreaker, "Argument 'pointOperationContextForCircuitBreaker' must not be null!"); + // scoping the handling of CANCEL signal handling for reasons outside of end-to-end operation timeout + // to purely operations which have end-to-end operation timeout enabled if (pointOperationContextForCircuitBreaker.isThresholdBasedAvailabilityStrategyEnabled()) { if (!pointOperationContextForCircuitBreaker.isRequestHedged() && pointOperationContextForCircuitBreaker.getHasOperationSeenSuccess()) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java index dade8404f7fd..e1b12d00a037 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/circuitBreaker/ConsecutiveExceptionBasedCircuitBreaker.java @@ -59,6 +59,8 @@ public LocationSpecificHealthContext handleException( .build(); } case Unavailable: + // the tests done so far view this as an unreachable piece of code - but not failing the operation + // with IllegalStateException and simply logging that a presumed unreachable code path seems to make sense for now logger.warn("Region {} should not be handling failures in {} health status for partition key range : {} and collection RID : {}", regionWithException, locationHealthStatus.getStringifiedLocationHealthStatus(), @@ -143,6 +145,8 @@ public LocationSpecificHealthContext handleSuccess( } case Unavailable: + // the tests done so far view this as an unreachable piece of code - but not failing the operation + // and simply logging that a presumed unreachable code path seems to make sense for now logger.warn("Region {} should not be handling successes in {} health status for partition key range : {} and collection RID : {}", regionWithSuccess, locationHealthStatus.getStringifiedLocationHealthStatus(),