From d72b90a57c350f44e49a378927b994d2ed091777 Mon Sep 17 00:00:00 2001 From: Neha Rao Date: Tue, 16 Sep 2025 07:07:34 -0700 Subject: [PATCH 1/7] progress --- .../src/main/java/module-info.java | 1 + .../com/azure/cosmos/BarrierRequestTests.java | 104 ++++++++++++++++++ .../RxDocumentClientUnderTest.java | 9 +- .../RxGatewayStoreModelTest.java | 10 +- .../SpyClientUnderTestFactory.java | 13 ++- .../ThinClientStoreModelTest.java | 3 +- .../com/azure/cosmos/CosmosAsyncClient.java | 11 ++ .../com/azure/cosmos/CosmosClientBuilder.java | 27 ++++- .../implementation/AsyncDocumentClient.java | 18 ++- .../DocumentServiceRequestContext.java | 2 + .../cosmos/implementation/HttpConstants.java | 1 + .../implementation/RxDocumentClientImpl.java | 29 +++-- .../implementation/RxGatewayStoreModel.java | 14 ++- .../implementation/ThinClientStoreModel.java | 7 +- .../directconnectivity/ConsistencyWriter.java | 25 ++++- .../JsonNodeStorePayload.java | 6 +- .../RntbdTransportClient.java | 10 ++ .../StoreClientFactory.java | 14 +++ .../directconnectivity/StoreResponse.java | 18 +++ .../FaultInjectionRequestContext.java | 2 +- .../implementation/routing/LocationCache.java | 2 +- .../routing/RegionalRoutingContext.java | 14 ++- 22 files changed, 308 insertions(+), 32 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java diff --git a/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java index ccaaf699f81e..f1e04b052d72 100644 --- a/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java +++ b/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java @@ -4,6 +4,7 @@ module com.azure.cosmos.test { requires transitive com.azure.cosmos; + requires reactor.core; exports com.azure.cosmos.test.faultinjection; } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java new file mode 100644 index 000000000000..811da503b646 --- /dev/null +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java @@ -0,0 +1,104 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +package com.azure.cosmos; + +import com.azure.cosmos.implementation.HttpConstants; +import com.azure.cosmos.implementation.InternalServerErrorException; +import com.azure.cosmos.implementation.OperationType; +import com.azure.cosmos.implementation.ResourceType; +import com.azure.cosmos.implementation.RxDocumentServiceResponse; +import com.azure.cosmos.implementation.directconnectivity.WFConstants; +import com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.rx.TestSuiteBase; +import org.testng.annotations.Factory; +import org.testng.annotations.Test; + +import java.util.concurrent.atomic.AtomicBoolean; + +import static org.assertj.core.api.AssertionsForClassTypes.assertThat; + +/** + * Use fault injection to verify the handling of barrier requests for various scenarios. + */ +public class BarrierRequestTests extends TestSuiteBase { + String primaryRegion = "central us"; + String secondaryRegion = "east us"; + + @Factory(dataProvider = "clientBuildersWithDirectTcpSession") + public BarrierRequestTests(CosmosClientBuilder clientBuilder) { + super(clientBuilder); + } + + @Test + public void validateBarriersOnFailover() { + AtomicBoolean simulateAddressRefreshFailures = new AtomicBoolean(false); + AtomicBoolean failoverTriggered = new AtomicBoolean(false); + + CosmosClientBuilder clientBuilder = getClientBuilder() + .consistencyLevel(ConsistencyLevel.STRONG) + .directMode(); + + clientBuilder.httpRequestInterceptor((request) -> { + // After the initial write, simulate a network failure on address resolution. + // This will trigger the SDK's failover logic. 
+ if (request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.primaryRegion)) // Target the primary region + { + while (!failoverTriggered.compareAndSet(false, true)) { // Signal that the failover process has started + try { + Thread.sleep(2); + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + } + logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); + throw new InternalServerErrorException("Simulated network failure for address resolution.", HttpConstants.SubStatusCodes.UNKNOWN); + } + + // Once the failover is triggered, intercept the subsequent metadata refresh call. + if (failoverTriggered.get()) + { + // Return the modified account properties, making the SDK believe a failover has occurred. + logger.info("Intercepting metadata call and returning modified account properties. New write region: " + this.secondaryRegion); + return new RxDocumentServiceResponse(null, null); + } + + return null; // let other requests proceed normally + }); + + clientBuilder.storeResponseInterceptor((request, storeResponse) -> { + + if ((request.getOperationType() == OperationType.Create && request.getResourceType() == ResourceType.Document) + || request.getOperationType() == OperationType.Head) { + + String lsn = storeResponse.getHeaderValue(WFConstants.BackendHeaders.LSN); + + // Decrement so that GCLSN < LSN to simulate the replication lag + String manipulatedGclsn = String.valueOf(Long.parseLong(lsn) - 2L); + + storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, manipulatedGclsn); + + // Enable address refresh failures for subsequent barrier requests in the primary region. 
+ simulateAddressRefreshFailures.set(true); + } + + return storeResponse; + }); + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + CosmosAsyncContainer container = getSharedSinglePartitionCosmosContainer(client); + + CosmosItemResponse response = container.createItem(CosmosDiagnosticsTest.TestItem.createNewItem()).block(); + validateDiagnosticsIsPresent(response); + + CosmosDiagnosticsContext diagnosticsContext = response.getDiagnostics().getDiagnosticsContext(); + System.out.println(diagnosticsContext); + } + + private void validateDiagnosticsIsPresent(CosmosItemResponse response) { + assertThat(response).isNotNull(); + assertThat(response.getStatusCode()).isNotNull(); + assertThat(response.getStatusCode()).isEqualTo(HttpConstants.StatusCodes.CREATED); + assertThat(response.getDiagnostics()).isNotNull(); + } +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java index a9f5cb35549c..a80bd0996f4d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.function.Function; import static org.mockito.Mockito.doAnswer; @@ -64,7 +65,7 @@ public RxDocumentClientUnderTest(URI serviceEndpoint, null, false ); - init(null, null); + init(null, null, null, null); } RxGatewayStoreModel createRxGatewayProxy( @@ -75,7 +76,8 @@ RxGatewayStoreModel createRxGatewayProxy( GlobalEndpointManager globalEndpointManager, GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker globalPartitionEndpointManagerForPerPartitionCircuitBreaker, HttpClient rxOrigClient, - ApiType apiType) { + ApiType apiType, + Function 
httpRequestInterceptor) { origHttpClient = rxOrigClient; spyHttpClient = Mockito.spy(rxOrigClient); @@ -93,6 +95,7 @@ RxGatewayStoreModel createRxGatewayProxy( userAgentContainer, globalEndpointManager, spyHttpClient, - apiType); + apiType, + httpRequestInterceptor); } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java index 5a54c807b6eb..e018532b3b45 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxGatewayStoreModelTest.java @@ -99,7 +99,8 @@ public void readTimeout() throws Exception { userAgentContainer, globalEndpointManager, httpClient, - null); + null, + null); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName(clientContext, @@ -143,6 +144,7 @@ public void serviceUnavailable() throws Exception { userAgentContainer, globalEndpointManager, httpClient, + null, null); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); @@ -202,7 +204,8 @@ public void applySessionToken( new UserAgentContainer(), globalEndpointManager, httpClient, - apiType); + apiType, + null); storeModel.setGatewayServiceConfigurationReader(gatewayServiceConfigurationReader); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName( @@ -274,7 +277,8 @@ public void validateApiType() throws Exception { new UserAgentContainer(), globalEndpointManager, httpClient, - apiType); + apiType, + null); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName( clientContext, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java 
b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index b06d6f89b8e9..e18dff8e6aa3 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; +import java.util.function.Function; import java.util.stream.Collectors; import static org.mockito.Mockito.doAnswer; @@ -110,7 +111,7 @@ public static class ClientWithGatewaySpy extends SpyBaseClass httpRequestInterceptor) { this.origRxGatewayStoreModel = super.createRxGatewayProxy( sessionContainer, consistencyLevel, @@ -134,7 +136,8 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, userAgentContainer, globalEndpointManager, rxClient, - apiType); + apiType, + httpRequestInterceptor); this.requests = Collections.synchronizedList(new ArrayList<>()); this.spyRxGatewayStoreModel = Mockito.spy(this.origRxGatewayStoreModel); this.initRequestCapture(); @@ -201,7 +204,7 @@ public static class ClientUnderTest extends SpyBaseClass { credential, contentResponseOnWriteEnabled, clientTelemetryConfig); - init(null, this::initHttpRequestCapture); + init(null, this::initHttpRequestCapture, null, null); } private Mono captureHttpRequest(InvocationOnMock invocationOnMock) { @@ -287,7 +290,7 @@ public static class DirectHttpsClientUnderTest extends SpyBaseClass contentResponseOnWriteEnabled, clientTelemetryConfig); assert connectionPolicy.getConnectionMode() == ConnectionMode.DIRECT; - init(null, null); + init(null, null, null, null); this.origHttpClient = ReflectionUtils.getDirectHttpsHttpClient(this); this.spyHttpClient = spy(this.origHttpClient); diff --git 
a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java index 64cd7fe37115..b1794724680d 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java @@ -44,7 +44,8 @@ public void testThinClientStoreModel() throws Exception { ConsistencyLevel.SESSION, new UserAgentContainer(), globalEndpointManager, - httpClient); + httpClient, + null); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName( clientContext, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java index 680b98a6a915..6453cfab123f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java @@ -19,6 +19,8 @@ import com.azure.cosmos.implementation.QueryFeedOperationState; import com.azure.cosmos.implementation.RequestOptions; import com.azure.cosmos.implementation.ResourceType; +import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.RxDocumentServiceResponse; import com.azure.cosmos.implementation.Strings; import com.azure.cosmos.implementation.WriteRetryPolicy; import com.azure.cosmos.implementation.clienttelemetry.ClientMetricsDiagnosticsHandler; @@ -27,6 +29,7 @@ import com.azure.cosmos.implementation.clienttelemetry.CosmosMeterOptions; import com.azure.cosmos.implementation.clienttelemetry.MetricCategory; import com.azure.cosmos.implementation.clienttelemetry.TagName; +import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import 
com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdMetrics; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; import com.azure.cosmos.implementation.throughputControl.sdk.config.SDKThroughputControlGroupInternal; @@ -60,6 +63,7 @@ import java.util.EnumSet; import java.util.List; import java.util.Objects; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; @@ -115,6 +119,8 @@ public final class CosmosAsyncClient implements Closeable { private final List requestPolicies; private final CosmosItemSerializer defaultCustomSerializer; private final java.util.function.Function containerFactory; + private final BiFunction storeResponseInterceptor; + private Function httpRequestInterceptor; CosmosAsyncClient(CosmosClientBuilder builder) { // Async Cosmos client wrapper @@ -134,6 +140,9 @@ public final class CosmosAsyncClient implements Closeable { this.nonIdempotentWriteRetryPolicy = builder.getNonIdempotentWriteRetryPolicy(); this.requestPolicies = builder.getOperationPolicies(); this.defaultCustomSerializer = builder.getCustomItemSerializer(); + this.storeResponseInterceptor = builder.getStoreResponseInterceptor(); + this.httpRequestInterceptor = builder.getHttpRequestInterceptor(); + if (builder.containerCreationInterceptor() != null) { this.containerFactory = builder.containerCreationInterceptor(); } else { @@ -185,6 +194,8 @@ public final class CosmosAsyncClient implements Closeable { .withDefaultSerializer(this.defaultCustomSerializer) .withRegionScopedSessionCapturingEnabled(builder.isRegionScopedSessionCapturingEnabled()) .withPerPartitionAutomaticFailoverEnabled(builder.isPerPartitionAutomaticFailoverEnabled()) + .withStoreResponseInterceptor(this.storeResponseInterceptor) + .withHttpRequestInterceptor(this.httpRequestInterceptor) .build(); this.accountConsistencyLevel = this.asyncDocumentClient.getDefaultConsistencyLevelOfAccount(); diff --git 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 12d022e69ee7..012f3f6bb6e3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -13,13 +13,16 @@ import com.azure.cosmos.implementation.ConnectionPolicy; import com.azure.cosmos.implementation.CosmosClientMetadataCachesSnapshot; import com.azure.cosmos.implementation.DiagnosticsProvider; +import com.azure.cosmos.implementation.RxDocumentServiceRequest; +import com.azure.cosmos.implementation.RxDocumentServiceResponse; import com.azure.cosmos.implementation.Strings; import com.azure.cosmos.implementation.WriteRetryPolicy; import com.azure.cosmos.implementation.apachecommons.collections.list.UnmodifiableList; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.apachecommons.lang.time.StopWatch; +import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.guava25.base.Preconditions; -import com.azure.cosmos.implementation.perPartitionCircuitBreaker.PartitionLevelCircuitBreakerConfig; +import com.azure.cosmos.implementation.http.HttpClient; import com.azure.cosmos.implementation.routing.LocationHelper; import com.azure.cosmos.models.CosmosAuthorizationTokenResolver; import com.azure.cosmos.models.CosmosClientTelemetryConfig; @@ -39,6 +42,7 @@ import java.util.Locale; import java.util.Objects; import java.util.Set; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.function.Supplier; @@ -155,6 +159,8 @@ public class CosmosClientBuilder implements private boolean serverCertValidationDisabled = false; private Function containerFactory = null; + private BiFunction storeResponseInterceptor = null; + private Function 
httpRequestInterceptor = null; /** * Instantiates a new Cosmos client builder. @@ -170,6 +176,25 @@ public CosmosClientBuilder() { this.requestPolicies = new LinkedList<>(); } + CosmosClientBuilder httpRequestInterceptor(Function httpRequestInterceptor) { + this.httpRequestInterceptor = httpRequestInterceptor; + return this; + } + + Function getHttpRequestInterceptor() { + return this.httpRequestInterceptor; + } + + CosmosClientBuilder storeResponseInterceptor( + BiFunction storeResponseInterceptor) { + this.storeResponseInterceptor = storeResponseInterceptor; + return this; + } + + BiFunction getStoreResponseInterceptor() { + return this.storeResponseInterceptor; + } + CosmosClientBuilder metadataCaches(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot) { this.state = metadataCachesSnapshot; return this; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index f19ccb503027..d852e6324bbf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -15,6 +15,8 @@ import com.azure.cosmos.implementation.batch.ServerBatchRequest; import com.azure.cosmos.implementation.caches.RxClientCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; +import com.azure.cosmos.implementation.directconnectivity.StoreResponse; +import com.azure.cosmos.implementation.http.HttpClient; import com.azure.cosmos.implementation.perPartitionAutomaticFailover.GlobalPartitionEndpointManagerForPerPartitionAutomaticFailover; import com.azure.cosmos.implementation.perPartitionCircuitBreaker.GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; @@ -42,6 +44,8 @@ import 
java.net.URISyntaxException; import java.util.List; import java.util.Map; +import java.util.function.BiFunction; +import java.util.function.Function; /** * Provides a client-side logical representation of the Azure Cosmos DB @@ -115,6 +119,8 @@ class Builder { private boolean isRegionScopedSessionCapturingEnabled; private boolean isPerPartitionAutomaticFailoverEnabled; private List operationPolicies; + private BiFunction storeResponseInterceptor; + private Function httpRequestInterceptor; public Builder withServiceEndpoint(String serviceEndpoint) { try { @@ -287,6 +293,16 @@ public Builder withPerPartitionAutomaticFailoverEnabled(boolean isPerPartitionAu return this; } + public Builder withStoreResponseInterceptor(BiFunction storeResponseInterceptor) { + this.storeResponseInterceptor = storeResponseInterceptor; + return this; + } + + public Builder withHttpRequestInterceptor(Function httpRequestInterceptor) { + this.httpRequestInterceptor = httpRequestInterceptor; + return this; + } + private void ifThrowIllegalArgException(boolean value, String error) { if (value) { throw new IllegalArgumentException(error); @@ -329,7 +345,7 @@ public AsyncDocumentClient build() { operationPolicies, isPerPartitionAutomaticFailoverEnabled); - client.init(state, null); + client.init(state, null, httpRequestInterceptor, storeResponseInterceptor); return client; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java index 18da5250c458..f04e00669063 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/DocumentServiceRequestContext.java @@ -38,6 +38,7 @@ public class DocumentServiceRequestContext implements Cloneable { public volatile ISessionToken sessionToken; public volatile 
long quorumSelectedLSN; public volatile long globalCommittedSelectedLSN; + public volatile String globalStrongWriteRegion; public volatile StoreResponse globalStrongWriteResponse; public volatile ConsistencyLevel originalRequestConsistencyLevel; public volatile ReadConsistencyStrategy readConsistencyStrategy; @@ -148,6 +149,7 @@ public DocumentServiceRequestContext clone() { context.sessionToken = this.sessionToken; context.quorumSelectedLSN = this.quorumSelectedLSN; context.globalCommittedSelectedLSN = this.globalCommittedSelectedLSN; + context.globalStrongWriteRegion = this.globalStrongWriteRegion; context.globalStrongWriteResponse = this.globalStrongWriteResponse; context.originalRequestConsistencyLevel = this.originalRequestConsistencyLevel; context.readConsistencyStrategy = this.readConsistencyStrategy; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java index fea6efeccfc6..64c0e224d0b3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/HttpConstants.java @@ -449,6 +449,7 @@ public static class SubStatusCodes { public static final int CLIENT_OPERATION_TIMEOUT = 20008; // Sub-status code paired with 408 status code public static final int TRANSIT_TIMEOUT = 20911; + public static final int WRITE_REGION_BARRIER_CHANGED_MID_OPERATION = 20912; // IMPORTANT - below sub status codes have no corresponding .Net // version, because they are only applicable in Java diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 9c536ac1ca8d..a7c8b0d6ec98 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java 
+++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -40,6 +40,7 @@ import com.azure.cosmos.implementation.directconnectivity.ServerStoreModel; import com.azure.cosmos.implementation.directconnectivity.StoreClient; import com.azure.cosmos.implementation.directconnectivity.StoreClientFactory; +import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.faultinjection.IFaultInjectorProvider; import com.azure.cosmos.implementation.feedranges.FeedRangeEpkImpl; import com.azure.cosmos.implementation.http.HttpClient; @@ -718,7 +719,10 @@ private void updateThinProxy() { (this.thinProxy).setSessionContainer(this.sessionContainer); } - public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Function httpClientInterceptor) { + public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, + Function httpClientInterceptor, + Function httpRequestInterceptor, + BiFunction storeResponseInterceptor) { try { this.httpClientInterceptor = httpClientInterceptor; @@ -732,13 +736,15 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func this.userAgentContainer, this.globalEndpointManager, this.reactorHttpClient, - this.apiType); + this.apiType, + httpRequestInterceptor); this.thinProxy = createThinProxy(this.sessionContainer, this.consistencyLevel, this.userAgentContainer, this.globalEndpointManager, - this.reactorHttpClient); + this.reactorHttpClient, + httpRequestInterceptor); this.globalEndpointManager.init(); @@ -794,7 +800,7 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Func if (this.connectionPolicy.getConnectionMode() == ConnectionMode.GATEWAY) { this.storeModel = this.gatewayProxy; } else { - this.initializeDirectConnectivity(); + this.initializeDirectConnectivity(storeResponseInterceptor); } this.retryPolicy.setRxCollectionCache(this.collectionCache); ConsistencyLevel 
effectiveConsistencyLevel = consistencyLevel != null @@ -818,7 +824,7 @@ public void serialize(CosmosClientMetadataCachesSnapshot state) { RxCollectionCache.serialize(state, this.collectionCache); } - private void initializeDirectConnectivity() { + private void initializeDirectConnectivity(BiFunction rntbdTransportClientStoreResponseInterceptor) { this.addressResolver = new GlobalAddressResolver(this, this.reactorHttpClient, this.globalEndpointManager, @@ -844,6 +850,7 @@ private void initializeDirectConnectivity() { this.clientTelemetry, this.globalEndpointManager); + this.storeClientFactory.setStoreResponseInterceptorIfRntbdTransportClient(rntbdTransportClientStoreResponseInterceptor); this.globalPartitionEndpointManagerForPerPartitionCircuitBreaker.setGlobalAddressResolver(this.addressResolver); this.createStoreModel(true); } @@ -875,7 +882,8 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - ApiType apiType) { + ApiType apiType, + Function httpRequestInterceptor) { return new RxGatewayStoreModel( this, sessionContainer, @@ -884,21 +892,24 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, userAgentContainer, globalEndpointManager, httpClient, - apiType); + apiType, + httpRequestInterceptor); } ThinClientStoreModel createThinProxy(ISessionContainer sessionContainer, ConsistencyLevel consistencyLevel, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, - HttpClient httpClient) { + HttpClient httpClient, + Function httpRequestInterceptor) { return new ThinClientStoreModel( this, sessionContainer, consistencyLevel, userAgentContainer, globalEndpointManager, - httpClient); + httpClient, + httpRequestInterceptor); } private HttpClient httpClient() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 676860013631..e2f2f857b593 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -50,6 +50,7 @@ import java.util.Map.Entry; import java.util.Objects; import java.util.concurrent.Callable; +import java.util.function.Function; import static com.azure.cosmos.implementation.HttpConstants.HttpHeaders.INTENDED_COLLECTION_RID_HEADER; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkNotNull; @@ -79,6 +80,7 @@ public class RxGatewayStoreModel implements RxStoreModel, HttpTransportSerialize private GatewayServiceConfigurationReader gatewayServiceConfigurationReader; private RxClientCollectionCache collectionCache; private GatewayServerErrorInjector gatewayServerErrorInjector; + private Function httpRequestInterceptor; public RxGatewayStoreModel( DiagnosticsClientContext clientContext, @@ -88,7 +90,8 @@ public RxGatewayStoreModel( UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - ApiType apiType) { + ApiType apiType, + Function httpRequestInterceptor) { this.clientContext = clientContext; @@ -104,6 +107,8 @@ public RxGatewayStoreModel( this.httpClient = httpClient; this.sessionContainer = sessionContainer; + + this.httpRequestInterceptor = httpRequestInterceptor; } public RxGatewayStoreModel(RxGatewayStoreModel inner) { @@ -300,6 +305,13 @@ public Mono performRequestInternal(RxDocumentServiceR private Mono performRequestInternalCore(RxDocumentServiceRequest request, URI requestUri) { try { + if (this.httpRequestInterceptor != null) { + RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request); + if (result != null) { + return Mono.just(result); + } + } + HttpRequest httpRequest = request 
.getEffectiveHttpTransportSerializer(this) .wrapInHttpRequest(request, requestUri); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java index 3922ca187db7..2c039466e5ad 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java @@ -25,6 +25,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.function.Function; /** * While this class is public, but it is not part of our published public APIs. @@ -45,7 +46,8 @@ public ThinClientStoreModel( ConsistencyLevel defaultConsistencyLevel, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, - HttpClient httpClient) { + HttpClient httpClient, + Function httpRequestInterceptor) { super( clientContext, sessionContainer, @@ -54,7 +56,8 @@ public ThinClientStoreModel( userAgentContainer, globalEndpointManager, httpClient, - ApiType.SQL); + ApiType.SQL, + httpRequestInterceptor); String userAgent = userAgentContainer != null ? 
userAgentContainer.getUserAgent() diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java index 92a781cf071c..0da969ce71be 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java @@ -27,7 +27,6 @@ import com.azure.cosmos.implementation.Strings; import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.collections.ComparatorUtils; -import com.azure.cosmos.implementation.directconnectivity.rntbd.ClosedClientTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import reactor.core.Exceptions; @@ -44,6 +43,7 @@ import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; @@ -338,6 +338,7 @@ Mono barrierForGlobalStrong(RxDocumentServiceRequest request, Sto throw new GoneException(RMResources.Gone, HttpConstants.SubStatusCodes.SERVER_GENERATED_410); } + request.requestContext.globalStrongWriteRegion = request.requestContext.regionalRoutingContextToRoute.getRegion(); request.requestContext.globalStrongWriteResponse = response; request.requestContext.globalCommittedSelectedLSN = lsn.v; @@ -384,10 +385,32 @@ Mono barrierForGlobalStrong(RxDocumentServiceRequest request, Sto } } + private void validateGlobalStrongWriteRegion(RxDocumentServiceRequest barrierRequest) + { + // validate that a regional failover has not occurred since the initial write. 
+ String currentRegion = barrierRequest.requestContext.regionalRoutingContextToRoute.getRegion(); + if (barrierRequest.requestContext.globalStrongWriteRegion != null && + !Objects.equals(barrierRequest.requestContext.globalStrongWriteRegion, currentRegion)) + { + logger.info( + "ConsistencyWriter: Failover detected during strong consistency write. Original write was to region " + + barrierRequest.requestContext.globalStrongWriteRegion + " but retry is targeting currentRegion " + + currentRegion + ". Failing request."); + + throw new RequestTimeoutException( + "The write operation was initiated in region " + barrierRequest.requestContext.globalStrongWriteRegion + + " but a regional failover occurred. The current attempt is to endpoint " + currentRegion + + ". The state of the write is ambiguous.", + null, + HttpConstants.SubStatusCodes.WRITE_REGION_BARRIER_CHANGED_MID_OPERATION); + } + } + private Mono waitForWriteBarrierAsync(RxDocumentServiceRequest barrierRequest, long selectedGlobalCommittedLsn) { AtomicInteger writeBarrierRetryCount = new AtomicInteger(ConsistencyWriter.MAX_NUMBER_OF_WRITE_BARRIER_READ_RETRIES); AtomicLong maxGlobalCommittedLsnReceived = new AtomicLong(0); return Flux.defer(() -> { + this.validateGlobalStrongWriteRegion(barrierRequest); if (barrierRequest.requestContext.timeoutHelper.isElapsed()) { return Flux.error(new RequestTimeoutException()); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java index 3203878189fa..bf95209cb896 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/JsonNodeStorePayload.java @@ -21,7 +21,7 @@ public class JsonNodeStorePayload implements StorePayload { 
private static final Logger logger = LoggerFactory.getLogger(JsonNodeStorePayload.class); private static final CharsetDecoder fallbackCharsetDecoder = getFallbackCharsetDecoder(); private final int responsePayloadSize; - private final JsonNode jsonValue; + private JsonNode jsonValue; public JsonNodeStorePayload(ByteBufInputStream bufferStream, int readableBytes) { if (readableBytes > 0) { @@ -72,6 +72,10 @@ public JsonNode getPayload() { return jsonValue; } + public void setPayload(JsonNode payload) { + jsonValue = payload; + } + private static CharsetDecoder getFallbackCharsetDecoder() { if (StringUtil.isNullOrEmpty(Configs.getCharsetDecoderErrorActionOnMalformedInput()) && StringUtil.isNullOrEmpty(Configs.getCharsetDecoderErrorActionOnUnmappedCharacter())) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/RntbdTransportClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/RntbdTransportClient.java index 22c5ed8624b6..bf693a1bd16c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/RntbdTransportClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/RntbdTransportClient.java @@ -63,6 +63,7 @@ import java.util.concurrent.CompletionException; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiFunction; import java.util.function.Consumer; import static com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdReporter.reportIssue; @@ -114,6 +115,7 @@ public class RntbdTransportClient extends TransportClient { private final RntbdServerErrorInjector serverErrorInjector; private final ProactiveOpenConnectionsProcessor proactiveOpenConnectionsProcessor; private final AddressSelector addressSelector; + private BiFunction storeResponseInterceptor; // endregion @@ -319,6 +321,10 @@ public Mono 
invokeStoreAsync(final Uri addressUri, final RxDocume storeResponse.setChannelAcquisitionTimeline(record.getChannelAcquisitionTimeline()); } + if (this.storeResponseInterceptor != null) { + return this.storeResponseInterceptor.apply(request, storeResponse); + } + return storeResponse; }).onErrorMap(throwable -> { @@ -492,6 +498,10 @@ private boolean shouldRecordChannelAcquisitionTimeline(RequestTimeline requestTi channelAcquisitionEvent.get().getDuration().toMillis() > this.channelAcquisitionContextLatencyThresholdInMillis; } + public void setStoreResponseInterceptor(BiFunction storeResponseInterceptor) { + this.storeResponseInterceptor = storeResponseInterceptor; + } + // endregion // region Types diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClientFactory.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClientFactory.java index 16f5124a1895..7e8643065f29 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClientFactory.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreClientFactory.java @@ -10,9 +10,12 @@ import com.azure.cosmos.implementation.GlobalEndpointManager; import com.azure.cosmos.implementation.IAuthorizationTokenProvider; import com.azure.cosmos.implementation.ISessionContainer; +import com.azure.cosmos.implementation.RxDocumentServiceRequest; import com.azure.cosmos.implementation.UserAgentContainer; import com.azure.cosmos.implementation.clienttelemetry.ClientTelemetry; +import java.util.function.BiFunction; + // We suppress the "try" warning here because the close() method's signature // allows it to throw InterruptedException which is strongly advised against // by AutoCloseable (see: http://docs.oracle.com/javase/7/docs/api/java/lang/AutoCloseable.html#close()). 
@@ -98,6 +101,17 @@ public StoreClient createStoreClient( sessionRetryOptions); } + public void setStoreResponseInterceptorIfRntbdTransportClient( + BiFunction storeResponseInterceptor) { + this.throwIfClosed(); + + if (this.transportClient instanceof RntbdTransportClient) { + ((RntbdTransportClient) this.transportClient).setStoreResponseInterceptor(storeResponseInterceptor); + } else { + throw new IllegalStateException("StoreResponseInterceptor can only be set for RntbdTransportClient"); + } + } + private void throwIfClosed() { if (isClosed) { throw new IllegalStateException("storeClient already closed!"); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java index 158207d9b569..ed804760bfca 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreResponse.java @@ -153,6 +153,10 @@ public JsonNode getResponseBodyAsJson() { return this.responsePayload.getPayload(); } + public void setResponseBodyAsJson(JsonNode body) { + this.responsePayload.setPayload(body); + } + public int getResponseBodyLength() { if (this.responsePayload == null) { return 0; @@ -196,6 +200,20 @@ public String getHeaderValue(String attribute) { return null; } + // meant for fault injection only + public void setHeaderValue(String attribute, String value) { + if (this.responseHeaderValues == null || this.responseHeaderNames.length != this.responseHeaderValues.length) { + return; + } + + for (int i = 0; i < responseHeaderNames.length; i++) { + if (responseHeaderNames[i].equalsIgnoreCase(attribute)) { + responseHeaderValues[i] = value; + return; + } + } + } + public double getRequestCharge() { String value = 
this.getHeaderValue(HttpConstants.HttpHeaders.REQUEST_CHARGE); if (StringUtils.isEmpty(value)) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/faultinjection/FaultInjectionRequestContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/faultinjection/FaultInjectionRequestContext.java index 79b6847c3fa0..b6ceadddec84 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/faultinjection/FaultInjectionRequestContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/faultinjection/FaultInjectionRequestContext.java @@ -99,7 +99,7 @@ public String getFaultInjectionRuleId(long transportRequestId) { public void setLocationEndpointToRoute(URI locationEndpointToRoute) { this.locationEndpointToRoute = locationEndpointToRoute; - this.regionalRoutingContextToRoute = new RegionalRoutingContext(locationEndpointToRoute); + this.regionalRoutingContextToRoute = new RegionalRoutingContext(locationEndpointToRoute, null); } public URI getLocationEndpointToRoute() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index 37bd06e6b0bc..0087203f1e57 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -943,7 +943,7 @@ private void addRoutingContexts( String location = gatewayDbAccountLocation.getName().toLowerCase(Locale.ROOT); URI endpoint = new URI(gatewayDbAccountLocation.getEndpoint().toLowerCase(Locale.ROOT)); - RegionalRoutingContext regionalRoutingContext = new RegionalRoutingContext(endpoint); + RegionalRoutingContext regionalRoutingContext = new RegionalRoutingContext(endpoint, location); if (!endpointsByLocation.containsKey(location)) { 
endpointsByLocation.put(location, regionalRoutingContext); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/RegionalRoutingContext.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/RegionalRoutingContext.java index 641637d5c81b..3becdfdeed93 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/RegionalRoutingContext.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/RegionalRoutingContext.java @@ -11,16 +11,26 @@ public class RegionalRoutingContext { // IMPORTANT: // Please reevaluate overridden equals() implementation // when adding additional properties to this class + private final String region; private final URI gatewayRegionalEndpoint; private final String gatewayRegionalEndpointAsString; private URI thinclientRegionalEndpoint; private String thinclientRegionalEndpointAsString; - public RegionalRoutingContext(URI gatewayRegionalEndpoint) { + public RegionalRoutingContext(URI gatewayRegionalEndpoint, String region) { this.gatewayRegionalEndpoint = gatewayRegionalEndpoint; this.gatewayRegionalEndpointAsString = gatewayRegionalEndpoint.toString(); + this.region = region; this.thinclientRegionalEndpoint = null; - thinclientRegionalEndpointAsString = null; + this.thinclientRegionalEndpointAsString = null; + } + + public RegionalRoutingContext(URI gatewayRegionalEndpoint) { + this(gatewayRegionalEndpoint, null); + } + + public String getRegion() { + return this.region; } public URI getGatewayRegionalEndpoint() { From 9bd30eac1efbf2be550d66ab4073e2be5c629c69 Mon Sep 17 00:00:00 2001 From: Neha Rao Date: Mon, 29 Sep 2025 17:01:06 -0700 Subject: [PATCH 2/7] more progress --- .../com/azure/cosmos/BarrierRequestTests.java | 37 ++++++++++++------- .../RxDocumentClientUnderTest.java | 3 +- .../SpyClientUnderTestFactory.java | 3 +- .../com/azure/cosmos/CosmosAsyncClient.java | 2 +- 
.../com/azure/cosmos/CosmosClientBuilder.java | 6 +-- .../implementation/AsyncDocumentClient.java | 4 +- .../implementation/RxDocumentClientImpl.java | 6 +-- .../implementation/RxGatewayStoreModel.java | 7 ++-- .../implementation/ThinClientStoreModel.java | 3 +- .../GatewayAddressCache.java | 2 + 10 files changed, 45 insertions(+), 28 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java index 811da503b646..537b28eaff5c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java @@ -39,24 +39,19 @@ public void validateBarriersOnFailover() { .consistencyLevel(ConsistencyLevel.STRONG) .directMode(); - clientBuilder.httpRequestInterceptor((request) -> { + clientBuilder.httpRequestInterceptor((request, uri) -> { // After the initial write, simulate a network failure on address resolution. // This will trigger the SDK's failover logic. - if (request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.primaryRegion)) // Target the primary region + if (simulateAddressRefreshFailures.get() && + request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.primaryRegion)) // Target the primary region { - while (!failoverTriggered.compareAndSet(false, true)) { // Signal that the failover process has started - try { - Thread.sleep(2); - } catch (InterruptedException e) { - throw new RuntimeException(e); - } - } logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); + failoverTriggered.set(true); throw new InternalServerErrorException("Simulated network failure for address resolution.", HttpConstants.SubStatusCodes.UNKNOWN); } // Once the failover is triggered, intercept the subsequent metadata refresh call. 
- if (failoverTriggered.get()) + if (failoverTriggered.get() && uri.getPath() == "/") { // Return the modified account properties, making the SDK believe a failover has occurred. logger.info("Intercepting metadata call and returning modified account properties. New write region: " + this.secondaryRegion); @@ -68,8 +63,7 @@ public void validateBarriersOnFailover() { clientBuilder.storeResponseInterceptor((request, storeResponse) -> { - if ((request.getOperationType() == OperationType.Create && request.getResourceType() == ResourceType.Document) - || request.getOperationType() == OperationType.Head) { + if ((request.getOperationType() == OperationType.Create && request.getResourceType() == ResourceType.Document)) { String lsn = storeResponse.getHeaderValue(WFConstants.BackendHeaders.LSN); @@ -79,7 +73,24 @@ public void validateBarriersOnFailover() { storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, manipulatedGclsn); // Enable address refresh failures for subsequent barrier requests in the primary region. - simulateAddressRefreshFailures.set(true); + simulateAddressRefreshFailures.compareAndSet(false, true); + } + + // Track barrier requests (Head operations on a collection) + if (request.getOperationType() == OperationType.Head && request.getResourceType() == ResourceType.DocumentCollection) + { + // If the barrier request is in the secondary region, allow it to succeed. + if (request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.secondaryRegion)) + { + // Satisfy the barrier condition by setting GCLSN >= LSN + storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(storeResponse.getLSN())); + } + else + { + // For any other region (initially the primary), keep the barrier condition unmet. 
+ long lsn = storeResponse.getLSN() - 2; + storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(lsn)); + } } return storeResponse; diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java index a80bd0996f4d..e6acce6a9410 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.function.BiFunction; import java.util.function.Function; import static org.mockito.Mockito.doAnswer; @@ -77,7 +78,7 @@ RxGatewayStoreModel createRxGatewayProxy( GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker globalPartitionEndpointManagerForPerPartitionCircuitBreaker, HttpClient rxOrigClient, ApiType apiType, - Function httpRequestInterceptor) { + BiFunction httpRequestInterceptor) { origHttpClient = rxOrigClient; spyHttpClient = Mockito.spy(rxOrigClient); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index e18dff8e6aa3..5a037624f456 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Future; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; @@ -128,7 
+129,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, GlobalEndpointManager globalEndpointManager, HttpClient rxClient, ApiType apiType, - Function httpRequestInterceptor) { + BiFunction httpRequestInterceptor) { this.origRxGatewayStoreModel = super.createRxGatewayProxy( sessionContainer, consistencyLevel, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java index 6453cfab123f..445533c92e48 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java @@ -120,7 +120,7 @@ public final class CosmosAsyncClient implements Closeable { private final CosmosItemSerializer defaultCustomSerializer; private final java.util.function.Function containerFactory; private final BiFunction storeResponseInterceptor; - private Function httpRequestInterceptor; + private BiFunction httpRequestInterceptor; CosmosAsyncClient(CosmosClientBuilder builder) { // Async Cosmos client wrapper diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 012f3f6bb6e3..76d0d3a429a1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -160,7 +160,7 @@ public class CosmosClientBuilder implements private Function containerFactory = null; private BiFunction storeResponseInterceptor = null; - private Function httpRequestInterceptor = null; + private BiFunction httpRequestInterceptor = null; /** * Instantiates a new Cosmos client builder. 
@@ -176,12 +176,12 @@ public CosmosClientBuilder() { this.requestPolicies = new LinkedList<>(); } - CosmosClientBuilder httpRequestInterceptor(Function httpRequestInterceptor) { + CosmosClientBuilder httpRequestInterceptor(BiFunction httpRequestInterceptor) { this.httpRequestInterceptor = httpRequestInterceptor; return this; } - Function getHttpRequestInterceptor() { + BiFunction getHttpRequestInterceptor() { return this.httpRequestInterceptor; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index d852e6324bbf..9e0ea6924f8a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -120,7 +120,7 @@ class Builder { private boolean isPerPartitionAutomaticFailoverEnabled; private List operationPolicies; private BiFunction storeResponseInterceptor; - private Function httpRequestInterceptor; + private BiFunction httpRequestInterceptor; public Builder withServiceEndpoint(String serviceEndpoint) { try { @@ -298,7 +298,7 @@ public Builder withStoreResponseInterceptor(BiFunction httpRequestInterceptor) { + public Builder withHttpRequestInterceptor(BiFunction httpRequestInterceptor) { this.httpRequestInterceptor = httpRequestInterceptor; return this; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index a7c8b0d6ec98..cf2c2b194f49 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -721,7 +721,7 @@ private void updateThinProxy() { public void 
init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Function httpClientInterceptor, - Function httpRequestInterceptor, + BiFunction httpRequestInterceptor, BiFunction storeResponseInterceptor) { try { @@ -883,7 +883,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, ApiType apiType, - Function httpRequestInterceptor) { + BiFunction httpRequestInterceptor) { return new RxGatewayStoreModel( this, sessionContainer, @@ -901,7 +901,7 @@ ThinClientStoreModel createThinProxy(ISessionContainer sessionContainer, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - Function httpRequestInterceptor) { + BiFunction httpRequestInterceptor) { return new ThinClientStoreModel( this, sessionContainer, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index e2f2f857b593..4c6eab75dc2a 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -50,6 +50,7 @@ import java.util.Map.Entry; import java.util.Objects; import java.util.concurrent.Callable; +import java.util.function.BiFunction; import java.util.function.Function; import static com.azure.cosmos.implementation.HttpConstants.HttpHeaders.INTENDED_COLLECTION_RID_HEADER; @@ -80,7 +81,7 @@ public class RxGatewayStoreModel implements RxStoreModel, HttpTransportSerialize private GatewayServiceConfigurationReader gatewayServiceConfigurationReader; private RxClientCollectionCache collectionCache; private GatewayServerErrorInjector gatewayServerErrorInjector; - private Function httpRequestInterceptor; + private BiFunction httpRequestInterceptor; public RxGatewayStoreModel( 
DiagnosticsClientContext clientContext, @@ -91,7 +92,7 @@ public RxGatewayStoreModel( GlobalEndpointManager globalEndpointManager, HttpClient httpClient, ApiType apiType, - Function httpRequestInterceptor) { + BiFunction httpRequestInterceptor) { this.clientContext = clientContext; @@ -306,7 +307,7 @@ private Mono performRequestInternalCore(RxDocumentSer try { if (this.httpRequestInterceptor != null) { - RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request); + RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request, requestUri); if (result != null) { return Mono.just(result); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java index 2c039466e5ad..b05d7d17030d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java @@ -25,6 +25,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; +import java.util.function.BiFunction; import java.util.function.Function; /** @@ -47,7 +48,7 @@ public ThinClientStoreModel( UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, - Function httpRequestInterceptor) { + BiFunction httpRequestInterceptor) { super( clientContext, sessionContainer, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java index e62d7b8c6ca4..e368eece373c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java +++ 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java @@ -71,6 +71,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; import java.util.stream.Collectors; import static com.azure.cosmos.implementation.guava25.base.Preconditions.checkArgument; @@ -110,6 +111,7 @@ public class GatewayAddressCache implements IAddressCache { private final boolean replicaAddressValidationEnabled; private final Set replicaValidationScopes; private GatewayServerErrorInjector gatewayServerErrorInjector; + private Function httpRequestInterceptor; public GatewayAddressCache( DiagnosticsClientContext clientContext, From a1d3847570c1472599e06e702697ce5ba98d737a Mon Sep 17 00:00:00 2001 From: Neha Rao Date: Mon, 6 Oct 2025 15:25:09 -0700 Subject: [PATCH 3/7] progress --- .../com/azure/cosmos/BarrierRequestTests.java | 47 ++++++++++++------- .../RxDocumentClientUnderTest.java | 5 +- .../SpyClientUnderTestFactory.java | 2 +- .../ThinClientStoreModelTest.java | 3 +- .../GlobalAddressResolverTest.java | 3 +- .../implementation/BackoffRetryUtility.java | 3 +- .../implementation/ClientRetryPolicy.java | 1 + .../implementation/GlobalEndpointManager.java | 2 + .../implementation/RxDocumentClientImpl.java | 15 +++--- .../ServiceUnavailableException.java | 2 +- .../implementation/ThinClientStoreModel.java | 5 +- .../directconnectivity/ConsistencyWriter.java | 2 + .../GatewayAddressCache.java | 13 ++++- .../GlobalAddressResolver.java | 20 +++----- .../implementation/routing/LocationCache.java | 1 + 15 files changed, 74 insertions(+), 50 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java index 537b28eaff5c..479412153aad 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java 
+++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java @@ -3,19 +3,19 @@ package com.azure.cosmos; -import com.azure.cosmos.implementation.HttpConstants; -import com.azure.cosmos.implementation.InternalServerErrorException; -import com.azure.cosmos.implementation.OperationType; -import com.azure.cosmos.implementation.ResourceType; -import com.azure.cosmos.implementation.RxDocumentServiceResponse; +import com.azure.cosmos.implementation.*; import com.azure.cosmos.implementation.directconnectivity.WFConstants; import com.azure.cosmos.models.CosmosItemResponse; import com.azure.cosmos.rx.TestSuiteBase; +import io.netty.channel.ConnectTimeoutException; import org.testng.annotations.Factory; import org.testng.annotations.Test; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; +import static com.azure.cosmos.implementation.HttpConstants.SubStatusCodes.GATEWAY_ENDPOINT_UNAVAILABLE; import static org.assertj.core.api.AssertionsForClassTypes.assertThat; /** @@ -40,29 +40,43 @@ public void validateBarriersOnFailover() { .directMode(); clientBuilder.httpRequestInterceptor((request, uri) -> { - // After the initial write, simulate a network failure on address resolution. - // This will trigger the SDK's failover logic. 
- if (simulateAddressRefreshFailures.get() && - request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.primaryRegion)) // Target the primary region - { - logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); - failoverTriggered.set(true); - throw new InternalServerErrorException("Simulated network failure for address resolution.", HttpConstants.SubStatusCodes.UNKNOWN); - } + logger.info("inside httpRequestInterceptor, simulateAddressRefreshFailures: {}, operationType: {}, resourceType: {}, uri: {}", + simulateAddressRefreshFailures.get(), request.getOperationType(), request.getResourceType(), uri); // Once the failover is triggered, intercept the subsequent metadata refresh call. - if (failoverTriggered.get() && uri.getPath() == "/") + logger.info("Checking failoverTriggered to intercept metadata refresh call: " + failoverTriggered.get()); + logger.info("isMetadataRequest: " + request.isMetadataRequest()); + logger.info("ResourceType: " + request.getResourceType()); + logger.info("OperationType: " + request.getOperationType()); + if (failoverTriggered.get() && + request.isMetadataRequest() && + request.getResourceType() == ResourceType.DatabaseAccount) + // request.getOperationType() == OperationType.Read) { // Return the modified account properties, making the SDK believe a failover has occurred. logger.info("Intercepting metadata call and returning modified account properties. New write region: " + this.secondaryRegion); return new RxDocumentServiceResponse(null, null); } + // After the initial write, simulate a network failure on address resolution. + // This will trigger the SDK's failover logic. 
+ if (simulateAddressRefreshFailures.get() && + request.isAddressRefresh() && + request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.primaryRegion)) // Target the primary region + { + logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); + logger.info("failoverTriggered: " + failoverTriggered.get()); + failoverTriggered.compareAndSet(false, true); + logger.info("failoverTriggered: " + failoverTriggered.get()); + Map headers = new HashMap<>(); + headers.put(HttpConstants.HttpHeaders.SUB_STATUS, Integer.toString(GATEWAY_ENDPOINT_UNAVAILABLE)); + throw new CosmosException(HttpConstants.SubStatusCodes.GATEWAY_ENDPOINT_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); + } + return null; // let other requests proceed normally }); clientBuilder.storeResponseInterceptor((request, storeResponse) -> { - if ((request.getOperationType() == OperationType.Create && request.getResourceType() == ResourceType.Document)) { String lsn = storeResponse.getHeaderValue(WFConstants.BackendHeaders.LSN); @@ -74,6 +88,7 @@ public void validateBarriersOnFailover() { // Enable address refresh failures for subsequent barrier requests in the primary region. 
simulateAddressRefreshFailures.compareAndSet(false, true); + logger.info("inside storeResponseInterceptor, set simulateAddressRefreshFailures to {}", simulateAddressRefreshFailures.get()); } // Track barrier requests (Head operations on a collection) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java index e6acce6a9410..18a29267e19b 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/RxDocumentClientUnderTest.java @@ -77,8 +77,7 @@ RxGatewayStoreModel createRxGatewayProxy( GlobalEndpointManager globalEndpointManager, GlobalPartitionEndpointManagerForPerPartitionCircuitBreaker globalPartitionEndpointManagerForPerPartitionCircuitBreaker, HttpClient rxOrigClient, - ApiType apiType, - BiFunction httpRequestInterceptor) { + ApiType apiType) { origHttpClient = rxOrigClient; spyHttpClient = Mockito.spy(rxOrigClient); @@ -97,6 +96,6 @@ RxGatewayStoreModel createRxGatewayProxy( globalEndpointManager, spyHttpClient, apiType, - httpRequestInterceptor); + null); } } diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java index 5a037624f456..cad0039cb288 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/SpyClientUnderTestFactory.java @@ -138,7 +138,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, globalEndpointManager, rxClient, apiType, - httpRequestInterceptor); + null); this.requests = 
Collections.synchronizedList(new ArrayList<>()); this.spyRxGatewayStoreModel = Mockito.spy(this.origRxGatewayStoreModel); this.initRequestCapture(); diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java index b1794724680d..64cd7fe37115 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/ThinClientStoreModelTest.java @@ -44,8 +44,7 @@ public void testThinClientStoreModel() throws Exception { ConsistencyLevel.SESSION, new UserAgentContainer(), globalEndpointManager, - httpClient, - null); + httpClient); RxDocumentServiceRequest dsr = RxDocumentServiceRequest.createFromName( clientContext, diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java index 331be53cc7af..5879e7d3e61c 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolverTest.java @@ -110,7 +110,7 @@ public void resolveAsync() throws Exception { GlobalAddressResolver globalAddressResolver = new GlobalAddressResolver(mockDiagnosticsClientContext(), httpClient, endpointManager, Protocol.HTTPS, authorizationTokenProvider, collectionCache, routingMapProvider, userAgentContainer, - serviceConfigReader, connectionPolicy, null); + serviceConfigReader, connectionPolicy, null, null); RxDocumentServiceRequest request; request = 
RxDocumentServiceRequest.createFromName(mockDiagnosticsClientContext(), OperationType.Read, @@ -145,6 +145,7 @@ public void submitOpenConnectionTasksAndInitCaches() { userAgentContainer, serviceConfigReader, connectionPolicy, + null, null); GlobalAddressResolver.EndpointCache endpointCache = new GlobalAddressResolver.EndpointCache(); GatewayAddressCache gatewayAddressCache = Mockito.mock(GatewayAddressCache.class); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/BackoffRetryUtility.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/BackoffRetryUtility.java index 4a9c5b9772d1..2a2c845994cc 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/BackoffRetryUtility.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/BackoffRetryUtility.java @@ -3,6 +3,7 @@ package com.azure.cosmos.implementation; import com.azure.cosmos.implementation.directconnectivity.AddressSelector; +import org.slf4j.LoggerFactory; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; import reactor.util.retry.Retry; @@ -16,7 +17,6 @@ * This is meant to be internally used only by our sdk. */ public class BackoffRetryUtility { - // transforms a retryFunc to a function which can be used by Observable.retryWhen(.) // also it invokes preRetryCallback prior to doing retry. 
public static final Quadruple InitialArgumentValuePolicyArg = Quadruple.with(false, false, @@ -30,7 +30,6 @@ public class BackoffRetryUtility { // a helper method for invoking callback method given the retry policy static public Mono executeRetry(Callable> callbackMethod, IRetryPolicy retryPolicy) { - return Mono.defer(() -> { try { return callbackMethod.call(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index 8306f185032c..17632a8f1fec 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -495,6 +495,7 @@ public void onBeforeSendRequest(RxDocumentServiceRequest request) { // Important: this is to make the fault injection context will not be lost between each retries this.request.faultInjectionRequestContext = this.faultInjectionRequestContext; + logger.info("inside onBeforeSendRequest"); // Resolve the endpoint for the request and pin the resolution to the resolved endpoint // This enables marking the endpoint unavailability on endpoint failover/unreachability this.regionalRoutingContext = this.globalEndpointManager.resolveServiceEndpoint(request); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index c5fae9c43669..11dc9c2abf41 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -191,6 +191,7 @@ public void close() { } public Mono refreshLocationAsync(DatabaseAccount databaseAccount, boolean forceRefresh) { + 
logger.info("refreshLocationAsync invoked. forceRefresh: {}", forceRefresh); return Mono.defer(() -> { logger.debug("refreshLocationAsync() invoked"); @@ -242,6 +243,7 @@ public int getPreferredLocationCount() { } private Mono refreshLocationPrivateAsync(DatabaseAccount databaseAccount) { + logger.info("inside refreshLocationPrivateAsync"); return Mono.defer(() -> { logger.debug("refreshLocationPrivateAsync() refreshing locations"); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index 6d5d68cdf6be..7de43d5789ba 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -260,6 +260,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization private final RetryPolicy retryPolicy; private HttpClient reactorHttpClient; private Function httpClientInterceptor; + private BiFunction httpRequestInterceptor; private volatile boolean useMultipleWriteLocations; // creator of TransportClient is responsible for disposing it. 
@@ -795,8 +796,7 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, this.consistencyLevel, this.userAgentContainer, this.globalEndpointManager, - this.reactorHttpClient, - httpRequestInterceptor); + this.reactorHttpClient); this.globalEndpointManager.init(); @@ -824,6 +824,8 @@ public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, this.partitionKeyRangeCache = new RxPartitionKeyRangeCache(RxDocumentClientImpl.this, collectionCache); + this.httpRequestInterceptor = httpRequestInterceptor; + updateGatewayProxy(); updateThinProxy(); clientTelemetry = new ClientTelemetry( @@ -889,7 +891,8 @@ private void initializeDirectConnectivity(BiFunction httpRequestInterceptor) { + HttpClient httpClient) { return new ThinClientStoreModel( this, sessionContainer, consistencyLevel, userAgentContainer, globalEndpointManager, - httpClient, - httpRequestInterceptor); + httpClient); } private HttpClient httpClient() { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ServiceUnavailableException.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ServiceUnavailableException.java index a321d59dc238..3c5fa878f176 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ServiceUnavailableException.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ServiceUnavailableException.java @@ -38,7 +38,7 @@ public ServiceUnavailableException(CosmosError cosmosError, setSubStatus(subStatusCode); } - ServiceUnavailableException(String message, int subStatusCode) { + public ServiceUnavailableException(String message, int subStatusCode) { this(message, null, (String) null, subStatusCode); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java index b05d7d17030d..9ba1d801b328 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ThinClientStoreModel.java @@ -47,8 +47,7 @@ public ThinClientStoreModel( ConsistencyLevel defaultConsistencyLevel, UserAgentContainer userAgentContainer, GlobalEndpointManager globalEndpointManager, - HttpClient httpClient, - BiFunction httpRequestInterceptor) { + HttpClient httpClient) { super( clientContext, sessionContainer, @@ -58,7 +57,7 @@ public ThinClientStoreModel( globalEndpointManager, httpClient, ApiType.SQL, - httpRequestInterceptor); + null); String userAgent = userAgentContainer != null ? userAgentContainer.getUserAgent() diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java index 0da969ce71be..89bcce1c5e7d 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java @@ -389,6 +389,8 @@ private void validateGlobalStrongWriteRegion(RxDocumentServiceRequest barrierReq { // validate that a regional failover has not occurred since the initial write. String currentRegion = barrierRequest.requestContext.regionalRoutingContextToRoute.getRegion(); + logger.info("Entered validateGlobalStrongWriteRegion. 
CurrentRegion: {}, OriginalWriteRegion: {}", + currentRegion, barrierRequest.requestContext.globalStrongWriteRegion); if (barrierRequest.requestContext.globalStrongWriteRegion != null && !Objects.equals(barrierRequest.requestContext.globalStrongWriteRegion, currentRegion)) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java index e368eece373c..72d8061cfebd 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java @@ -71,6 +71,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiFunction; import java.util.function.Function; import java.util.stream.Collectors; @@ -111,7 +112,7 @@ public class GatewayAddressCache implements IAddressCache { private final boolean replicaAddressValidationEnabled; private final Set replicaValidationScopes; private GatewayServerErrorInjector gatewayServerErrorInjector; - private Function httpRequestInterceptor; + public BiFunction httpRequestInterceptor; public GatewayAddressCache( DiagnosticsClientContext clientContext, @@ -359,6 +360,16 @@ private Mono> getServerAddressesViaGatewayInternalAsync(RxDocument JavaStreamUtils.toString(partitionKeyRangeIds, ",")); } + logger.debug("inside getServerAddressesViaGatewayInternalAsync"); + logger.debug("httpRequestInterceptor is " + (this.httpRequestInterceptor != null ? 
"not null" : "null")); + if (this.httpRequestInterceptor != null) { + logger.debug("getServerAddressesViaGatewayInternalAsync intercepted"); + RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request, null); + if (result != null) { + return Mono.just(result.getQueryResponse(null, Address.class)); + } + } + // track address refresh has happened, this is only meant to be used for fault injection validation request.faultInjectionRequestContext.recordAddressForceRefreshed(forceRefresh); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index 00905682b4d1..ee89b109d616 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -4,18 +4,7 @@ package com.azure.cosmos.implementation.directconnectivity; import com.azure.cosmos.CosmosContainerProactiveInitConfig; -import com.azure.cosmos.implementation.ApiType; -import com.azure.cosmos.implementation.Configs; -import com.azure.cosmos.implementation.ConnectionPolicy; -import com.azure.cosmos.implementation.CosmosSchedulers; -import com.azure.cosmos.implementation.DiagnosticsClientContext; -import com.azure.cosmos.implementation.DocumentCollection; -import com.azure.cosmos.implementation.GlobalEndpointManager; -import com.azure.cosmos.implementation.IAuthorizationTokenProvider; -import com.azure.cosmos.implementation.ImplementationBridgeHelpers; -import com.azure.cosmos.implementation.OpenConnectionResponse; -import com.azure.cosmos.implementation.RxDocumentServiceRequest; -import com.azure.cosmos.implementation.UserAgentContainer; +import com.azure.cosmos.implementation.*; import 
com.azure.cosmos.implementation.apachecommons.lang.tuple.ImmutablePair; import com.azure.cosmos.implementation.caches.RxCollectionCache; import com.azure.cosmos.implementation.caches.RxPartitionKeyRangeCache; @@ -40,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.BiFunction; import java.util.stream.Collectors; public class GlobalAddressResolver implements IAddressResolver { @@ -62,6 +52,7 @@ public class GlobalAddressResolver implements IAddressResolver { private ProactiveOpenConnectionsProcessor proactiveOpenConnectionsProcessor; private ConnectionPolicy connectionPolicy; private GatewayServerErrorInjector gatewayServerErrorInjector; + private BiFunction httpRequestInterceptor; public GlobalAddressResolver( DiagnosticsClientContext diagnosticsClientContext, @@ -74,7 +65,8 @@ public GlobalAddressResolver( UserAgentContainer userAgentContainer, GatewayServiceConfigurationReader serviceConfigReader, ConnectionPolicy connectionPolicy, - ApiType apiType) { + ApiType apiType, + BiFunction httpRequestInterceptor) { this.diagnosticsClientContext = diagnosticsClientContext; this.httpClient = httpClient; this.endpointManager = endpointManager; @@ -86,6 +78,7 @@ public GlobalAddressResolver( this.serviceConfigReader = serviceConfigReader; this.tcpConnectionEndpointRediscoveryEnabled = connectionPolicy.isTcpConnectionEndpointRediscoveryEnabled(); this.connectionPolicy = connectionPolicy; + this.httpRequestInterceptor = httpRequestInterceptor; int maxBackupReadEndpoints = (connectionPolicy.isReadRequestsFallbackEnabled()) ? 
GlobalAddressResolver.MaxBackupReadRegions : 0; this.maxEndpoints = maxBackupReadEndpoints + 2; // for write and alternate write getEndpoint (during failover) @@ -291,6 +284,7 @@ private EndpointCache getOrAddEndpoint(URI endpoint) { this.connectionPolicy, this.proactiveOpenConnectionsProcessor, this.gatewayServerErrorInjector); + gatewayAddressCache.httpRequestInterceptor = this.httpRequestInterceptor; AddressResolver addressResolver = new AddressResolver(); addressResolver.initializeCaches(this.collectionCache, this.routingMapProvider, gatewayAddressCache); EndpointCache cache = new EndpointCache(); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index d18afb984d56..3819060affc8 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -145,6 +145,7 @@ public void markEndpointUnavailableForWrite(URI endpoint) { * @param databaseAccount READ DatabaseAccount */ public void onDatabaseAccountRead(DatabaseAccount databaseAccount) { + logger.info("inside onDatabaseAccountRead"); this.updateLocationCache( databaseAccount.getWritableLocations(), databaseAccount.getReadableLocations(), From 86509bede126795d009bc190b8257ca0a4c7ee3e Mon Sep 17 00:00:00 2001 From: Neha Rao Date: Mon, 27 Oct 2025 08:19:23 -0700 Subject: [PATCH 4/7] progress, now intercepting account topology after failover --- .../src/main/java/module-info.java | 10 -- .../com/azure/cosmos/BarrierRequestTests.java | 106 +++++++++++++----- .../SpyClientUnderTestFactory.java | 2 +- .../com/azure/cosmos/CosmosAsyncClient.java | 2 +- .../com/azure/cosmos/CosmosClientBuilder.java | 6 +- .../implementation/AsyncDocumentClient.java | 4 +- .../implementation/ClientRetryPolicy.java | 21 ++-- 
.../implementation/GlobalEndpointManager.java | 37 +++--- .../implementation/RxDocumentClientImpl.java | 7 +- .../implementation/RxGatewayStoreModel.java | 8 +- .../directconnectivity/ConsistencyWriter.java | 2 +- .../GatewayAddressCache.java | 11 +- .../GlobalAddressResolver.java | 5 +- .../implementation/routing/LocationCache.java | 8 +- .../src/main/java/module-info.java | 87 -------------- 15 files changed, 145 insertions(+), 171 deletions(-) delete mode 100644 sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java delete mode 100644 sdk/cosmos/azure-cosmos/src/main/java/module-info.java diff --git a/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java deleted file mode 100644 index f1e04b052d72..000000000000 --- a/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -module com.azure.cosmos.test { - - requires transitive com.azure.cosmos; - requires reactor.core; - - exports com.azure.cosmos.test.faultinjection; -} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java index 479412153aad..736dfd18f8ae 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java @@ -4,9 +4,13 @@ package com.azure.cosmos; import com.azure.cosmos.implementation.*; +import com.azure.cosmos.implementation.directconnectivity.HttpUtils; +import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.directconnectivity.WFConstants; import com.azure.cosmos.models.CosmosItemResponse; import com.azure.cosmos.rx.TestSuiteBase; +import io.netty.buffer.ByteBuf; +import 
io.netty.buffer.ByteBufInputStream; import io.netty.channel.ConnectTimeoutException; import org.testng.annotations.Factory; import org.testng.annotations.Test; @@ -14,16 +18,19 @@ import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static com.azure.cosmos.implementation.HttpConstants.SubStatusCodes.GATEWAY_ENDPOINT_UNAVAILABLE; import static org.assertj.core.api.AssertionsForClassTypes.assertThat; /** - * Use fault injection to verify the handling of barrier requests for various scenarios. + * E2E testing to verify the handling of barrier requests. */ public class BarrierRequestTests extends TestSuiteBase { - String primaryRegion = "central us"; - String secondaryRegion = "east us"; + // eg. "Central US", case matters + String primaryRegion = "Central US"; + String secondaryRegion = "East US"; @Factory(dataProvider = "clientBuildersWithDirectTcpSession") public BarrierRequestTests(CosmosClientBuilder clientBuilder) { @@ -31,7 +38,7 @@ public BarrierRequestTests(CosmosClientBuilder clientBuilder) { } @Test - public void validateBarriersOnFailover() { + public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { AtomicBoolean simulateAddressRefreshFailures = new AtomicBoolean(false); AtomicBoolean failoverTriggered = new AtomicBoolean(false); @@ -39,31 +46,18 @@ public void validateBarriersOnFailover() { .consistencyLevel(ConsistencyLevel.STRONG) .directMode(); - clientBuilder.httpRequestInterceptor((request, uri) -> { + clientBuilder.httpRequestInterceptor((request) -> { logger.info("inside httpRequestInterceptor, simulateAddressRefreshFailures: {}, operationType: {}, resourceType: {}, uri: {}", - simulateAddressRefreshFailures.get(), request.getOperationType(), request.getResourceType(), uri); - - // Once the failover is triggered, intercept the subsequent metadata refresh call. 
- logger.info("Checking failoverTriggered to intercept metadata refresh call: " + failoverTriggered.get()); - logger.info("isMetadataRequest: " + request.isMetadataRequest()); - logger.info("ResourceType: " + request.getResourceType()); - logger.info("OperationType: " + request.getOperationType()); - if (failoverTriggered.get() && - request.isMetadataRequest() && - request.getResourceType() == ResourceType.DatabaseAccount) - // request.getOperationType() == OperationType.Read) - { - // Return the modified account properties, making the SDK believe a failover has occurred. - logger.info("Intercepting metadata call and returning modified account properties. New write region: " + this.secondaryRegion); - return new RxDocumentServiceResponse(null, null); - } + simulateAddressRefreshFailures.get(), request.getOperationType(), request.getResourceType()); // After the initial write, simulate a network failure on address resolution. // This will trigger the SDK's failover logic. if (simulateAddressRefreshFailures.get() && request.isAddressRefresh() && - request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.primaryRegion)) // Target the primary region + request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) // Target the primary region { + logger.info("request operationType: " + request.getOperationType()); + logger.info("request resourceType: " + request.getResourceType()); logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); logger.info("failoverTriggered: " + failoverTriggered.get()); failoverTriggered.compareAndSet(false, true); @@ -73,11 +67,35 @@ public void validateBarriersOnFailover() { throw new CosmosException(HttpConstants.SubStatusCodes.GATEWAY_ENDPOINT_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); } + // Once the failover is triggered, intercept the subsequent metadata 
refresh call. + logger.info("Checking failoverTriggered to intercept metadata refresh call: " + failoverTriggered.get()); + //logger.info("isMetadataRequest: " + request.isMetadataRequest()); + if (failoverTriggered.get() && request.getResourceType() == ResourceType.DatabaseAccount && request.getOperationType() == OperationType.Read) + { + // Return the modified account properties, making the SDK believe a failover has occurred. + logger.info("Intercepting metadata call and returning modified account properties to simulate failover. New write region: " + this.secondaryRegion); + + ByteBuf byteBuf = Utils.getUTF8BytesOrNull(getDatabaseAccountJsonAfterFailover()); + StoreResponse storeResponse = new StoreResponse( + TestConfigurations.HOST, + 200, + request.getHeaders(), + new ByteBufInputStream(byteBuf), + byteBuf.readableBytes()); + + return new RxDocumentServiceResponse(null, storeResponse); + } + return null; // let other requests proceed normally }); clientBuilder.storeResponseInterceptor((request, storeResponse) -> { - if ((request.getOperationType() == OperationType.Create && request.getResourceType() == ResourceType.Document)) { + logger.info("inside storeResponseInterceptor, operationType: {}, resourceType: {}, region: {}", + request.getOperationType(), request.getResourceType(), request.requestContext.regionalRoutingContextToRoute.getRegion()); + + if (request.getOperationType() == OperationType.Create && + request.getResourceType() == ResourceType.Document && + request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) { String lsn = storeResponse.getHeaderValue(WFConstants.BackendHeaders.LSN); @@ -95,7 +113,8 @@ public void validateBarriersOnFailover() { if (request.getOperationType() == OperationType.Head && request.getResourceType() == ResourceType.DocumentCollection) { // If the barrier request is in the secondary region, allow it to succeed. 
- if (request.requestContext.regionalRoutingContextToRoute.getRegion().equals(this.secondaryRegion)) + logger.info("Barrier request detected for region: {}", request.requestContext.regionalRoutingContextToRoute.getRegion()); + if (request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.secondaryRegion)) { // Satisfy the barrier condition by setting GCLSN >= LSN storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(storeResponse.getLSN())); @@ -107,7 +126,6 @@ public void validateBarriersOnFailover() { storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(lsn)); } } - return storeResponse; }); @@ -115,6 +133,7 @@ public void validateBarriersOnFailover() { CosmosAsyncContainer container = getSharedSinglePartitionCosmosContainer(client); CosmosItemResponse response = container.createItem(CosmosDiagnosticsTest.TestItem.createNewItem()).block(); + logger.info("Item created"); validateDiagnosticsIsPresent(response); CosmosDiagnosticsContext diagnosticsContext = response.getDiagnostics().getDiagnosticsContext(); @@ -127,4 +146,41 @@ private void validateDiagnosticsIsPresent(CosmosItemResponse httpRequestInterceptor) { + Function httpRequestInterceptor) { this.origRxGatewayStoreModel = super.createRxGatewayProxy( sessionContainer, consistencyLevel, diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java index 445533c92e48..6453cfab123f 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosAsyncClient.java @@ -120,7 +120,7 @@ public final class CosmosAsyncClient implements Closeable { private final CosmosItemSerializer defaultCustomSerializer; private final java.util.function.Function containerFactory; private final BiFunction storeResponseInterceptor; 
- private BiFunction httpRequestInterceptor; + private Function httpRequestInterceptor; CosmosAsyncClient(CosmosClientBuilder builder) { // Async Cosmos client wrapper diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java index 76d0d3a429a1..012f3f6bb6e3 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosClientBuilder.java @@ -160,7 +160,7 @@ public class CosmosClientBuilder implements private Function containerFactory = null; private BiFunction storeResponseInterceptor = null; - private BiFunction httpRequestInterceptor = null; + private Function httpRequestInterceptor = null; /** * Instantiates a new Cosmos client builder. @@ -176,12 +176,12 @@ public CosmosClientBuilder() { this.requestPolicies = new LinkedList<>(); } - CosmosClientBuilder httpRequestInterceptor(BiFunction httpRequestInterceptor) { + CosmosClientBuilder httpRequestInterceptor(Function httpRequestInterceptor) { this.httpRequestInterceptor = httpRequestInterceptor; return this; } - BiFunction getHttpRequestInterceptor() { + Function getHttpRequestInterceptor() { return this.httpRequestInterceptor; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java index 9e0ea6924f8a..d852e6324bbf 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/AsyncDocumentClient.java @@ -120,7 +120,7 @@ class Builder { private boolean isPerPartitionAutomaticFailoverEnabled; private List operationPolicies; private BiFunction storeResponseInterceptor; - private BiFunction httpRequestInterceptor; + private Function 
httpRequestInterceptor; public Builder withServiceEndpoint(String serviceEndpoint) { try { @@ -298,7 +298,7 @@ public Builder withStoreResponseInterceptor(BiFunction httpRequestInterceptor) { + public Builder withHttpRequestInterceptor(Function httpRequestInterceptor) { this.httpRequestInterceptor = httpRequestInterceptor; return this; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index dc2d4e72061e..c611cef810d6 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -33,7 +33,7 @@ public class ClientRetryPolicy extends DocumentClientRetryPolicy { private final static Logger logger = LoggerFactory.getLogger(ClientRetryPolicy.class); final static int RetryIntervalInMS = 1000; //Once we detect failover wait for 1 second before retrying request. - final static int MaxRetryCount = 120; + final static int MaxRetryCount = 10; // TODO: Remember to set this back after done testing private final static int MaxServiceUnavailableRetryCount = 1; private final DocumentClientRetryPolicy throttlingRetry; @@ -275,8 +275,10 @@ private ShouldRetryResult shouldRetryOnSessionNotAvailable(RxDocumentServiceRequ } private Mono shouldRetryOnEndpointFailureAsync(boolean isReadRequest, boolean forceRefresh, boolean usePreferredLocations) { + logger.info("in shouldRetryOnEndpointFailureAsync() Retry count = {}", this.failoverRetryCount); + if (!this.enableEndpointDiscovery || this.failoverRetryCount > MaxRetryCount) { - logger.warn("ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {}", this.failoverRetryCount); + logger.info("ShouldRetryOnEndpointFailureAsync() Not retrying. 
Retry count = {}", this.failoverRetryCount); return Mono.just(ShouldRetryResult.noRetry()); } @@ -321,7 +323,7 @@ private Mono shouldRetryOnGatewayTimeout(CosmosException clie //if operation is data plane read, metadata read, or query plan it can be retried on a different endpoint. if (canPerformCrossRegionRetryOnGatewayReadTimeout) { if (!this.enableEndpointDiscovery || this.failoverRetryCount > MaxRetryCount) { - logger.warn("shouldRetryOnHttpTimeout() Not retrying. Retry count = {}", this.failoverRetryCount); + logger.info("shouldRetryOnHttpTimeout() Not retrying. Retry count = {}", this.failoverRetryCount); return Mono.just(ShouldRetryResult.noRetry()); } @@ -341,7 +343,7 @@ private Mono shouldRetryOnGatewayTimeout(CosmosException clie private Mono shouldNotRetryOnEndpointFailureAsync(boolean isReadRequest , boolean forceRefresh, boolean usePreferredLocations) { if (!this.enableEndpointDiscovery || this.failoverRetryCount > MaxRetryCount) { - logger.warn("ShouldRetryOnEndpointFailureAsync() Not retrying. Retry count = {}", this.failoverRetryCount); + logger.info("ShouldRetryOnEndpointFailureAsync() Not retrying. 
Retry count = {}", this.failoverRetryCount); return Mono.just(ShouldRetryResult.noRetry()); } Mono refreshLocationCompletable = this.refreshLocation(isReadRequest, forceRefresh, usePreferredLocations); @@ -362,14 +364,15 @@ private Mono refreshLocation(boolean isReadRequest, boolean forceRefresh, URI gatewayRegionalEndpoint = this.regionalRoutingContext.getGatewayRegionalEndpoint(); if (isReadRequest) { - logger.warn("marking the endpoint {} as unavailable for read", gatewayRegionalEndpoint); + logger.info("marking the endpoint {} as unavailable for read", gatewayRegionalEndpoint); this.globalEndpointManager.markEndpointUnavailableForRead(gatewayRegionalEndpoint); } else { - logger.warn("marking the endpoint {} as unavailable for write", gatewayRegionalEndpoint); + logger.info("marking the endpoint {} as unavailable for write", gatewayRegionalEndpoint); this.globalEndpointManager.markEndpointUnavailableForWrite(gatewayRegionalEndpoint); } this.retryContext = new RetryContext(this.failoverRetryCount, usePreferredLocations); + logger.info("entering refreshLocationAsync"); return this.globalEndpointManager.refreshLocationAsync(null, forceRefresh); } @@ -414,7 +417,7 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( nonIdempotentWriteRetriesEnabled, isWebExceptionRetriable, cosmosException)) { - logger.warn( + logger.info( "shouldRetryOnBackendServiceUnavailableAsync() Not retrying" + " on write with non retriable exception and non server returned service unavailable. Retry count = {}", this.serviceUnavailableRetryCount); @@ -422,7 +425,7 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( } if (this.serviceUnavailableRetryCount++ > MaxServiceUnavailableRetryCount) { - logger.warn("shouldRetryOnBackendServiceUnavailableAsync() Not retrying. Retry count = {}", this.serviceUnavailableRetryCount); + logger.info("shouldRetryOnBackendServiceUnavailableAsync() Not retrying. 
Retry count = {}", this.serviceUnavailableRetryCount); return Mono.just(ShouldRetryResult.noRetry()); } @@ -436,7 +439,7 @@ private Mono shouldRetryOnBackendServiceUnavailableAsync( int availablePreferredLocations = this.globalEndpointManager.getPreferredLocationCount(); if (availablePreferredLocations <= 1) { - logger.warn("shouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {}", availablePreferredLocations); + logger.info("shouldRetryOnServiceUnavailable() Not retrying. No other regions available for the request. AvailablePreferredLocations = {}", availablePreferredLocations); return Mono.just(ShouldRetryResult.noRetry()); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index f47b29dcb818..d5c5e2a6ea67 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -135,6 +135,8 @@ public List getAvailableWriteRoutingContexts() { public static Mono getDatabaseAccountFromAnyLocationsAsync( URI defaultEndpoint, List locations, Function> getDatabaseAccountFn) { + logger.info("entered getDatabaseAccountFromAnyLocationsAsync with defaultEndpoint: {} and locations: {}", + defaultEndpoint, String.join(",", locations)); return getDatabaseAccountFn.apply(defaultEndpoint).onErrorResume( e -> { @@ -172,12 +174,12 @@ public URI getDefaultEndpoint() { } public void markEndpointUnavailableForRead(URI endpoint) { - logger.debug("Marking endpoint {} unavailable for read",endpoint); + logger.info("Marking endpoint {} unavailable for read",endpoint); this.locationCache.markEndpointUnavailableForRead(endpoint);; } public void markEndpointUnavailableForWrite(URI endpoint) { - logger.debug("Marking 
endpoint {} unavailable for Write",endpoint); + logger.info("Marking endpoint {} unavailable for Write",endpoint); this.locationCache.markEndpointUnavailableForWrite(endpoint); } @@ -193,13 +195,13 @@ public void close() { this.isClosed = true; this.perPartitionAutomaticFailoverConfigModifier = null; this.scheduler.dispose(); - logger.debug("GlobalEndpointManager closed."); + logger.info("GlobalEndpointManager closed."); } public Mono refreshLocationAsync(DatabaseAccount databaseAccount, boolean forceRefresh) { logger.info("refreshLocationAsync invoked. forceRefresh: {}", forceRefresh); return Mono.defer(() -> { - logger.debug("refreshLocationAsync() invoked"); + logger.info("refreshLocationAsync() invoked"); if (forceRefresh) { Mono databaseAccountObs = getDatabaseAccountFromAnyLocationsAsync( @@ -223,11 +225,11 @@ public Mono refreshLocationAsync(DatabaseAccount databaseAccount, boolean } if (!isRefreshing.compareAndSet(false, true)) { - logger.debug("in the middle of another refresh. Not invoking a new refresh."); + logger.info("in the middle of another refresh. 
Not invoking a new refresh."); return Mono.empty(); } - logger.debug("will refresh"); + logger.info("will refresh"); return this.refreshLocationPrivateAsync(databaseAccount).doOnError(e -> this.isRefreshing.set(false)); }); } @@ -251,7 +253,7 @@ public int getPreferredLocationCount() { private Mono refreshLocationPrivateAsync(DatabaseAccount databaseAccount) { logger.info("inside refreshLocationPrivateAsync"); return Mono.defer(() -> { - logger.debug("refreshLocationPrivateAsync() refreshing locations"); + logger.info("refreshLocationPrivateAsync() refreshing locations"); if (databaseAccount != null) { this.databaseAccountWriteLock.lock(); @@ -265,10 +267,10 @@ private Mono refreshLocationPrivateAsync(DatabaseAccount databaseAccount) Utils.ValueHolder canRefreshInBackground = new Utils.ValueHolder<>(); if (this.locationCache.shouldRefreshEndpoints(canRefreshInBackground)) { - logger.debug("shouldRefreshEndpoints: true"); + logger.info("shouldRefreshEndpoints: true"); if (databaseAccount == null && !canRefreshInBackground.v) { - logger.debug("shouldRefreshEndpoints: can't be done in background"); + logger.info("shouldRefreshEndpoints: can't be done in background"); Mono databaseAccountObs = getDatabaseAccountFromAnyLocationsAsync( this.defaultEndpoint, @@ -303,7 +305,7 @@ private Mono refreshLocationPrivateAsync(DatabaseAccount databaseAccount) this.isRefreshing.set(false); return Mono.empty(); } else { - logger.debug("shouldRefreshEndpoints: false, nothing to do."); + logger.info("shouldRefreshEndpoints: false, nothing to do."); this.isRefreshing.set(false); return Mono.empty(); } @@ -317,15 +319,19 @@ private void startRefreshLocationTimerAsync() { private Mono startRefreshLocationTimerAsync(boolean initialization) { if (this.isClosed) { - logger.debug("startRefreshLocationTimerAsync: nothing to do, it is closed"); + logger.info("startRefreshLocationTimerAsync: nothing to do, it is closed"); // if client is already closed, nothing to be done, just return. 
return Mono.empty(); } - logger.debug("registering a refresh in [{}] ms", this.backgroundRefreshLocationTimeIntervalInMS); + // TODO: revert after testing done + //logger.info("registering a refresh in [{}] ms", this.backgroundRefreshLocationTimeIntervalInMS); + int testRefreshInterval = 3000; + logger.info("registering a refresh in [{}] ms", testRefreshInterval); LocalDateTime now = LocalDateTime.now(); - int delayInMillis = initialization ? 0: this.backgroundRefreshLocationTimeIntervalInMS; + //int delayInMillis = initialization ? 0: this.backgroundRefreshLocationTimeIntervalInMS; + int delayInMillis = initialization ? 0: testRefreshInterval; this.refreshInBackground.set(true); @@ -338,7 +344,7 @@ private Mono startRefreshLocationTimerAsync(boolean initialization) { return Mono.empty(); } - logger.debug("startRefreshLocationTimerAsync() - Invoking refresh, I was registered on [{}]", now); + logger.info("startRefreshLocationTimerAsync() - Invoking refresh, I was registered on [{}]", now); Mono databaseAccountObs = GlobalEndpointManager.getDatabaseAccountFromAnyLocationsAsync(this.defaultEndpoint, new ArrayList<>(this.getEffectivePreferredRegions()), this::getDatabaseAccountAsync); @@ -361,6 +367,7 @@ public boolean hasThinClientReadLocations() { } private Mono getDatabaseAccountAsync(URI serviceEndpoint) { + logger.info("entered getDatabaseAccountAsync in GlobalEndpointManager"); return this.owner.getDatabaseAccountFromEndpoint(serviceEndpoint) .doOnNext(databaseAccount -> { if(databaseAccount != null) { @@ -395,7 +402,7 @@ private Mono getDatabaseAccountAsync(URI serviceEndpoint) { } } - logger.debug("account retrieved: {}", databaseAccount); + logger.info("account retrieved: {}", databaseAccount); }).single(); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java index bcf4a7fce8d1..9fec8c3b74f2 100644 --- 
a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxDocumentClientImpl.java @@ -261,7 +261,7 @@ public class RxDocumentClientImpl implements AsyncDocumentClient, IAuthorization private final RetryPolicy retryPolicy; private HttpClient reactorHttpClient; private Function httpClientInterceptor; - private BiFunction httpRequestInterceptor; + private Function httpRequestInterceptor; private volatile boolean useMultipleWriteLocations; // creator of TransportClient is responsible for disposing it. @@ -776,7 +776,7 @@ private void updateThinProxy() { public void init(CosmosClientMetadataCachesSnapshot metadataCachesSnapshot, Function httpClientInterceptor, - BiFunction httpRequestInterceptor, + Function httpRequestInterceptor, BiFunction storeResponseInterceptor) { try { @@ -947,7 +947,7 @@ RxGatewayStoreModel createRxGatewayProxy(ISessionContainer sessionContainer, GlobalEndpointManager globalEndpointManager, HttpClient httpClient, ApiType apiType, - BiFunction httpRequestInterceptor) { + Function httpRequestInterceptor) { return new RxGatewayStoreModel( this, sessionContainer, @@ -6331,6 +6331,7 @@ public AddressSelector getAddressSelector() { } public Flux getDatabaseAccountFromEndpoint(URI endpoint) { + logger.info("entered getDatabaseAccountFromEndpoint line 6334 RxDocumentClientImpl"); return Flux.defer(() -> { RxDocumentServiceRequest request = RxDocumentServiceRequest.create(this, OperationType.Read, ResourceType.DatabaseAccount, "", null, (Object) null); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java index 200e484cd8e6..c0b4858cd621 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java +++ 
b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/RxGatewayStoreModel.java @@ -84,7 +84,7 @@ public class RxGatewayStoreModel implements RxStoreModel, HttpTransportSerialize private GatewayServiceConfigurationReader gatewayServiceConfigurationReader; private RxClientCollectionCache collectionCache; private GatewayServerErrorInjector gatewayServerErrorInjector; - private BiFunction httpRequestInterceptor; + private Function httpRequestInterceptor; public RxGatewayStoreModel( DiagnosticsClientContext clientContext, @@ -95,7 +95,7 @@ public RxGatewayStoreModel( GlobalEndpointManager globalEndpointManager, HttpClient httpClient, ApiType apiType, - BiFunction httpRequestInterceptor) { + Function httpRequestInterceptor) { this.clientContext = clientContext; @@ -310,7 +310,7 @@ private Mono performRequestInternalCore(RxDocumentSer try { if (this.httpRequestInterceptor != null) { - RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request, requestUri); + RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request); if (result != null) { return Mono.just(result); } @@ -699,6 +699,8 @@ private Mono invokeAsync(RxDocumentServiceRequest req @Override public Mono processMessage(RxDocumentServiceRequest request) { + if (this.httpRequestInterceptor != null) {} + Mono responseObs = this.addIntendedCollectionRidAndSessionToken(request).then(invokeAsync(request)); return responseObs.onErrorResume( diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java index 89bcce1c5e7d..7af5e3ef0305 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java @@ -345,7 +345,7 @@ 
Mono barrierForGlobalStrong(RxDocumentServiceRequest request, Sto //if necessary we would have already refreshed cache by now. request.requestContext.forceRefreshAddressCache = false; - logger.debug("ConsistencyWriter: globalCommittedLsn {}, lsn {}", globalCommittedLsn, lsn); + logger.info("ConsistencyWriter: globalCommittedLsn {}, lsn {}", globalCommittedLsn, lsn); //barrier only if necessary, i.e. when write region completes write, but read regions have not. if (globalCommittedLsn.v < lsn.v) { diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java index 72d8061cfebd..04d1236346b1 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GatewayAddressCache.java @@ -112,7 +112,7 @@ public class GatewayAddressCache implements IAddressCache { private final boolean replicaAddressValidationEnabled; private final Set replicaValidationScopes; private GatewayServerErrorInjector gatewayServerErrorInjector; - public BiFunction httpRequestInterceptor; + public Function httpRequestInterceptor; public GatewayAddressCache( DiagnosticsClientContext clientContext, @@ -360,11 +360,12 @@ private Mono> getServerAddressesViaGatewayInternalAsync(RxDocument JavaStreamUtils.toString(partitionKeyRangeIds, ",")); } - logger.debug("inside getServerAddressesViaGatewayInternalAsync"); - logger.debug("httpRequestInterceptor is " + (this.httpRequestInterceptor != null ? "not null" : "null")); + logger.info("inside getServerAddressesViaGatewayInternalAsync"); + logger.info("httpRequestInterceptor is " + (this.httpRequestInterceptor != null ? 
"not null" : "null")); if (this.httpRequestInterceptor != null) { - logger.debug("getServerAddressesViaGatewayInternalAsync intercepted"); - RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request, null); + logger.info("getServerAddressesViaGatewayInternalAsync intercepted"); + logger.info("request operationType: " + request.getOperationType() + ", resourceType: " + request.getResourceType()); + RxDocumentServiceResponse result = this.httpRequestInterceptor.apply(request); if (result != null) { return Mono.just(result.getQueryResponse(null, Address.class)); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java index ee89b109d616..a8b80258bd7b 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/GlobalAddressResolver.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; +import java.util.function.Function; import java.util.function.BiFunction; import java.util.stream.Collectors; @@ -52,7 +53,7 @@ public class GlobalAddressResolver implements IAddressResolver { private ProactiveOpenConnectionsProcessor proactiveOpenConnectionsProcessor; private ConnectionPolicy connectionPolicy; private GatewayServerErrorInjector gatewayServerErrorInjector; - private BiFunction httpRequestInterceptor; + private Function httpRequestInterceptor; public GlobalAddressResolver( DiagnosticsClientContext diagnosticsClientContext, @@ -66,7 +67,7 @@ public GlobalAddressResolver( GatewayServiceConfigurationReader serviceConfigReader, ConnectionPolicy connectionPolicy, ApiType apiType, - BiFunction httpRequestInterceptor) { + Function 
httpRequestInterceptor) { this.diagnosticsClientContext = diagnosticsClientContext; this.httpClient = httpClient; this.endpointManager = endpointManager; diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java index 3819060affc8..f60fcb413ecb 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/routing/LocationCache.java @@ -687,7 +687,7 @@ private void clearStaleEndpointUnavailabilityInfo() { this.unavailableLocationsExpirationTime) && Utils.tryRemove(this.locationUnavailabilityInfoByEndpoint, unavailableEndpoint, removedHolder)) { - logger.debug( + logger.info( "Removed endpoint [{}] unavailable for operations [{}] from unavailableEndpoints", unavailableEndpoint, unavailabilityInfoHolder.v.unavailableOperations); @@ -751,7 +751,7 @@ public LocationUnavailabilityInfo apply(RegionalRoutingContext url, LocationUnav this.updateLocationCache(); - logger.debug( + logger.info( "Endpoint [{}] unavailable for [{}] added/updated to unavailableEndpoints with timestamp [{}]", unavailableEndpoint, unavailableOperationType, @@ -771,7 +771,7 @@ private void updateLocationCache( Boolean enableMultipleWriteLocations) { synchronized (this.lockObject) { DatabaseAccountLocationsInfo nextLocationInfo = new DatabaseAccountLocationsInfo(this.locationInfo); - logger.debug("updating location cache ..., current readLocations [{}], current writeLocations [{}]", + logger.info("updating location cache ..., current readLocations [{}], current writeLocations [{}]", nextLocationInfo.readRegionalRoutingContexts, nextLocationInfo.writeRegionalRoutingContexts); if (preferenceList != null) { @@ -823,7 +823,7 @@ private void updateLocationCache( this.lastCacheUpdateTimestamp = Instant.now(); - logger.debug("updating 
location cache finished, new readLocations [{}], new writeLocations [{}]", + logger.info("updating location cache finished, new readLocations [{}], new writeLocations [{}]", nextLocationInfo.readRegionalRoutingContexts, nextLocationInfo.writeRegionalRoutingContexts); this.locationInfo = nextLocationInfo; } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java deleted file mode 100644 index 82b7f11e5056..000000000000 --- a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - - -module com.azure.cosmos { - - requires transitive com.azure.core; - - requires com.fasterxml.jackson.datatype.jsr310; - requires io.netty.transport; - requires io.netty.handler; - requires io.netty.common; - requires io.netty.buffer; - requires io.netty.codec; - requires io.netty.resolver; - requires io.netty.codec.http; - requires io.netty.codec.http2; - requires io.netty.transport.classes.epoll; - requires io.netty.handler.proxy; - requires reactor.netty.core; - requires reactor.netty.http; - requires com.codahale.metrics; - requires java.management; - requires jdk.management; - requires micrometer.core; - // This is only required by guava shaded libraries - requires java.logging; - requires HdrHistogram; - - // public API surface area - exports com.azure.cosmos; - exports com.azure.cosmos.models; - exports com.azure.cosmos.util; - - // export packages for multiple different modules - exports com.azure.cosmos.implementation to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; - exports com.azure.cosmos.implementation.caches to com.azure.cosmos.encryption, com.azure.cosmos.test; - exports com.azure.cosmos.implementation.feedranges to com.azure.cosmos.encryption, com.azure.cosmos.test; - exports com.azure.cosmos.implementation.apachecommons.lang to 
com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; - exports com.azure.cosmos.implementation.guava25.base to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; - exports com.azure.cosmos.implementation.guava25.collect to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; - exports com.azure.cosmos.implementation.guava27 to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; - exports com.azure.cosmos.implementation.directconnectivity to com.azure.cosmos.encryption, com.azure.cosmos.test; - opens com.azure.cosmos.implementation to com.fasterxml.jackson.databind, java.logging, com.fasterxml.jackson.module.afterburner; - - // exporting implementation packages specifically for cosmos encryption - exports com.azure.cosmos.implementation.batch to com.azure.cosmos.encryption; - exports com.azure.cosmos.implementation.patch to com.azure.cosmos.encryption; - exports com.azure.cosmos.implementation.query to com.azure.cosmos.encryption; - exports com.azure.cosmos.implementation.apachecommons.lang.tuple to com.azure.cosmos.encryption, com.azure.cosmos.kafka.connect; - - // exporting some packages specifically for Jackson - opens com.azure.cosmos.implementation.caches to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.changefeed to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.changefeed.common to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.changefeed.pkversion to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.changefeed.epkversion to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.feedranges to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.changefeed.exceptions to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.directconnectivity to com.fasterxml.jackson.databind; - opens 
com.azure.cosmos.implementation.directconnectivity.rntbd to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.http to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.query to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.query.aggregation to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.query.metrics to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.query.orderbyquery to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.query.hybridsearch to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.routing to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.clienttelemetry to com.fasterxml.jackson.databind; - opens com.azure.cosmos.util to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.throughputControl to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.throughputControl.sdk.controller.group.global to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.perPartitionCircuitBreaker to com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.perPartitionAutomaticFailover to com.fasterxml.jackson.databind; - - // exporting packages specifically for cosmos test - exports com.azure.cosmos.implementation.faultinjection to com.azure.cosmos.test; - exports com.azure.cosmos.implementation.directconnectivity.rntbd to com.azure.cosmos.test; - exports com.azure.cosmos.implementation.routing to com.azure.cosmos.test; - opens com.azure.cosmos to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; - opens com.azure.cosmos.models to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; - opens com.azure.cosmos.implementation.throughputControl.sdk to 
com.fasterxml.jackson.databind; - opens com.azure.cosmos.implementation.throughputControl.sdk.config to com.fasterxml.jackson.databind; - - uses com.azure.cosmos.implementation.guava25.base.PatternCompiler; - uses com.azure.core.util.tracing.Tracer; -} From a553529c78e33d49877b50a40f47ec2cbb4773a5 Mon Sep 17 00:00:00 2001 From: Neha Rao Date: Tue, 28 Oct 2025 10:38:37 -0700 Subject: [PATCH 5/7] modify test metadata refresh logic, logging for debugging --- .../com/azure/cosmos/BarrierRequestTests.java | 21 ++++++++++++++++--- .../implementation/ClientRetryPolicy.java | 3 ++- .../implementation/GlobalEndpointManager.java | 11 +++++----- .../directconnectivity/ConsistencyWriter.java | 12 +++++++++++ .../directconnectivity/StoreReader.java | 2 ++ .../rntbd/RntbdRequestManager.java | 4 ++++ 6 files changed, 44 insertions(+), 9 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java index 736dfd18f8ae..4fd71722b817 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java @@ -32,6 +32,8 @@ public class BarrierRequestTests extends TestSuiteBase { String primaryRegion = "Central US"; String secondaryRegion = "East US"; + GlobalEndpointManager globalEndpointManager = null; + @Factory(dataProvider = "clientBuildersWithDirectTcpSession") public BarrierRequestTests(CosmosClientBuilder clientBuilder) { super(clientBuilder); @@ -52,7 +54,7 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { // After the initial write, simulate a network failure on address resolution. // This will trigger the SDK's failover logic. 
- if (simulateAddressRefreshFailures.get() && + /*if (simulateAddressRefreshFailures.get() && request.isAddressRefresh() && request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) // Target the primary region { @@ -65,9 +67,9 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { Map headers = new HashMap<>(); headers.put(HttpConstants.HttpHeaders.SUB_STATUS, Integer.toString(GATEWAY_ENDPOINT_UNAVAILABLE)); throw new CosmosException(HttpConstants.SubStatusCodes.GATEWAY_ENDPOINT_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); - } + }*/ - // Once the failover is triggered, intercept the subsequent metadata refresh call. + // Once the failover is triggered, trigger a subsequent metadata refresh call (intercepted in httpRequestInterceptor). logger.info("Checking failoverTriggered to intercept metadata refresh call: " + failoverTriggered.get()); //logger.info("isMetadataRequest: " + request.isMetadataRequest()); if (failoverTriggered.get() && request.getResourceType() == ResourceType.DatabaseAccount && request.getOperationType() == OperationType.Read) @@ -112,6 +114,17 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { // Track barrier requests (Head operations on a collection) if (request.getOperationType() == OperationType.Head && request.getResourceType() == ResourceType.DocumentCollection) { + logger.info("Barrier request intercepted in storeResponseInterceptor for region: {}", request.requestContext.regionalRoutingContextToRoute.getRegion()); + logger.info("Setting failoverTriggered to true"); + failoverTriggered.compareAndSet(false, true); + + if (globalEndpointManager != null) { + logger.info("Trigerring metadata refresh"); + globalEndpointManager.refreshLocationAsync(null, true).block(); + } else { + logger.info("globalEndpointManager is null, cannot trigger metadata refresh"); + } + // If the 
barrier request is in the secondary region, allow it to succeed. logger.info("Barrier request detected for region: {}", request.requestContext.regionalRoutingContextToRoute.getRegion()); if (request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.secondaryRegion)) @@ -132,6 +145,8 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { CosmosAsyncClient client = clientBuilder.buildAsyncClient(); CosmosAsyncContainer container = getSharedSinglePartitionCosmosContainer(client); + globalEndpointManager = BridgeInternal.getContextClient(client).getGlobalEndpointManager(); + CosmosItemResponse response = container.createItem(CosmosDiagnosticsTest.TestItem.createNewItem()).block(); logger.info("Item created"); validateDiagnosticsIsPresent(response); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java index c611cef810d6..94c387d8c037 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/ClientRetryPolicy.java @@ -33,7 +33,7 @@ public class ClientRetryPolicy extends DocumentClientRetryPolicy { private final static Logger logger = LoggerFactory.getLogger(ClientRetryPolicy.class); final static int RetryIntervalInMS = 1000; //Once we detect failover wait for 1 second before retrying request. 
- final static int MaxRetryCount = 10; // TODO: Remember to set this back after done testing + final static int MaxRetryCount = 121; // TODO: Remember to set this back after done testing private final static int MaxServiceUnavailableRetryCount = 1; private final DocumentClientRetryPolicy throttlingRetry; @@ -521,6 +521,7 @@ public void onBeforeSendRequest(RxDocumentServiceRequest request) { // Resolve the endpoint for the request and pin the resolution to the resolved endpoint // This enables marking the endpoint unavailability on endpoint failover/unreachability this.regionalRoutingContext = this.globalEndpointManager.resolveServiceEndpoint(request); + logger.info("regional routing context resolved to {} with region {}", this.regionalRoutingContext.getGatewayRegionalEndpoint(), this.regionalRoutingContext.getRegion()); if (request.requestContext != null) { request.requestContext.routeToLocation(this.regionalRoutingContext); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java index d5c5e2a6ea67..18571f719b59 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/GlobalEndpointManager.java @@ -325,13 +325,14 @@ private Mono startRefreshLocationTimerAsync(boolean initialization) { } // TODO: revert after testing done - //logger.info("registering a refresh in [{}] ms", this.backgroundRefreshLocationTimeIntervalInMS); - int testRefreshInterval = 3000; - logger.info("registering a refresh in [{}] ms", testRefreshInterval); + logger.info("registering a refresh in [{}] ms", this.backgroundRefreshLocationTimeIntervalInMS); + //int testRefreshInterval = 3000; + //logger.info("registering a refresh in [{}] ms", testRefreshInterval); LocalDateTime now = LocalDateTime.now(); - //int 
delayInMillis = initialization ? 0: this.backgroundRefreshLocationTimeIntervalInMS; - int delayInMillis = initialization ? 0: testRefreshInterval; + // TODO: revert after testing done + int delayInMillis = initialization ? 0: this.backgroundRefreshLocationTimeIntervalInMS; + //int delayInMillis = initialization ? 0: testRefreshInterval; this.refreshInBackground.set(true); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java index 7af5e3ef0305..87584b7524ee 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/ConsistencyWriter.java @@ -113,6 +113,10 @@ public Mono writeAsync( TimeoutHelper timeout, boolean forceRefresh) { + logger.info("entered writeAsync with region {} and endpoint {}", + entity.requestContext.regionalRoutingContextToRoute.getRegion(), + entity.requestContext.regionalRoutingContextToRoute.getGatewayRegionalEndpoint()); + if (timeout.isElapsed() && // skip throwing RequestTimeout on first retry because the first retry with // force address refresh header can be critical to recover for example from @@ -149,6 +153,10 @@ Mono writePrivateAsync( TimeoutHelper timeout, boolean forceRefresh) { + logger.info("entered writePrivate with region {} and endpoint {}", + request.requestContext.regionalRoutingContextToRoute.getRegion(), + request.requestContext.regionalRoutingContextToRoute.getGatewayRegionalEndpoint()); + if (timeout.isElapsed() && // skip throwing RequestTimeout on first retry because the first retry with // force address refresh header can be critical to recover for example from @@ -325,6 +333,10 @@ boolean isGlobalStrongRequest(RxDocumentServiceRequest request, StoreResponse re } Mono 
barrierForGlobalStrong(RxDocumentServiceRequest request, StoreResponse response) { + logger.info("inside barrierForGlobalStrong with region {} and endpoint {}", + request.requestContext.regionalRoutingContextToRoute.getRegion(), + request.requestContext.regionalRoutingContextToRoute.getGatewayRegionalEndpoint()); + try { if (ReplicatedResourceClient.isGlobalStrongEnabled() && this.isGlobalStrongRequest(request, response)) { Utils.ValueHolder lsn = Utils.ValueHolder.initialize(-1L); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreReader.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreReader.java index 5169dfb121a7..2147ae74e93c 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreReader.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/StoreReader.java @@ -98,6 +98,8 @@ public Mono> readMultipleReplicaAsync( boolean checkMinLSN, boolean forceReadAll) { + logger.info("inside readMultipleReplicaAsync with region {}", entity.requestContext.regionalRoutingContextToRoute.getRegion()); + if (entity.requestContext.timeoutHelper.isElapsed()) { return Mono.error(new GoneException()); } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java index d7e8ccb56421..281c4c1e6227 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java @@ -614,6 +614,8 @@ public void flush(final ChannelHandlerContext context) { */ @Override public void read(final ChannelHandlerContext context) 
{ + logger.info("inside RntbdRequestManager.read"); + this.traceOperation(context, "read"); context.read(); } @@ -630,6 +632,7 @@ public void read(final ChannelHandlerContext context) { */ @Override public void write(final ChannelHandlerContext context, final Object message, final ChannelPromise promise) { + logger.info("inside RntbdRequestManager.write"); this.traceOperation(context, "write", message); @@ -984,6 +987,7 @@ private void messageReceived(final ChannelHandlerContext context, final RntbdRes } final RxDocumentServiceRequest serviceRequest = requestRecord.args().serviceRequest(); + logger.info("inside RntbdRequestManager.messageReceived - serviceRequest region: {}", serviceRequest.requestContext.regionalRoutingContextToRoute.getRegion()); requestRecord.stage(RntbdRequestRecord.Stage.DECODE_STARTED, response.getDecodeStartTime()); From 2b0eb6460c4cf331bfbfb26624a36e90d3673aea Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Sat, 1 Nov 2025 19:49:56 -0400 Subject: [PATCH 6/7] Update min recommended version of `azure-cosmos` of v4.75.0. --- .../src/main/java/module-info.java | 9 + .../com/azure/cosmos/BarrierRequestTests.java | 154 ++++++++++++++---- .../rntbd/RntbdRequestManager.java | 4 +- .../src/main/java/module-info.java | 87 ++++++++++ 4 files changed, 224 insertions(+), 30 deletions(-) create mode 100644 sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java create mode 100644 sdk/cosmos/azure-cosmos/src/main/java/module-info.java diff --git a/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java new file mode 100644 index 000000000000..ccaaf699f81e --- /dev/null +++ b/sdk/cosmos/azure-cosmos-test/src/main/java/module-info.java @@ -0,0 +1,9 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +module com.azure.cosmos.test { + + requires transitive com.azure.cosmos; + + exports com.azure.cosmos.test.faultinjection; +} diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java index 4fd71722b817..a27aadf96a8f 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java @@ -4,7 +4,7 @@ package com.azure.cosmos; import com.azure.cosmos.implementation.*; -import com.azure.cosmos.implementation.directconnectivity.HttpUtils; +import com.azure.cosmos.implementation.directconnectivity.ReflectionUtils; import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.directconnectivity.WFConstants; import com.azure.cosmos.models.CosmosItemResponse; @@ -12,12 +12,18 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufInputStream; import io.netty.channel.ConnectTimeoutException; +import org.testng.annotations.BeforeClass; import org.testng.annotations.Factory; import org.testng.annotations.Test; +import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; +import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -28,21 +34,49 @@ * E2E testing to verify the handling of barrier requests. */ public class BarrierRequestTests extends TestSuiteBase { - // eg. 
"Central US", case matters - String primaryRegion = "Central US"; - String secondaryRegion = "East US"; - GlobalEndpointManager globalEndpointManager = null; + private String primaryRegion; + private String secondaryRegion; + private String primaryRegionalEndpointAsStr; + private String secondaryRegionalEndpointAsStr; + private AccountLevelLocationContext accountLevelLocationReadableLocationContext; @Factory(dataProvider = "clientBuildersWithDirectTcpSession") public BarrierRequestTests(CosmosClientBuilder clientBuilder) { super(clientBuilder); } + @BeforeClass(groups = {"multi-region"}) + public void beforeClass() { + CosmosAsyncClient cosmosAsyncClient = getClientBuilder().buildAsyncClient(); + + try { + RxDocumentClientImpl rxDocumentClient = (RxDocumentClientImpl) ReflectionUtils.getAsyncDocumentClient(cosmosAsyncClient); + GlobalEndpointManager globalEndpointManager = ReflectionUtils.getGlobalEndpointManager(rxDocumentClient); + DatabaseAccount databaseAccountSnapshot = globalEndpointManager.getLatestDatabaseAccount(); + + this.accountLevelLocationReadableLocationContext + = getAccountLevelLocationContext(databaseAccountSnapshot, false); + + assertThat(this.accountLevelLocationReadableLocationContext).isNotNull(); + assertThat(this.accountLevelLocationReadableLocationContext.serviceOrderedReadableRegions).isNotNull(); + assertThat(this.accountLevelLocationReadableLocationContext.serviceOrderedReadableRegions.size()).isEqualTo(2); + + this.primaryRegion = this.accountLevelLocationReadableLocationContext.serviceOrderedReadableRegions.get(0); + this.secondaryRegion = this.accountLevelLocationReadableLocationContext.serviceOrderedReadableRegions.get(1); + this.primaryRegionalEndpointAsStr = this.accountLevelLocationReadableLocationContext.regionNameToEndpoint.get(this.primaryRegion); + this.secondaryRegionalEndpointAsStr = this.accountLevelLocationReadableLocationContext.regionNameToEndpoint.get(this.secondaryRegion); + } finally { + cosmosAsyncClient.close(); + } 
+ } + @Test - public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { + public void assertHandleBarriersForStrongConsistencyWriteDuringFailover() { + AtomicBoolean simulateAddressRefreshFailures = new AtomicBoolean(false); AtomicBoolean failoverTriggered = new AtomicBoolean(false); + AtomicReference globalEndpointManager = new AtomicReference<>(null); CosmosClientBuilder clientBuilder = getClientBuilder() .consistencyLevel(ConsistencyLevel.STRONG) @@ -54,10 +88,9 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { // After the initial write, simulate a network failure on address resolution. // This will trigger the SDK's failover logic. - /*if (simulateAddressRefreshFailures.get() && + if (simulateAddressRefreshFailures.get() && request.isAddressRefresh() && - request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) // Target the primary region - { + request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) { logger.info("request operationType: " + request.getOperationType()); logger.info("request resourceType: " + request.getResourceType()); logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); @@ -66,8 +99,8 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { logger.info("failoverTriggered: " + failoverTriggered.get()); Map headers = new HashMap<>(); headers.put(HttpConstants.HttpHeaders.SUB_STATUS, Integer.toString(GATEWAY_ENDPOINT_UNAVAILABLE)); - throw new CosmosException(HttpConstants.SubStatusCodes.GATEWAY_ENDPOINT_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); - }*/ + throw new CosmosException(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); + } // Once the failover is triggered, 
trigger a subsequent metadata refresh call (intercepted in httpRequestInterceptor). logger.info("Checking failoverTriggered to intercept metadata refresh call: " + failoverTriggered.get()); @@ -111,29 +144,41 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { logger.info("inside storeResponseInterceptor, set simulateAddressRefreshFailures to {}", simulateAddressRefreshFailures.get()); } + if (request.getOperationType() == OperationType.Create && + request.getResourceType() == ResourceType.Document && + request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.secondaryRegion)) { + + String lsn = storeResponse.getHeaderValue(WFConstants.BackendHeaders.LSN); + + // Decrement so that GCLSN < LSN to simulate the replication lag + String manipulatedGclsn = String.valueOf(Long.parseLong(lsn) - 2L); + + storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, manipulatedGclsn); + + // Enable address refresh failures for subsequent barrier requests in the primary region. 
+ simulateAddressRefreshFailures.compareAndSet(false, true); + logger.info("inside storeResponseInterceptor, set simulateAddressRefreshFailures to {}", simulateAddressRefreshFailures.get()); + } + // Track barrier requests (Head operations on a collection) - if (request.getOperationType() == OperationType.Head && request.getResourceType() == ResourceType.DocumentCollection) - { + if (request.getOperationType() == OperationType.Head && request.getResourceType() == ResourceType.DocumentCollection) { logger.info("Barrier request intercepted in storeResponseInterceptor for region: {}", request.requestContext.regionalRoutingContextToRoute.getRegion()); logger.info("Setting failoverTriggered to true"); failoverTriggered.compareAndSet(false, true); if (globalEndpointManager != null) { logger.info("Trigerring metadata refresh"); - globalEndpointManager.refreshLocationAsync(null, true).block(); + globalEndpointManager.get().refreshLocationAsync(null, true).block(); } else { logger.info("globalEndpointManager is null, cannot trigger metadata refresh"); } // If the barrier request is in the secondary region, allow it to succeed. logger.info("Barrier request detected for region: {}", request.requestContext.regionalRoutingContextToRoute.getRegion()); - if (request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.secondaryRegion)) - { + if (request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.secondaryRegion)) { // Satisfy the barrier condition by setting GCLSN >= LSN storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(storeResponse.getLSN())); - } - else - { + } else { // For any other region (initially the primary), keep the barrier condition unmet. 
long lsn = storeResponse.getLSN() - 2; storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(lsn)); @@ -143,16 +188,27 @@ public void AssertHandleBarriersForStrongConsistencyWriteDuringFailover() { }); CosmosAsyncClient client = clientBuilder.buildAsyncClient(); - CosmosAsyncContainer container = getSharedSinglePartitionCosmosContainer(client); - globalEndpointManager = BridgeInternal.getContextClient(client).getGlobalEndpointManager(); + try { + CosmosAsyncContainer container = getSharedSinglePartitionCosmosContainer(client); - CosmosItemResponse response = container.createItem(CosmosDiagnosticsTest.TestItem.createNewItem()).block(); - logger.info("Item created"); - validateDiagnosticsIsPresent(response); + globalEndpointManager.set(BridgeInternal.getContextClient(client).getGlobalEndpointManager()); - CosmosDiagnosticsContext diagnosticsContext = response.getDiagnostics().getDiagnosticsContext(); - System.out.println(diagnosticsContext); + try { + CosmosItemResponse response = container.createItem(CosmosDiagnosticsTest.TestItem.createNewItem()).block(); + logger.info("Item created"); + validateDiagnosticsIsPresent(response); + + CosmosDiagnosticsContext diagnosticsContext = response.getDiagnostics().getDiagnosticsContext(); + logger.info("Diagnostics on successful Create : {}", diagnosticsContext); + } catch (CosmosException ex) { + CosmosDiagnosticsContext diagnosticsContext = ex.getDiagnostics().getDiagnosticsContext(); + logger.error("Diagnostics on unsuccessful Create : {}", diagnosticsContext.toJson()); + } + + } finally { + client.close(); + } } private void validateDiagnosticsIsPresent(CosmosItemResponse response) { @@ -180,8 +236,8 @@ private String getDatabaseAccountJsonAfterFailover() { "\"media\":\"//media/\",\"addresses\":\"//addresses/\",\"_dbs\":\"//dbs/\",\"writableLocations\":[{\"name\":\"" + secondaryRegion.toLowerCase().replaceAll("\\s", "") + "\",\"" + "databaseAccountEndpoint\":\"https://" + 
globalDatabaseAccountName + "-" + secondaryRegion.toLowerCase().replaceAll("\\s", "") + ".documents.azure.com:443/\"}],\"readableLocations\":[{\"name\"" + - ":\"Central US\",\"databaseAccountEndpoint\":\"https://neha-test-account4-centralus.documents.azure.com:443/\"},{\"name\"" + - ":\"East US 2\",\"databaseAccountEndpoint\":\"https://neha-test-account4-eastus2.documents.azure.com:443/\"}]," + + ":\"" + this.secondaryRegion + "\",\"databaseAccountEndpoint\":\"" + this.secondaryRegionalEndpointAsStr + "\"},{\"name\"" + + ":\"" + this.primaryRegion + "\",\"databaseAccountEndpoint\":\"" + this.primaryRegionalEndpointAsStr + "\"}]," + "\"enableMultipleWriteLocations\":false,\"continuousBackupEnabled\":false,\"enableNRegionSynchronousCommit\":false," + "\"enablePerPartitionFailoverBehavior\":false,\"userReplicationPolicy\":{\"asyncReplication\":false,\"minReplicaSetSize\":3," + "\"maxReplicasetSize\":4},\"userConsistencyPolicy\":{\"defaultConsistencyLevel\":\"Strong\"},\"systemReplicationPolicy\":" + @@ -198,4 +254,46 @@ private String getDatabaseAccountJsonAfterFailover() { return jsonString; } + + private AccountLevelLocationContext getAccountLevelLocationContext(DatabaseAccount databaseAccount, boolean writeOnly) { + Iterator locationIterator = + writeOnly ? 
databaseAccount.getWritableLocations().iterator() : databaseAccount.getReadableLocations().iterator(); + + List serviceOrderedReadableRegions = new ArrayList<>(); + List serviceOrderedWriteableRegions = new ArrayList<>(); + Map regionMap = new ConcurrentHashMap<>(); + + while (locationIterator.hasNext()) { + DatabaseAccountLocation accountLocation = locationIterator.next(); + regionMap.put(accountLocation.getName(), accountLocation.getEndpoint()); + + if (writeOnly) { + serviceOrderedWriteableRegions.add(accountLocation.getName()); + } else { + serviceOrderedReadableRegions.add(accountLocation.getName()); + } + } + + return new AccountLevelLocationContext( + serviceOrderedReadableRegions, + serviceOrderedWriteableRegions, + regionMap); + } + + private static class AccountLevelLocationContext { + private final List serviceOrderedReadableRegions; + @SuppressWarnings("unused") + private final List serviceOrderedWriteableRegions; + private final Map regionNameToEndpoint; + + public AccountLevelLocationContext( + List serviceOrderedReadableRegions, + List serviceOrderedWriteableRegions, + Map regionNameToEndpoint) { + + this.serviceOrderedReadableRegions = serviceOrderedReadableRegions; + this.serviceOrderedWriteableRegions = serviceOrderedWriteableRegions; + this.regionNameToEndpoint = regionNameToEndpoint; + } + } } diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java index 281c4c1e6227..ae42b4dd0ba4 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/implementation/directconnectivity/rntbd/RntbdRequestManager.java @@ -614,7 +614,7 @@ public void flush(final ChannelHandlerContext context) { */ @Override public void read(final 
ChannelHandlerContext context) { - logger.info("inside RntbdRequestManager.read"); +// logger.info("inside RntbdRequestManager.read"); this.traceOperation(context, "read"); context.read(); @@ -632,7 +632,7 @@ public void read(final ChannelHandlerContext context) { */ @Override public void write(final ChannelHandlerContext context, final Object message, final ChannelPromise promise) { - logger.info("inside RntbdRequestManager.write"); +// logger.info("inside RntbdRequestManager.write"); this.traceOperation(context, "write", message); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/module-info.java b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java new file mode 100644 index 000000000000..82b7f11e5056 --- /dev/null +++ b/sdk/cosmos/azure-cosmos/src/main/java/module-info.java @@ -0,0 +1,87 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + + +module com.azure.cosmos { + + requires transitive com.azure.core; + + requires com.fasterxml.jackson.datatype.jsr310; + requires io.netty.transport; + requires io.netty.handler; + requires io.netty.common; + requires io.netty.buffer; + requires io.netty.codec; + requires io.netty.resolver; + requires io.netty.codec.http; + requires io.netty.codec.http2; + requires io.netty.transport.classes.epoll; + requires io.netty.handler.proxy; + requires reactor.netty.core; + requires reactor.netty.http; + requires com.codahale.metrics; + requires java.management; + requires jdk.management; + requires micrometer.core; + // This is only required by guava shaded libraries + requires java.logging; + requires HdrHistogram; + + // public API surface area + exports com.azure.cosmos; + exports com.azure.cosmos.models; + exports com.azure.cosmos.util; + + // export packages for multiple different modules + exports com.azure.cosmos.implementation to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.caches to 
com.azure.cosmos.encryption, com.azure.cosmos.test; + exports com.azure.cosmos.implementation.feedranges to com.azure.cosmos.encryption, com.azure.cosmos.test; + exports com.azure.cosmos.implementation.apachecommons.lang to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.guava25.base to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.guava25.collect to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.guava27 to com.azure.cosmos.encryption, com.azure.cosmos.test, com.azure.cosmos.kafka.connect; + exports com.azure.cosmos.implementation.directconnectivity to com.azure.cosmos.encryption, com.azure.cosmos.test; + opens com.azure.cosmos.implementation to com.fasterxml.jackson.databind, java.logging, com.fasterxml.jackson.module.afterburner; + + // exporting implementation packages specifically for cosmos encryption + exports com.azure.cosmos.implementation.batch to com.azure.cosmos.encryption; + exports com.azure.cosmos.implementation.patch to com.azure.cosmos.encryption; + exports com.azure.cosmos.implementation.query to com.azure.cosmos.encryption; + exports com.azure.cosmos.implementation.apachecommons.lang.tuple to com.azure.cosmos.encryption, com.azure.cosmos.kafka.connect; + + // exporting some packages specifically for Jackson + opens com.azure.cosmos.implementation.caches to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.changefeed to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.changefeed.common to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.changefeed.pkversion to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.changefeed.epkversion to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.feedranges to 
com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.changefeed.exceptions to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.directconnectivity to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.directconnectivity.rntbd to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.http to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.query to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.query.aggregation to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.query.metrics to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.query.orderbyquery to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.query.hybridsearch to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.routing to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.clienttelemetry to com.fasterxml.jackson.databind; + opens com.azure.cosmos.util to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.throughputControl to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.throughputControl.sdk.controller.group.global to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.perPartitionCircuitBreaker to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.perPartitionAutomaticFailover to com.fasterxml.jackson.databind; + + // exporting packages specifically for cosmos test + exports com.azure.cosmos.implementation.faultinjection to com.azure.cosmos.test; + exports com.azure.cosmos.implementation.directconnectivity.rntbd to com.azure.cosmos.test; + exports com.azure.cosmos.implementation.routing to com.azure.cosmos.test; + opens com.azure.cosmos to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; + opens 
com.azure.cosmos.models to com.azure.cosmos.test, com.azure.spring.data.cosmos, com.fasterxml.jackson.databind, com.fasterxml.jackson.module.afterburner, java.logging; + opens com.azure.cosmos.implementation.throughputControl.sdk to com.fasterxml.jackson.databind; + opens com.azure.cosmos.implementation.throughputControl.sdk.config to com.fasterxml.jackson.databind; + + uses com.azure.cosmos.implementation.guava25.base.PatternCompiler; + uses com.azure.core.util.tracing.Tracer; +} From ab06d8312f2bc15e4b516f372fa3cea45b65b403 Mon Sep 17 00:00:00 2001 From: Abhijeet Mohanty Date: Tue, 4 Nov 2025 16:02:05 -0500 Subject: [PATCH 7/7] Update min recommended version of `azure-cosmos` of v4.75.0. --- .../com/azure/cosmos/BarrierRequestTests.java | 199 ++++++++++++++++-- .../com/azure/cosmos/CosmosException.java | 11 + 2 files changed, 194 insertions(+), 16 deletions(-) diff --git a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java index a27aadf96a8f..23e20b2f24e4 100644 --- a/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java +++ b/sdk/cosmos/azure-cosmos-tests/src/test/java/com/azure/cosmos/BarrierRequestTests.java @@ -8,10 +8,15 @@ import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.directconnectivity.WFConstants; import com.azure.cosmos.models.CosmosItemResponse; +import com.azure.cosmos.models.ModelBridgeInternal; import com.azure.cosmos.rx.TestSuiteBase; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufInputStream; import io.netty.channel.ConnectTimeoutException; +import org.testng.SkipException; import org.testng.annotations.BeforeClass; import org.testng.annotations.Factory; 
import org.testng.annotations.Test; @@ -40,6 +45,7 @@ public class BarrierRequestTests extends TestSuiteBase { private String primaryRegionalEndpointAsStr; private String secondaryRegionalEndpointAsStr; private AccountLevelLocationContext accountLevelLocationReadableLocationContext; + private static final ObjectMapper mapper = new ObjectMapper(); @Factory(dataProvider = "clientBuildersWithDirectTcpSession") public BarrierRequestTests(CosmosClientBuilder clientBuilder) { @@ -72,15 +78,14 @@ public void beforeClass() { } @Test - public void assertHandleBarriersForStrongConsistencyWriteDuringFailover() { + public void assertHandleBarriersForStrongConsistencyNoCrossRegionRetry() { AtomicBoolean simulateAddressRefreshFailures = new AtomicBoolean(false); AtomicBoolean failoverTriggered = new AtomicBoolean(false); AtomicReference globalEndpointManager = new AtomicReference<>(null); CosmosClientBuilder clientBuilder = getClientBuilder() - .consistencyLevel(ConsistencyLevel.STRONG) - .directMode(); + .consistencyLevel(ConsistencyLevel.STRONG); clientBuilder.httpRequestInterceptor((request) -> { logger.info("inside httpRequestInterceptor, simulateAddressRefreshFailures: {}, operationType: {}, resourceType: {}, uri: {}", @@ -88,19 +93,19 @@ public void assertHandleBarriersForStrongConsistencyWriteDuringFailover() { // After the initial write, simulate a network failure on address resolution. // This will trigger the SDK's failover logic. 
- if (simulateAddressRefreshFailures.get() && - request.isAddressRefresh() && - request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) { - logger.info("request operationType: " + request.getOperationType()); - logger.info("request resourceType: " + request.getResourceType()); - logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); - logger.info("failoverTriggered: " + failoverTriggered.get()); - failoverTriggered.compareAndSet(false, true); - logger.info("failoverTriggered: " + failoverTriggered.get()); - Map headers = new HashMap<>(); - headers.put(HttpConstants.HttpHeaders.SUB_STATUS, Integer.toString(GATEWAY_ENDPOINT_UNAVAILABLE)); - throw new CosmosException(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); - } +// if (simulateAddressRefreshFailures.get() && +// request.isAddressRefresh() && +// request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) { +// logger.info("request operationType: " + request.getOperationType()); +// logger.info("request resourceType: " + request.getResourceType()); +// logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); +// logger.info("failoverTriggered: " + failoverTriggered.get()); +// failoverTriggered.compareAndSet(false, true); +// logger.info("failoverTriggered: " + failoverTriggered.get()); +// Map headers = new HashMap<>(); +// headers.put(HttpConstants.HttpHeaders.SUB_STATUS, Integer.toString(GATEWAY_ENDPOINT_UNAVAILABLE)); +// throw new CosmosException(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); +// } // Once the failover is triggered, trigger a subsequent metadata refresh call (intercepted in httpRequestInterceptor). 
logger.info("Checking failoverTriggered to intercept metadata refresh call: " + failoverTriggered.get()); @@ -189,6 +194,168 @@ public void assertHandleBarriersForStrongConsistencyWriteDuringFailover() { CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + if (BridgeInternal + .getContextClient(client) + .getConnectionPolicy() + .getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("Barrier requests cannot be intercepted in Gateway Mode"); + } + + try { + CosmosAsyncContainer container = getSharedSinglePartitionCosmosContainer(client); + + globalEndpointManager.set(BridgeInternal.getContextClient(client).getGlobalEndpointManager()); + + try { + CosmosItemResponse response = container.createItem(CosmosDiagnosticsTest.TestItem.createNewItem()).block(); + logger.info("Item created"); + validateDiagnosticsIsPresent(response); + + CosmosDiagnosticsContext diagnosticsContext = response.getDiagnostics().getDiagnosticsContext(); + logger.info("Diagnostics on successful Create : {}", diagnosticsContext); + } catch (CosmosException ex) { + CosmosDiagnosticsContext diagnosticsContext = ex.getDiagnostics().getDiagnosticsContext(); + logger.error("Diagnostics on unsuccessful Create : {}", diagnosticsContext.toJson()); + } + + } finally { + client.close(); + } + } + + @Test + public void assertHandleBarriersForStrongConsistencyWithCrossRegionRetry() { + + AtomicBoolean simulateAddressRefreshFailures = new AtomicBoolean(false); + AtomicBoolean failoverTriggered = new AtomicBoolean(false); + AtomicReference globalEndpointManager = new AtomicReference<>(null); + + CosmosClientBuilder clientBuilder = getClientBuilder() + .consistencyLevel(ConsistencyLevel.STRONG); + + clientBuilder.httpRequestInterceptor((request) -> { + logger.info("inside httpRequestInterceptor, simulateAddressRefreshFailures: {}, operationType: {}, resourceType: {}, uri: {}", + simulateAddressRefreshFailures.get(), request.getOperationType(), request.getResourceType()); + + // 
After the initial write, simulate a network failure on address resolution. + // This will trigger the SDK's failover logic. + if (simulateAddressRefreshFailures.get() && + request.isAddressRefresh() && + request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) { + logger.info("request operationType: " + request.getOperationType()); + logger.info("request resourceType: " + request.getResourceType()); + logger.info("Simulating network failure for address resolution for region " + this.primaryRegion); + logger.info("failoverTriggered: " + failoverTriggered.get()); + failoverTriggered.compareAndSet(false, true); + logger.info("failoverTriggered: " + failoverTriggered.get()); + Map headers = new HashMap<>(); + headers.put(HttpConstants.HttpHeaders.SUB_STATUS, Integer.toString(GATEWAY_ENDPOINT_UNAVAILABLE)); + throw new CosmosException(HttpConstants.StatusCodes.SERVICE_UNAVAILABLE, "Simulating network failure for address resolution for region", headers, new ConnectTimeoutException()); + } + + // Once the failover is triggered, trigger a subsequent metadata refresh call (intercepted in httpRequestInterceptor). + logger.info("Checking failoverTriggered to intercept metadata refresh call: " + failoverTriggered.get()); + //logger.info("isMetadataRequest: " + request.isMetadataRequest()); + if (failoverTriggered.get() && request.getResourceType() == ResourceType.DatabaseAccount && request.getOperationType() == OperationType.Read) + { + // Return the modified account properties, making the SDK believe a failover has occurred. + logger.info("Intercepting metadata call and returning modified account properties to simulate failover. 
New write region: " + this.secondaryRegion); + + ByteBuf byteBuf = Utils.getUTF8BytesOrNull(getDatabaseAccountJsonAfterFailover()); + StoreResponse storeResponse = new StoreResponse( + TestConfigurations.HOST, + 200, + request.getHeaders(), + new ByteBufInputStream(byteBuf), + byteBuf.readableBytes()); + + return new RxDocumentServiceResponse(null, storeResponse); + } + + return null; // let other requests proceed normally + }); + + clientBuilder.storeResponseInterceptor((request, storeResponse) -> { + logger.info("inside storeResponseInterceptor, operationType: {}, resourceType: {}, region: {}", + request.getOperationType(), request.getResourceType(), request.requestContext.regionalRoutingContextToRoute.getRegion()); + + if (request.getOperationType() == OperationType.Create && + request.getResourceType() == ResourceType.Document && + request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.primaryRegion)) { + + String lsn = storeResponse.getHeaderValue(WFConstants.BackendHeaders.LSN); + + // Decrement so that GCLSN < LSN to simulate the replication lag + String manipulatedGclsn = String.valueOf(Long.parseLong(lsn) - 2L); + + storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, manipulatedGclsn); + + // Enable address refresh failures for subsequent barrier requests in the primary region. 
+ simulateAddressRefreshFailures.compareAndSet(false, true); + logger.info("inside storeResponseInterceptor, set simulateAddressRefreshFailures to {}", simulateAddressRefreshFailures.get()); + } + +// if (request.getOperationType() == OperationType.Create && +// request.getResourceType() == ResourceType.Document && +// request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.secondaryRegion)) { +// +// String jsonResponse = "{status:success,message:Operation completed}"; +// +// try { +// storeResponse.setResponseBodyAsJson(mapper.readTree(jsonResponse)); +// } catch (JsonProcessingException e) { +// logger.error("Error while setting response body as JSON", e); +// } +// storeResponse.withRemappedStatusCode(HttpConstants.StatusCodes.CREATED, 0d); +// +// String lsn = storeResponse.getHeaderValue(WFConstants.BackendHeaders.LSN); +// +// storeResponse.setHeaderValue(HttpConstants.HttpHeaders.SUB_STATUS, String.valueOf(HttpConstants.SubStatusCodes.UNKNOWN)); +// +// // Decrement so that GCLSN < LSN to simulate the replication lag +// String manipulatedGclsn = String.valueOf(Long.parseLong(lsn) - 2L); +// +// storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, manipulatedGclsn); +// +// logger.info("inside storeResponseInterceptor, set simulateAddressRefreshFailures to {}", simulateAddressRefreshFailures.get()); +// } + + // Track barrier requests (Head operations on a collection) + if (request.getOperationType() == OperationType.Head && request.getResourceType() == ResourceType.DocumentCollection) { + logger.info("Barrier request intercepted in storeResponseInterceptor for region: {}", request.requestContext.regionalRoutingContextToRoute.getRegion()); + logger.info("Setting failoverTriggered to true"); + failoverTriggered.compareAndSet(false, true); + + if (globalEndpointManager != null) { + logger.info("Trigerring metadata refresh"); + globalEndpointManager.get().refreshLocationAsync(null, true).block(); + } 
else { + logger.info("globalEndpointManager is null, cannot trigger metadata refresh"); + } + + // If the barrier request is in the secondary region, allow it to succeed. + logger.info("Barrier request detected for region: {}", request.requestContext.regionalRoutingContextToRoute.getRegion()); + if (request.requestContext.regionalRoutingContextToRoute.getRegion().equalsIgnoreCase(this.secondaryRegion)) { + // Satisfy the barrier condition by setting GCLSN >= LSN + storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(storeResponse.getLSN())); + } else { + // For any other region (initially the primary), keep the barrier condition unmet. + long lsn = storeResponse.getLSN() - 2; + storeResponse.setHeaderValue(WFConstants.BackendHeaders.GLOBAL_COMMITTED_LSN, String.valueOf(lsn)); + } + } + return storeResponse; + }); + + CosmosAsyncClient client = clientBuilder.buildAsyncClient(); + + if (BridgeInternal + .getContextClient(client) + .getConnectionPolicy() + .getConnectionMode() == ConnectionMode.GATEWAY) { + throw new SkipException("Barrier requests cannot be intercepted in Gateway Mode"); + } + try { CosmosAsyncContainer container = getSharedSinglePartitionCosmosContainer(client); diff --git a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosException.java b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosException.java index 5d4731cef122..68016c0132af 100644 --- a/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosException.java +++ b/sdk/cosmos/azure-cosmos/src/main/java/com/azure/cosmos/CosmosException.java @@ -12,6 +12,7 @@ import com.azure.cosmos.implementation.Utils; import com.azure.cosmos.implementation.apachecommons.lang.StringUtils; import com.azure.cosmos.implementation.batch.BatchExecUtils; +import com.azure.cosmos.implementation.directconnectivity.StoreResponse; import com.azure.cosmos.implementation.directconnectivity.Uri; import 
com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdChannelAcquisitionTimeline; import com.azure.cosmos.implementation.directconnectivity.rntbd.RntbdChannelStatistics; @@ -92,6 +93,8 @@ public class CosmosException extends AzureException { */ private RntbdChannelStatistics rntbdChannelStatistics; + /** Store response attached to this exception via setInterceptedStoreResponse. NOTE(review): presumably filled in by the store-response interceptor path - confirm against callers. */ private StoreResponse interceptedStoreResponse; + /** * LSN */ @@ -615,6 +618,14 @@ Map> getReplicaStatusList() { return this.replicaStatusList; } + /** Records the given store response on this exception, replacing any previously stored one. @param storeResponse the response to keep; no null check is performed. NOTE(review): this is a public accessor exposing an implementation-package type - confirm it is meant to be public API rather than a package-private accessor. */ public void setInterceptedStoreResponse(StoreResponse storeResponse) { + this.interceptedStoreResponse = storeResponse; + } + + /** Returns the store response last passed to setInterceptedStoreResponse. @return the stored response; the field declaration has no initializer, so presumably null if never set - TODO confirm against the constructors. */ public StoreResponse getInterceptedStoreResponse() { + return this.interceptedStoreResponse; + } + /////////////////////////////////////////////////////////////////////////////////////////// // the following helper/accessor only helps to access this class outside of this package.// ///////////////////////////////////////////////////////////////////////////////////////////