From 9a5d5022d9c6ce1af71e088473fb4a97c49316e5 Mon Sep 17 00:00:00 2001 From: Felix GV Date: Wed, 18 Dec 2024 20:03:26 -0800 Subject: [PATCH] Tweaked flaky unit test RouterRequestThrottlingTest --- .../throttle/RouterRequestThrottlingTest.java | 75 +++++++++++++------ 1 file changed, 54 insertions(+), 21 deletions(-) diff --git a/services/venice-router/src/test/java/com/linkedin/venice/router/throttle/RouterRequestThrottlingTest.java b/services/venice-router/src/test/java/com/linkedin/venice/router/throttle/RouterRequestThrottlingTest.java index 05ba49ed96..d8d5bab61f 100644 --- a/services/venice-router/src/test/java/com/linkedin/venice/router/throttle/RouterRequestThrottlingTest.java +++ b/services/venice-router/src/test/java/com/linkedin/venice/router/throttle/RouterRequestThrottlingTest.java @@ -4,10 +4,12 @@ import static org.mockito.Mockito.any; import static org.mockito.Mockito.doReturn; import static org.mockito.Mockito.mock; +import static org.testng.Assert.assertTrue; import com.linkedin.alpini.router.api.HostFinder; import com.linkedin.alpini.router.api.HostHealthMonitor; import com.linkedin.alpini.router.api.PartitionFinder; +import com.linkedin.alpini.router.api.RouterException; import com.linkedin.alpini.router.api.Scatter; import com.linkedin.alpini.router.api.ScatterGatherRequest; import com.linkedin.venice.helix.ZkRoutersClusterManager; @@ -72,7 +74,12 @@ public static Object[][] requestType() { return new Object[][] { { RequestType.MULTI_GET }, { RequestType.COMPUTE } }; } - @Test(timeOut = 30000, dataProvider = "multiGet_compute") + /** A simple Runnable-like interface just to reduce boilerplate in the test... */ + private interface ScatterCall { + void run() throws RouterException; + } + + @Test(timeOut = 30000, dataProvider = "multiGet_compute", invocationCount = 100) public void testMultiKeyThrottling(RequestType requestType) throws Exception { // Allow 10 multi-key requests per second int batchGetSize = 100; @@ -115,18 +122,20 @@ public void testMultiKeyThrottling(RequestType requestType) throws Exception { HostFinder hostFinder = mock(VeniceHostFinder.class); HostHealthMonitor hostHealthMonitor = mock(HostHealthMonitor.class); + ScatterCall scatterCall = () -> delegateMode.scatter( + scatter, + HttpMethod.POST.name(), + storeName + "_v1", + partitionFinder, + hostFinder, + hostHealthMonitor, + VeniceRole.REPLICA); + // The router shouldn't throttle any request if the multi-get QPS is below 10 for (int iter = 0; iter < 3; iter++) { for (int i = 0; i < allowedQPS; i++) { try { - delegateMode.scatter( - scatter, - HttpMethod.POST.name(), - storeName + "_v1", - partitionFinder, - hostFinder, - hostHealthMonitor, - VeniceRole.REPLICA); + scatterCall.run(); } catch (Exception e) { Assert.fail("router shouldn't throttle any multi-get requests if the QPS is below " + allowedQPS); } @@ -138,16 +147,11 @@ public void testMultiKeyThrottling(RequestType requestType) throws Exception { // Router should throttle the multi-get requests if QPS exceeds 10 boolean multiGetThrottled = false; - for (int i = 0; i < allowedQPS + 1; i++) { + int queriesSent = allowedQPS + 1; + long startTime = System.currentTimeMillis(); + for (int i = 0; i < queriesSent; i++) { try { - delegateMode.scatter( - scatter, - HttpMethod.POST.name(), - storeName + "_v1", - partitionFinder, - hostFinder, - hostHealthMonitor, - VeniceRole.REPLICA); + scatterCall.run(); } catch (Exception e) { multiGetThrottled = true; if (i < allowedQPS) { @@ -156,9 +160,38 @@ public void testMultiKeyThrottling(RequestType requestType) throws Exception { } } } + long elapsedTime = System.currentTimeMillis() - startTime; - // restore the throttler so that it doesn't affect the following test case - throttler.restoreAllThrottlers(); - Assert.assertTrue(multiGetThrottled); + if (!multiGetThrottled) { + int additionalQueriesNeeded = -1; + for (int i = 1; i < queriesSent * 10; i++) { + try { + scatterCall.run(); + } catch (Exception e) { + additionalQueriesNeeded = i; + break; + } + } + long totalElapsedTime = System.currentTimeMillis() - startTime; + if (additionalQueriesNeeded < 0) { + Assert.fail( + "Never triggered quota at all, even after sending 10x more than it should have needed! Original elapsed time: " + + elapsedTime + "; total elapsed time: " + totalElapsedTime); + } else { + /** + * N.B.: This test used to be flaky because it would be 1 request short of triggering the quota. It's not clear + * why it sometimes takes a little more, and sometimes doesn't (floating point arithmetic being wonky, perhaps?) + * but here we're adding a small tolerance threshold, while still ensuring that larger deviations still result + * in failure. + */ + int toleranceThreshold = 1; + assertTrue( + additionalQueriesNeeded <= toleranceThreshold, + "Should have triggered quota in " + queriesSent + " requests, but it took " + additionalQueriesNeeded + + " additional request(s) before finally triggering, which is higher than the tolerance threshold of " + + toleranceThreshold + ". Original elapsed time: " + elapsedTime + "; total elapsed time: " + + totalElapsedTime); + } + } } }