Skip to content

Commit

Permalink
Tweaked flaky unit test RouterRequestThrottlingTest
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixGV committed Dec 19, 2024
1 parent 6ca1104 commit 9a5d502
Showing 1 changed file with 54 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.doReturn;
import static org.mockito.Mockito.mock;
import static org.testng.Assert.assertTrue;

import com.linkedin.alpini.router.api.HostFinder;
import com.linkedin.alpini.router.api.HostHealthMonitor;
import com.linkedin.alpini.router.api.PartitionFinder;
import com.linkedin.alpini.router.api.RouterException;
import com.linkedin.alpini.router.api.Scatter;
import com.linkedin.alpini.router.api.ScatterGatherRequest;
import com.linkedin.venice.helix.ZkRoutersClusterManager;
Expand Down Expand Up @@ -72,7 +74,12 @@ public static Object[][] requestType() {
return new Object[][] { { RequestType.MULTI_GET }, { RequestType.COMPUTE } };
}

@Test(timeOut = 30000, dataProvider = "multiGet_compute")
/** A simple Runnable-like interface just to reduce boilerplate in the test... */
private interface ScatterCall {
void run() throws RouterException;
}

@Test(timeOut = 30000, dataProvider = "multiGet_compute", invocationCount = 100)
public void testMultiKeyThrottling(RequestType requestType) throws Exception {
// Allow 10 multi-key requests per second
int batchGetSize = 100;
Expand Down Expand Up @@ -115,18 +122,20 @@ public void testMultiKeyThrottling(RequestType requestType) throws Exception {
HostFinder<Instance, VeniceRole> hostFinder = mock(VeniceHostFinder.class);
HostHealthMonitor<Instance> hostHealthMonitor = mock(HostHealthMonitor.class);

ScatterCall scatterCall = () -> delegateMode.scatter(
scatter,
HttpMethod.POST.name(),
storeName + "_v1",
partitionFinder,
hostFinder,
hostHealthMonitor,
VeniceRole.REPLICA);

// The router shouldn't throttle any request if the multi-get QPS is below 10
for (int iter = 0; iter < 3; iter++) {
for (int i = 0; i < allowedQPS; i++) {
try {
delegateMode.scatter(
scatter,
HttpMethod.POST.name(),
storeName + "_v1",
partitionFinder,
hostFinder,
hostHealthMonitor,
VeniceRole.REPLICA);
scatterCall.run();
} catch (Exception e) {
Assert.fail("router shouldn't throttle any multi-get requests if the QPS is below " + allowedQPS);
}
Expand All @@ -138,16 +147,11 @@ public void testMultiKeyThrottling(RequestType requestType) throws Exception {

// Router should throttle the multi-get requests if QPS exceeds 10
boolean multiGetThrottled = false;
for (int i = 0; i < allowedQPS + 1; i++) {
int queriesSent = allowedQPS + 1;
long startTime = System.currentTimeMillis();
for (int i = 0; i < queriesSent; i++) {
try {
delegateMode.scatter(
scatter,
HttpMethod.POST.name(),
storeName + "_v1",
partitionFinder,
hostFinder,
hostHealthMonitor,
VeniceRole.REPLICA);
scatterCall.run();
} catch (Exception e) {
multiGetThrottled = true;
if (i < allowedQPS) {
Expand All @@ -156,9 +160,38 @@ public void testMultiKeyThrottling(RequestType requestType) throws Exception {
}
}
}
long elapsedTime = System.currentTimeMillis() - startTime;

// restore the throttler so that it doesn't affect the following test case
throttler.restoreAllThrottlers();
Assert.assertTrue(multiGetThrottled);
if (!multiGetThrottled) {
int additionalQueriesNeeded = -1;
for (int i = 1; i < queriesSent * 10; i++) {
try {
scatterCall.run();
} catch (Exception e) {
additionalQueriesNeeded = i;
break;
}
}
long totalElapsedTime = System.currentTimeMillis() - startTime;
if (additionalQueriesNeeded < 0) {
Assert.fail(
"Never triggered quota at all, even after sending 10x more than it should have needed! Original elapsed time: "
+ elapsedTime + "; total elapsed time: " + totalElapsedTime);
} else {
/**
* N.B.: This test used to be flaky because it would be 1 request short of triggering the quota. It's not clear
* why it sometimes takes a little more, and sometimes doesn't (floating point arithmetic being wonky, perhaps?)
* but here we're adding a small tolerance threshold, while still ensuring that larger deviations still result
* in failure.
*/
int toleranceThreshold = 1;
assertTrue(
additionalQueriesNeeded <= toleranceThreshold,
"Should have triggered quota in " + queriesSent + " requests, but it took " + additionalQueriesNeeded
+ " additional request(s) before finally triggering, which is higher than the tolerance threshold of "
+ toleranceThreshold + ". Original elapsed time: " + elapsedTime + "; total elapsed time: "
+ totalElapsedTime);
}
}
}
}

0 comments on commit 9a5d502

Please sign in to comment.