Skip to content

Commit

Permalink
[router] Fix invalid tehuti sensor name passed in for latencies (#1351)
Browse files Browse the repository at this point in the history
If an invalid TehutiMetricNameEnum that was not registered with MetricEntityState is passed in, it will be skipped during record() and will be silently ignored. Found this issue for healthy_request_latency and this PR fixes this issue and adds logging with redundant logging filter to log such issues. Not throwing an exception as we will be moving a lot of the existing metrics to Otel and tracking this log will help find and fix the issue during the migration and not affect the rollout process.
  • Loading branch information
m-nagarajan authored Nov 27, 2024
1 parent 81d6c85 commit 348ce77
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
import io.opentelemetry.api.metrics.LongCounter;
import io.tehuti.metrics.MeasurableStat;
import io.tehuti.metrics.Sensor;
import io.tehuti.utils.RedundantLogFilter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;


/**
Expand All @@ -18,7 +21,9 @@
* 3. multiple tehuti Sensors for this Otel Metric
*/
public class MetricEntityState {
private MetricEntity metricEntity;
private static final Logger LOGGER = LogManager.getLogger(MetricEntityState.class);
private static final RedundantLogFilter REDUNDANT_LOG_FILTER = RedundantLogFilter.getRedundantLogFilter();
private final MetricEntity metricEntity;
/** Otel metric */
private Object otelMetric = null;
/** Map of tehuti names and sensors: 1 Otel metric can cover multiple Tehuti sensors */
Expand Down Expand Up @@ -105,6 +110,12 @@ void recordTehutiMetric(TehutiMetricNameEnum tehutiMetricNameEnum, double value)
Sensor sensor = tehutiSensors.get(tehutiMetricNameEnum);
if (sensor != null) {
sensor.record(value);
} else {
// Log using Redundant log filters to catch any bad tehutiMetricNameEnum is passed in
String errorLog = "Tehuti Sensor with name '" + tehutiMetricNameEnum + "' not found.";
if (!REDUNDANT_LOG_FILTER.isRedundantLog(errorLog)) {
LOGGER.error(errorLog);
}
}
}
}
Expand All @@ -119,7 +130,13 @@ public void record(TehutiMetricNameEnum tehutiMetricNameEnum, double value, Attr
recordTehutiMetric(tehutiMetricNameEnum, value);
}

/** used only for testing */
Map<TehutiMetricNameEnum, Sensor> getTehutiSensors() {
return tehutiSensors;
}

/** used only for testing */
static RedundantLogFilter getRedundantLogFilter() {
return REDUNDANT_LOG_FILTER;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,16 @@
import static org.mockito.Mockito.times;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

import com.linkedin.venice.stats.VeniceOpenTelemetryMetricsRepository;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.LongCounter;
import io.tehuti.metrics.MeasurableStat;
import io.tehuti.metrics.Sensor;
import io.tehuti.utils.RedundantLogFilter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -26,7 +29,7 @@ public class MetricEntityStateTest {
private Sensor mockSensor;

private enum TestTehutiMetricNameEnum implements TehutiMetricNameEnum {
TEST_METRIC;
TEST_METRIC_1, TEST_METRIC_2;

private final String metricName;

Expand Down Expand Up @@ -65,19 +68,19 @@ public void testCreateMetricWithOtelEnabled() {
@Test
public void testAddTehutiSensorsSuccessfully() {
MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);

Assert.assertNotNull(metricEntityState.getTehutiSensors());
Assert.assertTrue(metricEntityState.getTehutiSensors().containsKey(TestTehutiMetricNameEnum.TEST_METRIC));
assertTrue(metricEntityState.getTehutiSensors().containsKey(TestTehutiMetricNameEnum.TEST_METRIC_1));
}

@Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*Sensor with name 'TEST_METRIC' already exists.*")
@Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = ".*Sensor with name 'TEST_METRIC_1' already exists.*")
public void testAddTehutiSensorThrowsExceptionOnDuplicate() {
MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);

// Adding the same sensor name again should throw an exception
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);
}

@Test
Expand Down Expand Up @@ -111,9 +114,9 @@ public void testRecordOtelMetricCounter() {
@Test
public void testRecordTehutiMetric() {
MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);

metricEntityState.recordTehutiMetric(TestTehutiMetricNameEnum.TEST_METRIC, 15.0);
metricEntityState.recordTehutiMetric(TestTehutiMetricNameEnum.TEST_METRIC_1, 15.0);

verify(mockSensor, times(1)).record(15.0);
}
Expand All @@ -124,13 +127,27 @@ public void testRecordMetricsWithBothOtelAndTehuti() {
when(mockMetricEntity.getMetricType()).thenReturn(MetricType.HISTOGRAM);

MetricEntityState metricEntityState = new MetricEntityState(mockMetricEntity, mockOtelRepository);
RedundantLogFilter logFilter = metricEntityState.getRedundantLogFilter();
metricEntityState.setOtelMetric(doubleHistogram);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC, mockSensor);
metricEntityState.addTehutiSensors(TestTehutiMetricNameEnum.TEST_METRIC_1, mockSensor);

Attributes attributes = Attributes.builder().put("key", "value").build();
metricEntityState.record(TestTehutiMetricNameEnum.TEST_METRIC, 20.0, attributes);

// case 1: check using valid Tehuti metric that was added to metricEntityState
metricEntityState.record(TestTehutiMetricNameEnum.TEST_METRIC_1, 20.0, attributes);
verify(doubleHistogram, times(1)).record(20.0, attributes);
verify(mockSensor, times(1)).record(20.0);
assertFalse(logFilter.isRedundantLog("Tehuti Sensor with name 'TEST_METRIC_1' not found.", false));

// case 2: check using a Tehuti metric that was not added to metricEntityState and verify it called
// REDUNDANT_LOG_FILTER
metricEntityState.record(TestTehutiMetricNameEnum.TEST_METRIC_2, 20.0, attributes);
// otel metric should be called for the second time
verify(doubleHistogram, times(2)).record(20.0, attributes);
// Tehuti metric should be not called for the second time as we passed in an invalid metric name
verify(mockSensor, times(1)).record(20.0);
// This should have invoked the log filter for TEST_METRIC_2
assertFalse(logFilter.isRedundantLog("Tehuti Sensor with name 'TEST_METRIC_1' not found.", false));
assertTrue(logFilter.isRedundantLog("Tehuti Sensor with name 'TEST_METRIC_2' not found.", false));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -363,11 +363,14 @@ public void recordIncomingRequest() {
}

public void recordHealthyRequest(Double latency, HttpResponseStatus responseStatus) {
TehutiMetricNameEnum tehutiMetricNameEnum = RouterTehutiMetricNameEnum.HEALTHY_REQUEST;
VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.HEALTHY;
recordRequestMetric(tehutiMetricNameEnum, responseStatus, veniceResponseStatusCategory);
recordRequestMetric(RouterTehutiMetricNameEnum.HEALTHY_REQUEST, responseStatus, veniceResponseStatusCategory);
if (latency != null) {
recordLatencyMetric(tehutiMetricNameEnum, latency, responseStatus, veniceResponseStatusCategory);
recordLatencyMetric(
RouterTehutiMetricNameEnum.HEALTHY_REQUEST_LATENCY,
latency,
responseStatus,
veniceResponseStatusCategory);
}
}

Expand Down Expand Up @@ -400,10 +403,13 @@ public void recordReadQuotaUsage(int quotaUsage) {
}

public void recordTardyRequest(double latency, HttpResponseStatus responseStatus) {
TehutiMetricNameEnum tehutiMetricNameEnum = RouterTehutiMetricNameEnum.TARDY_REQUEST;
VeniceResponseStatusCategory veniceResponseStatusCategory = VeniceResponseStatusCategory.TARDY;
recordRequestMetric(tehutiMetricNameEnum, responseStatus, veniceResponseStatusCategory);
recordLatencyMetric(tehutiMetricNameEnum, latency, responseStatus, veniceResponseStatusCategory);
recordRequestMetric(RouterTehutiMetricNameEnum.TARDY_REQUEST, responseStatus, veniceResponseStatusCategory);
recordLatencyMetric(
RouterTehutiMetricNameEnum.TARDY_REQUEST_LATENCY,
latency,
responseStatus,
veniceResponseStatusCategory);
}

public void recordThrottledRequest(double latency, HttpResponseStatus responseStatus) {
Expand Down Expand Up @@ -465,7 +471,10 @@ public void recordBadRequest(HttpResponseStatus responseStatus) {
}

public void recordBadRequestKeyCount(int keyCount) {
recordKeyCountMetric(keyCount, RequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED);
recordKeyCountMetric(
RouterTehutiMetricNameEnum.BAD_REQUEST_KEY_COUNT,
keyCount,
RequestValidationOutcome.INVALID_KEY_COUNT_LIMIT_EXCEEDED);
}

public void recordRequestThrottledByRouterCapacity() {
Expand Down Expand Up @@ -548,18 +557,21 @@ public void recordFindUnhealthyHostRequest() {
}

public void recordKeyNum(int keyNum) {
recordKeyCountMetric(keyNum, RequestValidationOutcome.VALID);
recordKeyCountMetric(RouterTehutiMetricNameEnum.KEY_NUM, keyNum, RequestValidationOutcome.VALID);
}

public void recordKeyCountMetric(int keyNum, RequestValidationOutcome outcome) {
public void recordKeyCountMetric(
TehutiMetricNameEnum tehutiMetricNameEnum,
int keyNum,
RequestValidationOutcome outcome) {
Attributes dimensions = null;
if (emitOpenTelemetryMetrics) {
dimensions = Attributes.builder()
.putAll(commonMetricDimensions)
.put(getDimensionName(VENICE_REQUEST_VALIDATION_OUTCOME), outcome.getOutcome())
.build();
}
keyCountMetric.record(RouterTehutiMetricNameEnum.KEY_NUM, keyNum, dimensions);
keyCountMetric.record(tehutiMetricNameEnum, keyNum, dimensions);
}

public void recordRequestUsage(int usage) {
Expand Down

0 comments on commit 348ce77

Please sign in to comment.