Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api-gateway): support monitoring slowest requests with trimmed mean #444

Merged
merged 1 commit into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
328 changes: 328 additions & 0 deletions API.md

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions lib/common/metric/MetricStatistic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export enum MetricStatistic {
* trimmed mean; calculates the average after removing the 0.01% of data points with the highest values
*/
TM9999 = "tm99.99",

/**
* trimmed mean; calculates the average after removing the 1% lowest data points and the 1% highest data points
*/
Expand Down Expand Up @@ -94,6 +95,23 @@ export enum MetricStatistic {
*/
TM70_BOTH = "TM(30%:70%)",

/**
* trimmed mean; calculates the average after removing the 95% lowest data points
*/
TM95_TOP = "TM(95%:100%)",
/**
* trimmed mean; calculates the average after removing the 99% lowest data points
*/
TM99_TOP = "TM(99%:100%)",
/**
* trimmed mean; calculates the average after removing the 99.9% lowest data points
*/
TM999_TOP = "TM(99.9%:100%)",
/**
* trimmed mean; calculates the average after removing the 99.99% lowest data points
*/
TM9999_TOP = "TM(99.99%:100%)",

/**
* winsorized mean; calculates the average while treating the 50% of the highest values to be equal to the value at the 50th percentile
*/
Expand Down
16 changes: 16 additions & 0 deletions lib/common/monitoring/alarms/LatencyAlarmFactory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ export enum LatencyType {
TM99 = "TM99",
TM999 = "TM999",
TM9999 = "TM9999",
TM95_TOP = "TM(95%:100%)",
TM99_TOP = "TM(99%:100%)",
TM999_TOP = "TM(99.9%:100%)",
TM9999_TOP = "TM(99.99%:100%)",
AVERAGE = "Average",
}

Expand Down Expand Up @@ -58,6 +62,14 @@ export function getLatencyTypeStatistic(latencyType: LatencyType) {
return MetricStatistic.TM999;
case LatencyType.TM9999:
return MetricStatistic.TM9999;
case LatencyType.TM95_TOP:
return MetricStatistic.TM95_TOP;
case LatencyType.TM99_TOP:
return MetricStatistic.TM99_TOP;
case LatencyType.TM999_TOP:
return MetricStatistic.TM999_TOP;
case LatencyType.TM9999_TOP:
return MetricStatistic.TM9999_TOP;
case LatencyType.AVERAGE:
return MetricStatistic.AVERAGE;
default:
Expand Down Expand Up @@ -98,6 +110,10 @@ export function getLatencyTypeLabel(latencyType: LatencyType) {
return latencyType.replace("999", "99.9") + averageSuffix;
case LatencyType.P9999:
case LatencyType.TM9999:
case LatencyType.TM95_TOP:
case LatencyType.TM99_TOP:
case LatencyType.TM999_TOP:
case LatencyType.TM9999_TOP:
// we need proper decimal here
return latencyType.replace("9999", "99.99") + averageSuffix;
case LatencyType.AVERAGE:
Expand Down
8 changes: 8 additions & 0 deletions lib/monitoring/aws-apigateway/ApiGatewayMonitoring.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ export interface ApiGatewayMonitoringOptions extends BaseMonitoringProps {
readonly addLatencyTM99Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM95OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM99OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyAverageAlarm?: Record<string, LatencyThreshold>;

readonly addLowTpsAlarm?: Record<string, LowTpsThreshold>;
Expand Down Expand Up @@ -175,6 +179,10 @@ export class ApiGatewayMonitoring extends Monitoring {
[LatencyType.TM99]: props.addLatencyTM99Alarm,
[LatencyType.TM999]: props.addLatencyTM999Alarm,
[LatencyType.TM9999]: props.addLatencyTM9999Alarm,
// Top-trimmed ("outlier") latency alarms: each LatencyType must read its own
// matching add...OutlierAlarm prop so thresholds are not shared by accident.
[LatencyType.TM95_TOP]: props.addLatencyTM95OutlierAlarm,
[LatencyType.TM99_TOP]: props.addLatencyTM99OutlierAlarm,
[LatencyType.TM999_TOP]: props.addLatencyTM999OutlierAlarm,
[LatencyType.TM9999_TOP]: props.addLatencyTM9999OutlierAlarm,
[LatencyType.AVERAGE]: props.addLatencyAverageAlarm,
};

Expand Down
34 changes: 34 additions & 0 deletions lib/monitoring/aws-apigatewayv2/ApiGatewayV2HttpApiMonitoring.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,20 @@ export interface ApiGatewayV2MonitoringOptions extends BaseMonitoringProps {
readonly addLatencyP999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyP9999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyP100Alarm?: Record<string, LatencyThreshold>;

readonly addLatencyTM50Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM70Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM90Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM95Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM99Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999Alarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999Alarm?: Record<string, LatencyThreshold>;

readonly addLatencyTM95OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM99OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM999OutlierAlarm?: Record<string, LatencyThreshold>;
readonly addLatencyTM9999OutlierAlarm?: Record<string, LatencyThreshold>;

readonly addLatencyAverageAlarm?: Record<string, LatencyThreshold>;

readonly addIntegrationLatencyP50Alarm?: Record<string, LatencyThreshold>;
Expand All @@ -75,13 +82,32 @@ export interface ApiGatewayV2MonitoringOptions extends BaseMonitoringProps {
readonly addIntegrationLatencyP999Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyP9999Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyP100Alarm?: Record<string, LatencyThreshold>;

readonly addIntegrationLatencyTM50Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM70Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM90Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM95Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM99Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM999Alarm?: Record<string, LatencyThreshold>;
readonly addIntegrationLatencyTM9999Alarm?: Record<string, LatencyThreshold>;

readonly addIntegrationLatencyTM95OutlierAlarm?: Record<
string,
LatencyThreshold
>;
readonly addIntegrationLatencyTM99OutlierAlarm?: Record<
string,
LatencyThreshold
>;
readonly addIntegrationLatencyTM999OutlierAlarm?: Record<
string,
LatencyThreshold
>;
readonly addIntegrationLatencyTM9999OutlierAlarm?: Record<
string,
LatencyThreshold
>;

readonly addIntegrationLatencyAverageAlarm?: Record<string, LatencyThreshold>;

readonly addLowTpsAlarm?: Record<string, LowTpsThreshold>;
Expand Down Expand Up @@ -200,6 +226,10 @@ export class ApiGatewayV2HttpApiMonitoring extends Monitoring {
[LatencyType.TM99]: props.addLatencyTM99Alarm,
[LatencyType.TM999]: props.addLatencyTM999Alarm,
[LatencyType.TM9999]: props.addLatencyTM9999Alarm,
// Top-trimmed ("outlier") latency alarms: each LatencyType must read its own
// matching add...OutlierAlarm prop so thresholds are not shared by accident.
[LatencyType.TM95_TOP]: props.addLatencyTM95OutlierAlarm,
[LatencyType.TM99_TOP]: props.addLatencyTM99OutlierAlarm,
[LatencyType.TM999_TOP]: props.addLatencyTM999OutlierAlarm,
[LatencyType.TM9999_TOP]: props.addLatencyTM9999OutlierAlarm,
[LatencyType.AVERAGE]: props.addLatencyAverageAlarm,
};

Expand All @@ -219,6 +249,10 @@ export class ApiGatewayV2HttpApiMonitoring extends Monitoring {
[LatencyType.TM99]: props.addIntegrationLatencyTM99Alarm,
[LatencyType.TM999]: props.addIntegrationLatencyTM999Alarm,
[LatencyType.TM9999]: props.addIntegrationLatencyTM9999Alarm,
[LatencyType.TM95_TOP]: props.addIntegrationLatencyTM95OutlierAlarm,
[LatencyType.TM99_TOP]: props.addIntegrationLatencyTM99OutlierAlarm,
[LatencyType.TM999_TOP]: props.addIntegrationLatencyTM999OutlierAlarm,
[LatencyType.TM9999_TOP]: props.addIntegrationLatencyTM9999OutlierAlarm,
[LatencyType.AVERAGE]: props.addIntegrationLatencyAverageAlarm,
};

Expand Down
52 changes: 50 additions & 2 deletions test/monitoring/aws-apigateway/ApiGatewayMonitoring.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,30 @@ test("snapshot test: all alarms", () => {
datapointsToAlarm: 29999,
},
},
addLatencyTM95OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM99OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM9999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyAverageAlarm: {
Warning: {
maxLatency: Duration.millis(20),
Expand All @@ -170,7 +194,7 @@ test("snapshot test: all alarms", () => {
});

addMonitoringDashboardsToStack(stack, monitoring);
expect(numAlarmsCreated).toStrictEqual(22);
expect(numAlarmsCreated).toStrictEqual(26);
expect(Template.fromStack(stack)).toMatchSnapshot();
});

Expand Down Expand Up @@ -304,6 +328,30 @@ test("snapshot test: all alarms using interface", () => {
datapointsToAlarm: 29999,
},
},
addLatencyTM95OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM99OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyTM9999OutlierAlarm: {
Warning: {
maxLatency: Duration.millis(29999),
datapointsToAlarm: 29999,
},
},
addLatencyAverageAlarm: {
Warning: {
maxLatency: Duration.millis(20),
Expand All @@ -324,6 +372,6 @@ test("snapshot test: all alarms using interface", () => {
});

addMonitoringDashboardsToStack(stack, monitoring);
expect(numAlarmsCreated).toStrictEqual(22);
expect(numAlarmsCreated).toStrictEqual(26);
expect(Template.fromStack(stack)).toMatchSnapshot();
});
Loading