diff --git a/README.md b/README.md index 5012937..4205091 100644 --- a/README.md +++ b/README.md @@ -92,18 +92,19 @@ Example IAM policy # Options `plz run //cmd:aws-service-quotas-exporter -- [OPTIONS]` -| Short Flag | Long Flag | Env var | Description | -|------------|-----------|-------------|--------------------------------| -| -p | --port | N/A | Port on which to serve metrics | -| -r | --region | AWS_REGION | AWS region | -| -f | --profile | AWS_PROFILE | Named AWS profile | +| Short Flag | Long Flag | Env var | Description | +|------------|--------------------|----------------------|-------------------------------------------------------------------| +| -p | --port | N/A | Port on which to serve metrics | +| -r | --region | AWS_REGION | AWS region | +| -f | --profile | AWS_PROFILE | Named AWS profile | +| N/A | --include-aws-tag | N/A | The aws resource tags to include as labels for returned metrics | # Building the exporter and running the exporter ## Building the binary with please `plz build //cmd:aws-service-quotas-exporter` -`plz run //cmd:aws-service-quotas-exporter -- -p 9090 -r eu-west-1 --profile myprofile` +`plz run //cmd:aws-service-quotas-exporter -- -p 9090 -r eu-west-1 --profile myprofile --include-aws-tag 'tag1' --include-aws-tag 'tag2'` ## Docker image `docker build -f build/Dockerfile-builder . --rm=false` diff --git a/cmd/main.go b/cmd/main.go index 3ad5919..40f90be 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -14,16 +14,16 @@ import ( var log = logging.WithFields(logging.Fields{}) var opts struct { - Port int `long:"port" short:"p" default:"9090" description:"Port on which to serve."` - Region string `long:"region" short:"r" env:"AWS_REGION" required:"true" description:"AWS region name"` - Profile string `long:"profile" short:"f" env:"AWS_PROFILE" default:"" description:"Named AWS profile to be used"` - RefreshPeriod int `long:"refresh-period" default:"360" description:"Refresh period in seconds"` + Port int `long:"port" short:"p" default:"9090" description:"Port on which to serve."` + Region string `long:"region" short:"r" env:"AWS_REGION" required:"true" description:"AWS region name"` + Profile string `long:"profile" short:"f" env:"AWS_PROFILE" default:"" description:"Named AWS profile to be used"` + RefreshPeriod int `long:"refresh-period" default:"360" description:"Refresh period in seconds"` + IncludeAWSTags []string `long:"include-aws-tag" description:"The aws resource tags to include as labels for returned metrics"` } func main() { flags.Parse(&opts) - - quotasExporter, err := serviceexporter.NewServiceQuotasExporter(opts.Region, opts.Profile, opts.RefreshPeriod) + quotasExporter, err := serviceexporter.NewServiceQuotasExporter(opts.Region, opts.Profile, opts.RefreshPeriod, opts.IncludeAWSTags) if err != nil { log.Fatalf("Failed to create exporter: %s", err) } diff --git a/pkg/service_exporter/service_exporter.go b/pkg/service_exporter/service_exporter.go index f01a071..b498b38 100644 --- a/pkg/service_exporter/service_exporter.go +++ b/pkg/service_exporter/service_exporter.go @@ -14,11 +14,11 @@ var log = logging.WithFields(logging.Fields{}) // Metric holds usage and limit desc and values type Metric struct { - resourceID string - usageDesc *prometheus.Desc - limitDesc *prometheus.Desc - usage float64 - limit float64 + usageDesc *prometheus.Desc + limitDesc *prometheus.Desc + usage float64 + limit float64 + labelValues []string } func metricKey(quota servicequotas.QuotaUsage) string { @@ -28,15 +28,16 @@ func metricKey(quota servicequotas.QuotaUsage) string { // ServiceQuotasExporter AWS service quotas and usage prometheus // exporter type ServiceQuotasExporter struct { - metricsRegion string - quotasClient servicequotas.QuotasInterface - metrics map[string]Metric - refreshPeriod int - waitForMetrics chan struct{} + metricsRegion string + quotasClient servicequotas.QuotasInterface + metrics map[string]Metric + refreshPeriod int + waitForMetrics chan struct{} + includedAWSTags []string } // NewServiceQuotasExporter creates a new ServiceQuotasExporter -func NewServiceQuotasExporter(region, profile string, refreshPeriod int) (*ServiceQuotasExporter, error) { +func NewServiceQuotasExporter(region, profile string, refreshPeriod int, includedAWSTags []string) (*ServiceQuotasExporter, error) { quotasClient, err := servicequotas.NewServiceQuotas(region, profile) if err != nil { return nil, errors.Wrapf(err, "%w") @@ -44,13 +45,14 @@ func NewServiceQuotasExporter(region, profile string, refreshPeriod int) (*Servi ch := make(chan struct{}) exporter := &ServiceQuotasExporter{ - metricsRegion: region, - quotasClient: quotasClient, - metrics: map[string]Metric{}, - refreshPeriod: refreshPeriod, - waitForMetrics: ch, + metricsRegion: region, + quotasClient: quotasClient, + metrics: map[string]Metric{}, + refreshPeriod: refreshPeriod, + waitForMetrics: ch, + includedAWSTags: includedAWSTags, } - go exporter.createQuotasAndDescriptions() + go exporter.createQuotasAndDescriptions(false) go exporter.refreshMetrics() return exporter, nil @@ -66,52 +68,57 @@ func (e *ServiceQuotasExporter) refreshMetrics() { } func (e *ServiceQuotasExporter) updateMetrics() { - quotas, err := e.quotasClient.QuotasAndUsage() - if err != nil { - log.Fatalf("Could not retrieve quotas and limits: %s", err) - } - - for _, quota := range quotas { - key := metricKey(quota) - log.Infof("Refreshing metrics for resource (%s)", quota.Identifier()) - if resourceMetric, ok := e.metrics[key]; ok { - resourceMetric.usage = quota.Usage - resourceMetric.limit = quota.Quota - e.metrics[key] = resourceMetric - } - } + e.createQuotasAndDescriptions(true) } -func (e *ServiceQuotasExporter) createQuotasAndDescriptions() { +func (e *ServiceQuotasExporter) createQuotasAndDescriptions(refresh bool) { quotas, err := e.quotasClient.QuotasAndUsage() if err != nil { log.Fatalf("Could not retrieve quotas and limits: %s", err) } for _, quota := range quotas { - // check so we don't report the same metric more than once key := metricKey(quota) - if _, ok := e.metrics[key]; ok { - continue - } + resourceID := quota.Identifier() - usageHelp := fmt.Sprintf("Used amount of %s", quota.Description) - usageDesc := newDesc(e.metricsRegion, quota.Name, "used_total", usageHelp, []string{"resource"}) + labels := []string{"resource"} + labelValues := []string{resourceID} - limitHelp := fmt.Sprintf("Limit of %s", quota.Description) - limitDesc := newDesc(e.metricsRegion, quota.Name, "limit_total", limitHelp, []string{"resource"}) + for _, tag := range e.includedAWSTags { + prometheusFormatTag := servicequotas.ToPrometheusNamingFormat(tag) + labels = append(labels, prometheusFormatTag) + // Need to set empty label value to keep label name and value count the same + labelValues = append(labelValues, quota.Tags[prometheusFormatTag]) + } - resourceMetric := Metric{ - resourceID: quota.Identifier(), - usageDesc: usageDesc, - limitDesc: limitDesc, - usage: quota.Usage, - limit: quota.Quota, + if refresh { + if resourceMetric, ok := e.metrics[key]; ok { + log.Infof("Refreshing metrics for resource (%s)", resourceID) + resourceMetric.usage = quota.Usage + resourceMetric.limit = quota.Quota + resourceMetric.labelValues = labelValues + e.metrics[key] = resourceMetric + } + } else { + usageHelp := fmt.Sprintf("Used amount of %s", quota.Description) + usageDesc := newDesc(e.metricsRegion, quota.Name, "used_total", usageHelp, labels) + + limitHelp := fmt.Sprintf("Limit of %s", quota.Description) + limitDesc := newDesc(e.metricsRegion, quota.Name, "limit_total", limitHelp, labels) + resourceMetric := Metric{ + usageDesc: usageDesc, + limitDesc: limitDesc, + usage: quota.Usage, + limit: quota.Quota, + labelValues: labelValues, + } + e.metrics[key] = resourceMetric } - e.metrics[key] = resourceMetric } - close(e.waitForMetrics) + if !refresh { + close(e.waitForMetrics) + } } // Describe writes descriptors to the prometheus desc channel @@ -127,8 +134,8 @@ func (e *ServiceQuotasExporter) Describe(ch chan<- *prometheus.Desc) { // Collect implements the collect function for prometheus collectors func (e *ServiceQuotasExporter) Collect(ch chan<- prometheus.Metric) { for _, metric := range e.metrics { - ch <- prometheus.MustNewConstMetric(metric.limitDesc, prometheus.GaugeValue, metric.limit, metric.resourceID) - ch <- prometheus.MustNewConstMetric(metric.usageDesc, prometheus.GaugeValue, metric.usage, metric.resourceID) + ch <- prometheus.MustNewConstMetric(metric.limitDesc, prometheus.GaugeValue, metric.limit, metric.labelValues...) + ch <- prometheus.MustNewConstMetric(metric.usageDesc, prometheus.GaugeValue, metric.usage, metric.labelValues...) } } diff --git a/pkg/service_exporter/service_exporter_test.go b/pkg/service_exporter/service_exporter_test.go index 7daa076..0beffd2 100644 --- a/pkg/service_exporter/service_exporter_test.go +++ b/pkg/service_exporter/service_exporter_test.go @@ -24,7 +24,7 @@ func (s *ServiceQuotasMock) QuotasAndUsage() ([]servicequotas.QuotaUsage, error) func TestUpdateMetrics(t *testing.T) { quotasClient := &ServiceQuotasMock{ quotas: []servicequotas.QuotaUsage{ - {ResourceName: resourceName("i-asdasd1"), Usage: 5, Quota: 10}, + {ResourceName: resourceName("i-asdasd1"), Usage: 5, Quota: 10, Tags: map[string]string{"dummy_tag": "dummy-value"}}, {ResourceName: resourceName("i-asdasd2"), Usage: 2, Quota: 3}, {ResourceName: resourceName("i-asdasd3"), Usage: 5, Quota: 10}, }, @@ -34,17 +34,18 @@ func TestUpdateMetrics(t *testing.T) { metricsRegion: "eu-west-1", quotasClient: quotasClient, metrics: map[string]Metric{ - "i-asdasd1": Metric{usage: 3, limit: 5}, + "i-asdasd1": Metric{usage: 3, limit: 5, labelValues: []string{"before-dummy-value"}}, "i-asdasd2": Metric{usage: 2, limit: 2}, }, - refreshPeriod: 360, + includedAWSTags: []string{"dummy-tag"}, + refreshPeriod: 360, } exporter.updateMetrics() expectedMetrics := map[string]Metric{ - "i-asdasd1": Metric{usage: 5, limit: 10}, - "i-asdasd2": Metric{usage: 2, limit: 3}, + "i-asdasd1": Metric{usage: 5, limit: 10, labelValues: []string{"i-asdasd1", "dummy-value"}}, + "i-asdasd2": Metric{usage: 2, limit: 3, labelValues: []string{"i-asdasd2", ""}}, } assert.Equal(t, expectedMetrics, exporter.metrics) } @@ -65,6 +66,7 @@ func TestCreateQuotasAndDescriptions(t *testing.T) { Description: "desc2", Usage: 1, Quota: 8, + Tags: map[string]string{"dummy_tag": "dummy-value", "dummy_tag2": "dummy-value2"}, } quotasClient := &ServiceQuotasMock{ quotas: []servicequotas.QuotaUsage{firstQ, secondQ}, @@ -72,35 +74,74 @@ func TestCreateQuotasAndDescriptions(t *testing.T) { ch := make(chan struct{}) exporter := &ServiceQuotasExporter{ - metricsRegion: region, - quotasClient: quotasClient, - metrics: map[string]Metric{}, - refreshPeriod: 360, - waitForMetrics: ch, + metricsRegion: region, + quotasClient: quotasClient, + metrics: map[string]Metric{}, + refreshPeriod: 360, + waitForMetrics: ch, + includedAWSTags: []string{"dummy-tag", "dummy-tag2"}, } - exporter.createQuotasAndDescriptions() + exporter.createQuotasAndDescriptions(false) - firstUsageDesc := newDesc(region, firstQ.Name, "used_total", "Used amount of desc1", []string{"resource"}) - firstLimitDesc := newDesc(region, firstQ.Name, "limit_total", "Limit of desc1", []string{"resource"}) - secondUsageDesc := newDesc(region, secondQ.Name, "used_total", "Used amount of desc2", []string{"resource"}) - secondLimitDesc := newDesc(region, secondQ.Name, "limit_total", "Limit of desc2", []string{"resource"}) + firstUsageDesc := newDesc(region, firstQ.Name, "used_total", "Used amount of desc1", []string{"resource", "dummy_tag", "dummy_tag2"}) + firstLimitDesc := newDesc(region, firstQ.Name, "limit_total", "Limit of desc1", []string{"resource", "dummy_tag", "dummy_tag2"}) + secondUsageDesc := newDesc(region, secondQ.Name, "used_total", "Used amount of desc2", []string{"resource", "dummy_tag", "dummy_tag2"}) + secondLimitDesc := newDesc(region, secondQ.Name, "limit_total", "Limit of desc2", []string{"resource", "dummy_tag", "dummy_tag2"}) expectedMetrics := map[string]Metric{ "Name1i-asdasd1": Metric{ - resourceID: "i-asdasd1", - usageDesc: firstUsageDesc, - limitDesc: firstLimitDesc, - usage: 5, - limit: 10, + usageDesc: firstUsageDesc, + limitDesc: firstLimitDesc, + usage: 5, + limit: 10, + labelValues: []string{"i-asdasd1", "", ""}, }, "Name2i-asdasd2": Metric{ - resourceID: "i-asdasd2", - usageDesc: secondUsageDesc, - limitDesc: secondLimitDesc, - usage: 1, - limit: 8, + usageDesc: secondUsageDesc, + limitDesc: secondLimitDesc, + usage: 1, + limit: 8, + labelValues: []string{"i-asdasd2", "dummy-value", "dummy-value2"}, }, } assert.Equal(t, expectedMetrics, exporter.metrics) } + +func TestCreateQuotasAndDescriptionsRefresh(t *testing.T) { + quotasClient := &ServiceQuotasMock{ + quotas: []servicequotas.QuotaUsage{ + {ResourceName: resourceName("i-asdasd1"), + Usage: 5, + Quota: 10, + Tags: map[string]string{"dummy_tag": "dummy-value"}, + Description: "This won't change the metric description for update", + }, + {ResourceName: resourceName("i-asdasd3"), Usage: 5, Quota: 10}, + }, + } + + desc := newDesc("eu-west-1", "some-quota", "some-metric", "help", []string{}) + + ch := make(chan struct{}) + exporter := &ServiceQuotasExporter{ + metricsRegion: "eu-west-1", + quotasClient: quotasClient, + metrics: map[string]Metric{ + "i-asdasd1": Metric{usage: 3, limit: 5, labelValues: []string{"before-dummy-value"}, usageDesc: desc}, + }, + waitForMetrics: ch, + includedAWSTags: []string{"dummy-tag"}, + refreshPeriod: 360, + } + + exporter.updateMetrics() + + expectedMetrics := map[string]Metric{ + "i-asdasd1": Metric{usage: 5, limit: 10, labelValues: []string{"i-asdasd1", "dummy-value"}, usageDesc: desc}, + } + + assert.Equal(t, expectedMetrics, exporter.metrics) + + close(ch) // should panic if it was already closed +} diff --git a/pkg/service_quotas/asg_limits.go b/pkg/service_quotas/asg_limits.go index f796de8..0ff7773 100644 --- a/pkg/service_quotas/asg_limits.go +++ b/pkg/service_quotas/asg_limits.go @@ -41,6 +41,7 @@ func (c *ASGUsageCheck) Usage() ([]QuotaUsage, error) { Description: numInstancesPerASGDescription, Usage: float64(numRunningInstances), Quota: float64(*asg.MaxSize), + Tags: autoscalingTagsToQuotaUsageTags(asg.Tags), } quotaUsages = append(quotaUsages, quotaUsage) } @@ -68,3 +69,17 @@ func isRunning(instance *autoscaling.Instance) bool { _, isNotRunning := notRunningStates[*instance.LifecycleState] return !isNotRunning } + +func autoscalingTagsToQuotaUsageTags(tags []*autoscaling.TagDescription) map[string]string { + length := len(tags) + if length == 0 { + return nil + } + + out := make(map[string]string, length) + for _, tag := range tags { + out[ToPrometheusNamingFormat(*tag.Key)] = *tag.Value + } + + return out +} diff --git a/pkg/service_quotas/ec2_limits.go b/pkg/service_quotas/ec2_limits.go index 588bfe4..961a95d 100644 --- a/pkg/service_quotas/ec2_limits.go +++ b/pkg/service_quotas/ec2_limits.go @@ -54,6 +54,8 @@ func (c *RulesPerSecurityGroupUsageCheck) Usage() ([]QuotaUsage, error) { var inboundRules int = 0 var outboundRules int = 0 + tags := ec2TagsToQuotaUsageTags(group.Tags) + for _, rule := range group.IpPermissions { inboundRules += len(rule.IpRanges) inboundRules += len(rule.UserIdGroupPairs) @@ -64,6 +66,7 @@ func (c *RulesPerSecurityGroupUsageCheck) Usage() ([]QuotaUsage, error) { ResourceName: group.GroupId, Description: inboundRulesPerSecGrpDesc, Usage: float64(inboundRules), + Tags: tags, } for _, rule := range group.IpPermissionsEgress { @@ -76,6 +79,7 @@ func (c *RulesPerSecurityGroupUsageCheck) Usage() ([]QuotaUsage, error) { ResourceName: group.GroupId, Description: outboundRulesPerSecGrpDesc, Usage: float64(outboundRules), + Tags: tags, } quotaUsages = append(quotaUsages, []QuotaUsage{inboundUsage, outboundUsage}...) @@ -113,6 +117,7 @@ func (c *SecurityGroupsPerENIUsageCheck) Usage() ([]QuotaUsage, error) { ResourceName: eni.NetworkInterfaceId, Description: secGroupsPerENIDesc, Usage: float64(len(eni.Groups)), + Tags: ec2TagsToQuotaUsageTags(eni.TagSet), } quotaUsages = append(quotaUsages, usage) } @@ -330,6 +335,7 @@ func (c *AvailableIpsPerSubnetUsageCheck) Usage() ([]QuotaUsage, error) { Description: availableIPsPerSubnetDesc, Usage: usage, Quota: float64(maxNumOfIPs), + Tags: ec2TagsToQuotaUsageTags(subnet.Tags), } availabilityInfos = append(availabilityInfos, availabilityInfo) } @@ -347,3 +353,17 @@ func (c *AvailableIpsPerSubnetUsageCheck) Usage() ([]QuotaUsage, error) { return availabilityInfos, nil } + +func ec2TagsToQuotaUsageTags(tags []*ec2.Tag) map[string]string { + length := len(tags) + if length == 0 { + return nil + } + + out := make(map[string]string, length) + for _, tag := range tags { + out[ToPrometheusNamingFormat(*tag.Key)] = *tag.Value + } + + return out +} diff --git a/pkg/service_quotas/service_quotas.go b/pkg/service_quotas/service_quotas.go index 466189e..64db951 100644 --- a/pkg/service_quotas/service_quotas.go +++ b/pkg/service_quotas/service_quotas.go @@ -70,6 +70,9 @@ type QuotaUsage struct { Usage float64 // Quota is the current quota Quota float64 + + // Tags are the metadata associated with the resource in form of key, value pairs + Tags map[string]string } // Identifier for the service quota. Either the resource name in case @@ -104,7 +107,7 @@ type QuotasInterface interface { func NewServiceQuotas(region, profile string) (QuotasInterface, error) { validRegion, isChina := isValidRegion(region) if !validRegion { - return nil, errors.Wrapf(ErrInvalidRegion, "failed to create ServiceQuotas: %w") + return nil, errors.Wrapf(ErrInvalidRegion, "failed to create ServiceQuotas") } opts := session.Options{} diff --git a/pkg/service_quotas/tags.go b/pkg/service_quotas/tags.go new file mode 100644 index 0000000..7db1398 --- /dev/null +++ b/pkg/service_quotas/tags.go @@ -0,0 +1,19 @@ +package servicequotas + +import ( + "regexp" + "strings" +) + + +var invalidLabelCharactersRE = regexp.MustCompile(`[^a-zA-Z0-9_]`) +var matchAllCap = regexp.MustCompile("([a-z0-9])([A-Z])") + +func ToPrometheusNamingFormat(s string) string { + return toSnakeCase(invalidLabelCharactersRE.ReplaceAllString(s, "_")) +} + +func toSnakeCase(s string) string { + snake := matchAllCap.ReplaceAllString(s, "${1}_${2}") + return strings.ToLower(snake) +}