From 83e5428d4ba9915091550956408e49049302c9df Mon Sep 17 00:00:00 2001 From: Nikolay Sivko Date: Wed, 14 Jun 2023 16:40:38 +0300 Subject: [PATCH] caching RDS/Elasticache instance IP to report metrics even if there are issues with DNS --- elasticache/collector.go | 25 ++++++++++++++----------- rds/collector.go | 23 ++++++++++++++--------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/elasticache/collector.go b/elasticache/collector.go index 556d614..0e2ae5c 100644 --- a/elasticache/collector.go +++ b/elasticache/collector.go @@ -30,6 +30,7 @@ type Collector struct { metricCollector prometheus.Collector cluster elasticache.CacheCluster node elasticache.CacheNode + ip *net.IPAddr logger logger.Logger } @@ -44,7 +45,10 @@ func NewCollector(sess *session.Session, cluster *elasticache.CacheCluster, node node: *node, logger: logger.NewKlog(aws.StringValue(cluster.CacheClusterId)), } - + var err error + if c.ip, err = net.ResolveIPAddr("", aws.StringValue(c.node.Endpoint.Address)); err != nil { + return nil, err + } c.startMetricCollector() return c, nil } @@ -55,6 +59,12 @@ func (c *Collector) update(cluster *elasticache.CacheCluster, n *elasticache.Cac c.node = *n c.startMetricCollector() } + ip, err := net.ResolveIPAddr("", aws.StringValue(c.node.Endpoint.Address)) + if err != nil { + c.logger.Error(err) + } else { + c.ip = ip + } c.cluster = *cluster c.node = *n } @@ -62,7 +72,7 @@ func (c *Collector) update(cluster *elasticache.CacheCluster, n *elasticache.Cac func (c *Collector) startMetricCollector() { switch aws.StringValue(c.cluster.Engine) { case "redis": - url := fmt.Sprintf("redis://%s:%d", aws.StringValue(c.node.Endpoint.Address), aws.Int64Value(c.node.Endpoint.Port)) + url := fmt.Sprintf("redis://%s:%d", c.ip.String(), aws.Int64Value(c.node.Endpoint.Port)) opts := exporter.Options{ Namespace: "redis", ConfigCommandName: "CONFIG", @@ -77,7 +87,7 @@ func (c *Collector) startMetricCollector() { c.metricCollector = collector } case "memcached": - address := fmt.Sprintf("%s:%d", aws.StringValue(c.node.Endpoint.Address), aws.Int64Value(c.node.Endpoint.Port)) + address := fmt.Sprintf("%s:%d", c.ip.String(), aws.Int64Value(c.node.Endpoint.Port)) c.metricCollector = mcExporter.New( address, *flags.ElasticacheConnectTimeout, @@ -93,13 +103,6 @@ func (c *Collector) Close() {} func (c *Collector) Collect(ch chan<- prometheus.Metric) { ch <- utils.Gauge(dStatus, 1, aws.StringValue(c.node.CacheNodeStatus)) - var ip string - if a, err := net.ResolveIPAddr("", aws.StringValue(c.node.Endpoint.Address)); err != nil { - c.logger.Warning(err) - } else { - ip = a.String() - } - cluster := aws.StringValue(c.cluster.ReplicationGroupId) if cluster == "" { cluster = aws.StringValue(c.cluster.CacheClusterId) @@ -109,7 +112,7 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { aws.StringValue(c.sess.Config.Region), aws.StringValue(c.node.CustomerAvailabilityZone), aws.StringValue(c.node.Endpoint.Address), - ip, + c.ip.String(), strconv.Itoa(int(aws.Int64Value(c.node.Endpoint.Port))), aws.StringValue(c.cluster.Engine), aws.StringValue(c.cluster.EngineVersion), diff --git a/rds/collector.go b/rds/collector.go index 24e8161..25af4d5 100644 --- a/rds/collector.go +++ b/rds/collector.go @@ -71,6 +71,7 @@ type Collector struct { sess *session.Session region string instance rds.DBInstance + ip *net.IPAddr cloudWatchLogsApi *cloudwatchlogs.CloudWatchLogs @@ -90,6 +91,11 @@ func NewCollector(sess *session.Session, i *rds.DBInstance) (*Collector, error) cloudWatchLogsApi: cloudwatchlogs.New(sess), logger: logger.NewKlog(aws.StringValue(i.DBInstanceIdentifier)), } + var err error + c.ip, err = net.ResolveIPAddr("", aws.StringValue(i.Endpoint.Address)) + if err != nil { + return nil, err + } c.startDbCollector() c.startLogCollector() @@ -107,6 +113,12 @@ func (c *Collector) update(i *rds.DBInstance) { c.instance = *i c.startDbCollector() } + ip, err := net.ResolveIPAddr("", aws.StringValue(i.Endpoint.Address)) + if err != nil { + c.logger.Error(err) + } else { + c.ip = ip + } c.instance = *i } @@ -117,7 +129,7 @@ func (c *Collector) startDbCollector() { i := c.instance switch aws.StringValue(i.Engine) { case "postgres", "aurora-postgresql": - endpoint := net.JoinHostPort(aws.StringValue(i.Endpoint.Address), strconv.Itoa(int(aws.Int64Value(i.Endpoint.Port)))) + endpoint := net.JoinHostPort(c.ip.String(), strconv.Itoa(int(aws.Int64Value(i.Endpoint.Port)))) userPass := url.UserPassword(*flags.RdsDbUser, *flags.RdsDbPassword) connectTimeout := int((*flags.RdsDbConnectTimeout).Seconds()) if connectTimeout < 1 { @@ -163,19 +175,12 @@ func (c *Collector) Collect(ch chan<- prometheus.Metric) { ch <- utils.Gauge(dStatus, 1, aws.StringValue(i.DBInstanceStatus)) - var ip string - if a, err := net.ResolveIPAddr("", aws.StringValue(i.Endpoint.Address)); err != nil { - c.logger.Warning(err) - } else { - ip = a.String() - } - ch <- utils.Gauge(dInfo, 1, c.region, aws.StringValue(i.AvailabilityZone), aws.StringValue(i.Endpoint.Address), - ip, + c.ip.String(), strconv.Itoa(int(aws.Int64Value(i.Endpoint.Port))), aws.StringValue(i.Engine),