From 090c25b50d03203fbe8324fab9e978794ff1d41d Mon Sep 17 00:00:00 2001 From: i583051 Date: Tue, 30 Jan 2024 10:56:31 +0200 Subject: [PATCH] Add request metrics for BBS --- cmd/bbs/main.go | 13 +++ handlers/actual_lrp_handlers.go | 20 +++-- handlers/actual_lrp_handlers_test.go | 18 +++- handlers/actual_lrp_lifecycle_handler.go | 55 ++++++++++-- handlers/actual_lrp_lifecycle_handler_test.go | 18 +++- handlers/context_test.go | 4 +- handlers/desired_lrp_handlers.go | 89 ++++++++++++++----- handlers/desired_lrp_handlers_test.go | 16 ++++ handlers/evacuation_handler.go | 50 +++++++++-- handlers/evacuation_handler_test.go | 24 ++++- handlers/handlers.go | 12 +-- handlers/metrics_recorder.go | 23 +++++ handlers/task_handlers.go | 53 +++++++++-- handlers/task_handlers_test.go | 18 +++- 14 files changed, 350 insertions(+), 63 deletions(-) create mode 100644 handlers/metrics_recorder.go diff --git a/cmd/bbs/main.go b/cmd/bbs/main.go index e4612416..e00f77ed 100644 --- a/cmd/bbs/main.go +++ b/cmd/bbs/main.go @@ -39,6 +39,7 @@ import ( "code.cloudfoundry.org/locket/jointlock" "code.cloudfoundry.org/locket/lock" "code.cloudfoundry.org/locket/lockheldmetrics" + requests "code.cloudfoundry.org/locket/metrics/helpers" locketmodels "code.cloudfoundry.org/locket/models" "code.cloudfoundry.org/rep" "code.cloudfoundry.org/tlsconfig" @@ -248,6 +249,16 @@ func main() { logger.Info("report-interval", lager.Data{"value": bbsConfig.ReportInterval}) fileDescriptorTicker := clock.NewTicker(time.Duration(bbsConfig.ReportInterval)) requestStatsTicker := clock.NewTicker(time.Duration(bbsConfig.ReportInterval)) + requestTypes := []string{ + "DesiredLRPEndpoints", + "DesiredLRPLifecycleEndponts", + "ActualLRPSEndpoint", + "ActualLRPLifecycleEndpoints", + "StartActualLRPEndpoint", + "EvacuationEndpoints", + "TaskEndpoints", + } + requestMetrics := requests.NewRequestMetricsNotifier(logger, clock, metronClient, time.Duration(bbsConfig.ReportInterval), requestTypes) locksHeldTicker := clock.NewTicker(time.Duration(bbsConfig.ReportInterval)) fileDescriptorPath := fmt.Sprintf("/proc/%d/fd", os.Getpid()) @@ -276,6 +287,7 @@ func main() { taskStatMetronNotifier, migrationsDone, exitChan, + requestMetrics, ) bbsElectionMetronNotifier := metrics.NewBBSElectionMetronNotifier(logger, metronClient) @@ -359,6 +371,7 @@ func main() { {Name: "lrp-stat-metron-notifier", Runner: lrpStatMetronNotifier}, {Name: "task-stat-metron-notifier", Runner: taskStatMetronNotifier}, {Name: "db-stat-metron-notifier", Runner: dbStatMetronNotifier}, + {Name: "request-metrics-notifier", Runner: requestMetrics}, } if bbsConfig.DebugAddress != "" { diff --git a/handlers/actual_lrp_handlers.go b/handlers/actual_lrp_handlers.go index 891f8a5f..35f20167 100644 --- a/handlers/actual_lrp_handlers.go +++ b/handlers/actual_lrp_handlers.go @@ -2,21 +2,27 @@ package handlers import ( "net/http" + "time" "code.cloudfoundry.org/bbs/db" "code.cloudfoundry.org/bbs/models" "code.cloudfoundry.org/lager/v3" + "code.cloudfoundry.org/locket/metrics/helpers" ) type ActualLRPHandler struct { - db db.ActualLRPDB - exitChan chan<- struct{} + db db.ActualLRPDB + exitChan chan<- struct{} + requestMetrics helpers.RequestMetrics + metricsGroup string } -func NewActualLRPHandler(db db.ActualLRPDB, exitChan chan<- struct{}) *ActualLRPHandler { +func NewActualLRPHandler(db db.ActualLRPDB, exitChan chan<- struct{}, requestMetrics helpers.RequestMetrics) *ActualLRPHandler { return &ActualLRPHandler{ - db: db, - exitChan: exitChan, + db: db, + exitChan: exitChan, + requestMetrics: requestMetrics, + metricsGroup: "ActualLRPSEndpoint", } } @@ -29,6 +35,10 @@ func (h *ActualLRPHandler) ActualLRPs(logger lager.Logger, w http.ResponseWriter request := &models.ActualLRPsRequest{} response := &models.ActualLRPsResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + err = parseRequest(logger, req, request) if err == nil { var index *int32 diff --git a/handlers/actual_lrp_handlers_test.go b/handlers/actual_lrp_handlers_test.go index aef80860..46ec9925 100644 --- a/handlers/actual_lrp_handlers_test.go +++ b/handlers/actual_lrp_handlers_test.go @@ -5,12 +5,16 @@ import ( "net/http" "net/http/httptest" "strings" + "time" "code.cloudfoundry.org/bbs/db/dbfakes" "code.cloudfoundry.org/bbs/handlers" "code.cloudfoundry.org/bbs/models" + "code.cloudfoundry.org/clock" + mfakes "code.cloudfoundry.org/diego-logging-client/testhelpers" "code.cloudfoundry.org/lager/v3" "code.cloudfoundry.org/lager/v3/lagertest" + "code.cloudfoundry.org/locket/metrics/helpers" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/onsi/gomega/gbytes" @@ -23,6 +27,8 @@ var _ = Describe("ActualLRP Handlers", func() { responseRecorder *httptest.ResponseRecorder handler *handlers.ActualLRPHandler exitCh chan struct{} + requestMetrics *helpers.RequestMetricsNotifier + fakeMetronClient *mfakes.FakeIngressClient actualLRP1 models.ActualLRP actualLRP2 models.ActualLRP @@ -76,7 +82,17 @@ var _ = Describe("ActualLRP Handlers", func() { exitCh = make(chan struct{}, 1) requestIdHeader = "f256f938-9e14-4abd-974f-63c6138f1cca" b3RequestIdHeader = fmt.Sprintf(`"trace-id":"%s"`, strings.Replace(requestIdHeader, "-", "", -1)) - handler = handlers.NewActualLRPHandler(fakeActualLRPDB, exitCh) + + clock := clock.NewClock() + requestMetrics = helpers.NewRequestMetricsNotifier( + logger, + clock, + fakeMetronClient, + 1*time.Minute, + []string{"ActualLRPSEndpoint"}, + ) + + handler = handlers.NewActualLRPHandler(fakeActualLRPDB, exitCh, requestMetrics) }) Describe("ActualLRPs", func() { diff --git a/handlers/actual_lrp_lifecycle_handler.go b/handlers/actual_lrp_lifecycle_handler.go index f8d3defb..bfe590f7 100644 --- a/handlers/actual_lrp_lifecycle_handler.go +++ b/handlers/actual_lrp_lifecycle_handler.go @@ -3,10 +3,12 @@ package handlers import ( "context" "net/http" + "time" "code.cloudfoundry.org/bbs/models" "code.cloudfoundry.org/bbs/trace" "code.cloudfoundry.org/lager/v3" + "code.cloudfoundry.org/locket/metrics/helpers" ) //go:generate counterfeiter -generate @@ -31,28 +33,40 @@ type ActualLRPLifecycleController interface { } type ActualLRPLifecycleHandler struct { - controller ActualLRPLifecycleController - exitChan chan<- struct{} + controller ActualLRPLifecycleController + exitChan chan<- struct{} + requestMetrics helpers.RequestMetrics + metricsGroup string + metricsGroupStart string } func NewActualLRPLifecycleHandler( controller ActualLRPLifecycleController, exitChan chan<- struct{}, + requestMetrics helpers.RequestMetrics, ) *ActualLRPLifecycleHandler { return &ActualLRPLifecycleHandler{ - controller: controller, - exitChan: exitChan, + controller: controller, + exitChan: exitChan, + requestMetrics: requestMetrics, + metricsGroup: "ActualLRPLifecycleEndpoints", + metricsGroupStart: "StartActualLRPEndpoint", } } func (h *ActualLRPLifecycleHandler) ClaimActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { var err error logger = logger.Session("claim-actual-lrp").WithTraceInfo(req) - logger.Debug("starting") - defer logger.Debug("complete") + logger.Info("starting") + defer logger.Info("complete") request := &models.ClaimActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) @@ -67,6 +81,7 @@ func (h *ActualLRPLifecycleHandler) ClaimActualLRP(logger lager.Logger, w http.R } func (h *ActualLRPLifecycleHandler) StartActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("start-actual-lrp").WithTraceInfo(req) logger.Debug("starting") defer logger.Debug("complete") @@ -74,10 +89,14 @@ func (h *ActualLRPLifecycleHandler) StartActualLRP(logger lager.Logger, w http.R request := &models.StartActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroupStart) + defer stopMetrics(h.requestMetrics, h.metricsGroupStart, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return @@ -119,16 +138,22 @@ func (h *ActualLRPLifecycleHandler) StartActualLRP_r0(logger lager.Logger, w htt } func (h *ActualLRPLifecycleHandler) CrashActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("crash-actual-lrp").WithTraceInfo(req) logger.Debug("starting") defer logger.Debug("complete") request := &models.CrashActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return @@ -150,6 +175,10 @@ func (h *ActualLRPLifecycleHandler) FailActualLRP(logger lager.Logger, w http.Re request := &models.FailActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) @@ -172,6 +201,10 @@ func (h *ActualLRPLifecycleHandler) RemoveActualLRP(logger lager.Logger, w http. request := &models.RemoveActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) @@ -186,13 +219,17 @@ func (h *ActualLRPLifecycleHandler) RemoveActualLRP(logger lager.Logger, w http. } func (h *ActualLRPLifecycleHandler) RetireActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("retire-actual-lrp").WithTraceInfo(req) logger.Debug("starting") defer logger.Debug("complete") request := &models.RetireActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} - var err error + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) diff --git a/handlers/actual_lrp_lifecycle_handler_test.go b/handlers/actual_lrp_lifecycle_handler_test.go index 2acd9942..8a34474d 100644 --- a/handlers/actual_lrp_lifecycle_handler_test.go +++ b/handlers/actual_lrp_lifecycle_handler_test.go @@ -6,13 +6,17 @@ import ( "net/http" "net/http/httptest" "strings" + "time" "code.cloudfoundry.org/bbs/handlers" "code.cloudfoundry.org/bbs/handlers/fake_controllers" "code.cloudfoundry.org/bbs/models" "code.cloudfoundry.org/bbs/serviceclient/serviceclientfakes" + "code.cloudfoundry.org/clock" + mfakes "code.cloudfoundry.org/diego-logging-client/testhelpers" "code.cloudfoundry.org/lager/v3" "code.cloudfoundry.org/lager/v3/lagertest" + "code.cloudfoundry.org/locket/metrics/helpers" "code.cloudfoundry.org/rep/repfakes" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -25,7 +29,9 @@ var _ = Describe("ActualLRP Lifecycle Handlers", func() { responseRecorder *httptest.ResponseRecorder handler *handlers.ActualLRPLifecycleHandler fakeController *fake_controllers.FakeActualLRPLifecycleController + fakeMetronClient *mfakes.FakeIngressClient exitCh chan struct{} + requestMetrics *helpers.RequestMetricsNotifier requestIdHeader string b3RequestIdHeader string @@ -44,7 +50,17 @@ var _ = Describe("ActualLRP Lifecycle Handlers", func() { requestIdHeader = "b67c32c0-1666-49dc-97c1-274332e6b706" b3RequestIdHeader = fmt.Sprintf(`"trace-id":"%s"`, strings.Replace(requestIdHeader, "-", "", -1)) fakeController = &fake_controllers.FakeActualLRPLifecycleController{} - handler = handlers.NewActualLRPLifecycleHandler(fakeController, exitCh) + + clock := clock.NewClock() + requestMetrics = helpers.NewRequestMetricsNotifier( + logger, + clock, + fakeMetronClient, + 1*time.Minute, + []string{"ActualLRPLifecycleEndpoints", "StartActualLRPEndpoint"}, + ) + + handler = handlers.NewActualLRPLifecycleHandler(fakeController, exitCh, requestMetrics) }) Describe("ClaimActualLRP", func() { diff --git a/handlers/context_test.go b/handlers/context_test.go index 48f7f71b..0694ea83 100644 --- a/handlers/context_test.go +++ b/handlers/context_test.go @@ -20,6 +20,7 @@ import ( "code.cloudfoundry.org/clock/fakeclock" "code.cloudfoundry.org/diego-logging-client/testhelpers" "code.cloudfoundry.org/lager/v3/lagertest" + requests "code.cloudfoundry.org/locket/metrics/helpers" "github.com/tedsuo/ifrit" ginkgomon "github.com/tedsuo/ifrit/ginkgomon_v2" @@ -35,6 +36,7 @@ var _ = Describe("Context", func() { sqlConn *sql.DB sqlProcess ifrit.Process migrationProcess ifrit.Process + requestMetrics requests.RequestMetrics ) BeforeEach(func() { @@ -92,7 +94,7 @@ var _ = Describe("Context", func() { Eventually(migrationsDone).Should(BeClosed()) exitCh := make(chan struct{}, 1) - handler = handlers.NewActualLRPHandler(sqlDB, exitCh) + handler = handlers.NewActualLRPHandler(sqlDB, exitCh, requestMetrics) }) AfterEach(func() { diff --git a/handlers/desired_lrp_handlers.go b/handlers/desired_lrp_handlers.go index e42ca792..3816794a 100644 --- a/handlers/desired_lrp_handlers.go +++ b/handlers/desired_lrp_handlers.go @@ -3,6 +3,7 @@ package handlers import ( "context" "net/http" + "time" "code.cloudfoundry.org/auctioneer" "code.cloudfoundry.org/bbs/db" @@ -13,22 +14,26 @@ import ( "code.cloudfoundry.org/bbs/serviceclient" "code.cloudfoundry.org/bbs/trace" "code.cloudfoundry.org/lager/v3" + "code.cloudfoundry.org/locket/metrics/helpers" "code.cloudfoundry.org/rep" "code.cloudfoundry.org/routing-info/internalroutes" "code.cloudfoundry.org/workpool" ) type DesiredLRPHandler struct { - desiredLRPDB db.DesiredLRPDB - actualLRPDB db.ActualLRPDB - desiredHub events.Hub - actualHub events.Hub - actualLRPInstanceHub events.Hub - auctioneerClient auctioneer.Client - repClientFactory rep.ClientFactory - serviceClient serviceclient.ServiceClient - updateWorkersCount int - exitChan chan<- struct{} + desiredLRPDB db.DesiredLRPDB + actualLRPDB db.ActualLRPDB + desiredHub events.Hub + actualHub events.Hub + actualLRPInstanceHub events.Hub + auctioneerClient auctioneer.Client + repClientFactory rep.ClientFactory + serviceClient serviceclient.ServiceClient + updateWorkersCount int + exitChan chan<- struct{} + requestMetrics helpers.RequestMetrics + metricsGroup string + metricsGroupLifecycle string } func NewDesiredLRPHandler( @@ -42,18 +47,22 @@ func NewDesiredLRPHandler( repClientFactory rep.ClientFactory, serviceClient serviceclient.ServiceClient, exitChan chan<- struct{}, + requestMetrics helpers.RequestMetrics, ) *DesiredLRPHandler { return &DesiredLRPHandler{ - desiredLRPDB: desiredLRPDB, - actualLRPDB: actualLRPDB, - desiredHub: desiredHub, - actualHub: actualHub, - actualLRPInstanceHub: actualLRPInstanceHub, - auctioneerClient: auctioneerClient, - repClientFactory: repClientFactory, - serviceClient: serviceClient, - updateWorkersCount: updateWorkersCount, - exitChan: exitChan, + desiredLRPDB: desiredLRPDB, + actualLRPDB: actualLRPDB, + desiredHub: desiredHub, + actualHub: actualHub, + actualLRPInstanceHub: actualLRPInstanceHub, + auctioneerClient: auctioneerClient, + repClientFactory: repClientFactory, + serviceClient: serviceClient, + updateWorkersCount: updateWorkersCount, + exitChan: exitChan, + requestMetrics: requestMetrics, + metricsGroup: "DesiredLRPEndpoints", + metricsGroupLifecycle: "DesiredLRPLifecycleEndponts", } } @@ -64,6 +73,10 @@ func (h *DesiredLRPHandler) commonDesiredLRPs(logger lager.Logger, targetVersion request := &models.DesiredLRPsRequest{} response := &models.DesiredLRPsResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + err = parseRequest(logger, req, request) if err == nil { filter := models.DesiredLRPFilter{Domain: request.Domain, ProcessGuids: request.ProcessGuids} @@ -101,6 +114,10 @@ func (h *DesiredLRPHandler) commonDesiredLRPByProcessGuid(logger lager.Logger, t request := &models.DesiredLRPByProcessGuidRequest{} response := &models.DesiredLRPResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + err = parseRequest(logger, req, request) if err == nil { var desiredLRP *models.DesiredLRP @@ -134,6 +151,10 @@ func (h *DesiredLRPHandler) DesiredLRPSchedulingInfos(logger lager.Logger, w htt request := &models.DesiredLRPsRequest{} response := &models.DesiredLRPSchedulingInfosResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + err = parseRequest(logger, req, request) if err == nil { filter := models.DesiredLRPFilter{ @@ -157,6 +178,10 @@ func (h *DesiredLRPHandler) DesiredLRPRoutingInfos(logger lager.Logger, w http.R request := &models.DesiredLRPsRequest{} response := &models.DesiredLRPsResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + err = parseRequest(logger, req, request) if err == nil { filter := models.DesiredLRPFilter{ @@ -172,14 +197,20 @@ func (h *DesiredLRPHandler) DesiredLRPRoutingInfos(logger lager.Logger, w http.R } func (h *DesiredLRPHandler) DesireDesiredLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("desire-lrp").WithTraceInfo(req) request := &models.DesireLRPRequest{} response := &models.DesiredLRPLifecycleResponse{} + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroupLifecycle) + defer stopMetrics(h.requestMetrics, h.metricsGroupLifecycle, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return @@ -206,14 +237,20 @@ func (h *DesiredLRPHandler) DesireDesiredLRP(logger lager.Logger, w http.Respons } func (h *DesiredLRPHandler) UpdateDesiredLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("update-desired-lrp").WithTraceInfo(req) request := &models.UpdateDesiredLRPRequest{} response := &models.DesiredLRPLifecycleResponse{} + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroupLifecycle) + defer stopMetrics(h.requestMetrics, h.metricsGroupLifecycle, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { logger.Error("failed-parsing-request", err) response.Error = models.ConvertError(err) @@ -268,14 +305,20 @@ func (h *DesiredLRPHandler) UpdateDesiredLRP(logger lager.Logger, w http.Respons } func (h *DesiredLRPHandler) RemoveDesiredLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("remove-desired-lrp").WithTraceInfo(req) request := &models.RemoveDesiredLRPRequest{} response := &models.DesiredLRPLifecycleResponse{} + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroupLifecycle) + defer stopMetrics(h.requestMetrics, h.metricsGroupLifecycle, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return diff --git a/handlers/desired_lrp_handlers_test.go b/handlers/desired_lrp_handlers_test.go index 56ad4cd1..fd7ceda3 100644 --- a/handlers/desired_lrp_handlers_test.go +++ b/handlers/desired_lrp_handlers_test.go @@ -20,8 +20,11 @@ import ( "code.cloudfoundry.org/bbs/models" "code.cloudfoundry.org/bbs/models/test/model_helpers" . "code.cloudfoundry.org/bbs/test_helpers" + "code.cloudfoundry.org/clock" + mfakes "code.cloudfoundry.org/diego-logging-client/testhelpers" "code.cloudfoundry.org/lager/v3" "code.cloudfoundry.org/lager/v3/lagertest" + "code.cloudfoundry.org/locket/metrics/helpers" "code.cloudfoundry.org/rep" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -41,6 +44,8 @@ var _ = Describe("DesiredLRP Handlers", func() { responseRecorder *httptest.ResponseRecorder handler *handlers.DesiredLRPHandler exitCh chan struct{} + requestMetrics *helpers.RequestMetricsNotifier + fakeMetronClient *mfakes.FakeIngressClient desiredLRP1 models.DesiredLRP desiredLRP2 models.DesiredLRP @@ -64,6 +69,16 @@ var _ = Describe("DesiredLRP Handlers", func() { exitCh = make(chan struct{}, 1) requestIdHeader = "25f23d6a-f46d-460e-7135-7ddc0759a198" b3RequestIdHeader = fmt.Sprintf(`"trace-id":"%s"`, strings.Replace(requestIdHeader, "-", "", -1)) + + clock := clock.NewClock() + requestMetrics = helpers.NewRequestMetricsNotifier( + logger, + clock, + fakeMetronClient, + 1*time.Minute, + []string{"DesiredLRPEndpoints", "DesiredLRPLifecycleEndponts"}, + ) + handler = handlers.NewDesiredLRPHandler( 5, fakeDesiredLRPDB, @@ -75,6 +90,7 @@ var _ = Describe("DesiredLRP Handlers", func() { fakeRepClientFactory, fakeServiceClient, exitCh, + requestMetrics, ) }) diff --git a/handlers/evacuation_handler.go b/handlers/evacuation_handler.go index 03c9a928..f3649d55 100644 --- a/handlers/evacuation_handler.go +++ b/handlers/evacuation_handler.go @@ -3,9 +3,11 @@ package handlers import ( "context" "net/http" + "time" "code.cloudfoundry.org/bbs/models" "code.cloudfoundry.org/lager/v3" + "code.cloudfoundry.org/locket/metrics/helpers" "github.com/gogo/protobuf/proto" ) @@ -19,17 +21,22 @@ type EvacuationController interface { } type EvacuationHandler struct { - controller EvacuationController - exitChan chan<- struct{} + controller EvacuationController + exitChan chan<- struct{} + requestMetrics helpers.RequestMetrics + metricsGroup string } func NewEvacuationHandler( controller EvacuationController, exitChan chan<- struct{}, + requestMetrics helpers.RequestMetrics, ) *EvacuationHandler { return &EvacuationHandler{ - controller: controller, - exitChan: exitChan, + controller: controller, + exitChan: exitChan, + requestMetrics: requestMetrics, + metricsGroup: "EvacuationEndpoints", } } @@ -48,6 +55,10 @@ func (h *EvacuationHandler) RemoveEvacuatingActualLRP(logger lager.Logger, w htt request := &models.RemoveEvacuatingActualLRPRequest{} response := &models.RemoveEvacuatingActualLRPResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) @@ -62,16 +73,22 @@ func (h *EvacuationHandler) RemoveEvacuatingActualLRP(logger lager.Logger, w htt } func (h *EvacuationHandler) EvacuateClaimedActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("evacuate-claimed-actual-lrp").WithTraceInfo(req) logger.Info("started") defer logger.Info("completed") request := &models.EvacuateClaimedActualLRPRequest{} response := &models.EvacuationResponse{} + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { logger.Error("failed-parsing-request", err) response.Error = models.ConvertError(err) @@ -85,16 +102,22 @@ func (h *EvacuationHandler) EvacuateClaimedActualLRP(logger lager.Logger, w http } func (h *EvacuationHandler) EvacuateCrashedActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("evacuate-crashed-actual-lrp").WithTraceInfo(req) logger.Info("started") defer logger.Info("completed") request := &models.EvacuateCrashedActualLRPRequest{} response := &models.EvacuationResponse{} + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { logger.Error("failed-parsing-request", err) response.Error = models.ConvertError(err) @@ -106,17 +129,23 @@ func (h *EvacuationHandler) EvacuateCrashedActualLRP(logger lager.Logger, w http } func (h *EvacuationHandler) commonEvacuateRunningActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request, useInternalRoutesAndTags bool) { + var err error logger = logger.Session("evacuate-running-actual-lrp").WithTraceInfo(req) logger.Info("starting") defer logger.Info("completed") response := &models.EvacuationResponse{} response.KeepContainer = true + + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) request := &models.EvacuateRunningActualLRPRequest{} - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return @@ -151,15 +180,20 @@ func (h *EvacuationHandler) EvacuateRunningActualLRP_r0(logger lager.Logger, w h } func (h *EvacuationHandler) EvacuateStoppedActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("evacuate-stopped-actual-lrp").WithTraceInfo(req) request := &models.EvacuateStoppedActualLRPRequest{} response := &models.EvacuationResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { logger.Error("failed-to-parse-request", err) response.Error = models.ConvertError(err) diff --git a/handlers/evacuation_handler_test.go b/handlers/evacuation_handler_test.go index 49dab2a6..ef3cbd2f 100644 --- a/handlers/evacuation_handler_test.go +++ b/handlers/evacuation_handler_test.go @@ -5,13 +5,17 @@ import ( "net/http" "net/http/httptest" "strings" + "time" "code.cloudfoundry.org/bbs/handlers" "code.cloudfoundry.org/bbs/handlers/fake_controllers" "code.cloudfoundry.org/bbs/models" "code.cloudfoundry.org/bbs/models/test/model_helpers" + "code.cloudfoundry.org/clock" + mfakes "code.cloudfoundry.org/diego-logging-client/testhelpers" "code.cloudfoundry.org/lager/v3" "code.cloudfoundry.org/lager/v3/lagertest" + "code.cloudfoundry.org/locket/metrics/helpers" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/onsi/gomega/gbytes" @@ -24,9 +28,11 @@ var _ = Describe("Evacuation Handlers", func() { handler *handlers.EvacuationHandler exitCh chan struct{} - key models.ActualLRPKey - instanceKey models.ActualLRPInstanceKey - controller *fake_controllers.FakeEvacuationController + key models.ActualLRPKey + instanceKey models.ActualLRPInstanceKey + controller *fake_controllers.FakeEvacuationController + requestMetrics *helpers.RequestMetricsNotifier + fakeMetronClient *mfakes.FakeIngressClient requestIdHeader string b3RequestIdHeader string @@ -41,7 +47,17 @@ var _ = Describe("Evacuation Handlers", func() { b3RequestIdHeader = fmt.Sprintf(`"trace-id":"%s"`, strings.Replace(requestIdHeader, "-", "", -1)) controller = &fake_controllers.FakeEvacuationController{} - handler = handlers.NewEvacuationHandler(controller, exitCh) + + clock := clock.NewClock() + requestMetrics = helpers.NewRequestMetricsNotifier( + logger, + clock, + fakeMetronClient, + 1*time.Minute, + []string{"EvacuationEndpoints"}, + ) + + handler = handlers.NewEvacuationHandler(controller, exitCh, requestMetrics) key = models.ActualLRPKey{ ProcessGuid: "some-guid", diff --git a/handlers/handlers.go b/handlers/handlers.go index ea3ecd22..a6489bd2 100644 --- a/handlers/handlers.go +++ b/handlers/handlers.go @@ -16,6 +16,7 @@ import ( "code.cloudfoundry.org/bbs/serviceclient" "code.cloudfoundry.org/bbs/taskworkpool" "code.cloudfoundry.org/lager/v3" + "code.cloudfoundry.org/locket/metrics/helpers" "code.cloudfoundry.org/rep" "github.com/gogo/protobuf/proto" "github.com/tedsuo/rata" @@ -37,10 +38,11 @@ func New( taskStatMetronNotifier metrics.TaskStatMetronNotifier, migrationsDone <-chan struct{}, exitChan chan struct{}, + requestMetrics *helpers.RequestMetricsNotifier, ) http.Handler { pingHandler := NewPingHandler() domainHandler := NewDomainHandler(db, exitChan) - actualLRPHandler := NewActualLRPHandler(db, exitChan) + actualLRPHandler := NewActualLRPHandler(db, exitChan, requestMetrics) actualLRPController := controllers.NewActualLRPLifecycleController( db, db, db, db, auctioneerClient, @@ -55,11 +57,11 @@ func New( actualHub, actualLRPInstanceHub, ) - actualLRPLifecycleHandler := NewActualLRPLifecycleHandler(actualLRPController, exitChan) - evacuationHandler := NewEvacuationHandler(evacuationController, exitChan) - desiredLRPHandler := NewDesiredLRPHandler(updateWorkers, db, db, desiredHub, actualHub, actualLRPInstanceHub, auctioneerClient, repClientFactory, serviceClient, exitChan) + actualLRPLifecycleHandler := NewActualLRPLifecycleHandler(actualLRPController, exitChan, requestMetrics) + evacuationHandler := NewEvacuationHandler(evacuationController, exitChan, requestMetrics) + desiredLRPHandler := NewDesiredLRPHandler(updateWorkers, db, db, desiredHub, actualHub, actualLRPInstanceHub, auctioneerClient, repClientFactory, serviceClient, exitChan, requestMetrics) taskController := controllers.NewTaskController(db, taskCompletionClient, auctioneerClient, serviceClient, repClientFactory, taskHub, taskStatMetronNotifier, maxTaskPlacementRetries) - taskHandler := NewTaskHandler(taskController, exitChan) + taskHandler := NewTaskHandler(taskController, exitChan, requestMetrics) lrpGroupEventsHandler := NewLRPGroupEventsHandler(desiredHub, actualHub) taskEventsHandler := NewTaskEventHandler(taskHub) lrpInstanceEventsHandler := NewLRPInstanceEventHandler(desiredHub, actualLRPInstanceHub) diff --git a/handlers/metrics_recorder.go b/handlers/metrics_recorder.go new file mode 100644 index 00000000..049a43fd --- /dev/null +++ b/handlers/metrics_recorder.go @@ -0,0 +1,23 @@ +package handlers + +import ( + "time" + + "code.cloudfoundry.org/locket/metrics/helpers" +) + +func startMetrics(metrics helpers.RequestMetrics, requestType string) { + metrics.IncrementRequestsStartedCounter(requestType, 1) + metrics.IncrementRequestsInFlightCounter(requestType, 1) +} + +func stopMetrics(metrics helpers.RequestMetrics, requestType string, start time.Time, deferErr *error) { + metrics.DecrementRequestsInFlightCounter(requestType, 1) + metrics.UpdateLatency(requestType, time.Since(start)) + + if deferErr == nil || *deferErr == nil { + metrics.IncrementRequestsSucceededCounter(requestType, 1) + } else { + metrics.IncrementRequestsFailedCounter(requestType, 1) + } +} diff --git a/handlers/task_handlers.go b/handlers/task_handlers.go index 531ace12..f44c33b8 100644 --- a/handlers/task_handlers.go +++ b/handlers/task_handlers.go @@ -9,6 +9,7 @@ import ( "code.cloudfoundry.org/bbs/models" "code.cloudfoundry.org/bbs/trace" "code.cloudfoundry.org/lager/v3" + "code.cloudfoundry.org/locket/metrics/helpers" ) //counterfeiter:generate -o fake_controllers/fake_task_controller.go . TaskController @@ -28,17 +29,22 @@ type TaskController interface { } type TaskHandler struct { - controller TaskController - exitChan chan<- struct{} + controller TaskController + exitChan chan<- struct{} + requestMetrics helpers.RequestMetrics + metricsGroup string } func NewTaskHandler( controller TaskController, exitChan chan<- struct{}, + requestMetrics helpers.RequestMetrics, ) *TaskHandler { return &TaskHandler{ - controller: controller, - exitChan: exitChan, + controller: controller, + exitChan: exitChan, + requestMetrics: requestMetrics, + metricsGroup: "TaskEndpoints", } } @@ -49,6 +55,10 @@ func (h *TaskHandler) commonTasks(logger lager.Logger, targetVersion format.Vers request := &models.TasksRequest{} response := &models.TasksResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() @@ -84,6 +94,10 @@ func (h *TaskHandler) commonTaskByGuid(logger lager.Logger, targetVersion format request := &models.TaskByGuidRequest{} response := &models.TaskResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() @@ -119,6 +133,10 @@ func (h *TaskHandler) DesireTask(logger lager.Logger, w http.ResponseWriter, req request := &models.DesireTaskRequest{} response := &models.TaskLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() @@ -140,6 +158,10 @@ func (h *TaskHandler) StartTask(logger lager.Logger, w http.ResponseWriter, req request := &models.StartTaskRequest{} response := &models.StartTaskResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() @@ -155,15 +177,20 @@ func (h *TaskHandler) StartTask(logger lager.Logger, w http.ResponseWriter, req } func (h *TaskHandler) CancelTask(logger lager.Logger, w http.ResponseWriter, req *http.Request) { + var err error logger = logger.Session("cancel-task").WithTraceInfo(req) request := &models.TaskGuidRequest{} response := &models.TaskLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() - err := parseRequest(logger, req, request) + err = parseRequest(logger, req, request) if err != nil { logger.Error("failed-parsing-request", err) response.Error = models.ConvertError(err) @@ -202,6 +229,10 @@ func (h *TaskHandler) RejectTask(logger lager.Logger, w http.ResponseWriter, req request := &models.RejectTaskRequest{} response := &models.TaskLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() @@ -223,6 +254,10 @@ func (h *TaskHandler) CompleteTask(logger lager.Logger, w http.ResponseWriter, r request := &models.CompleteTaskRequest{} response := &models.TaskLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() @@ -244,6 +279,10 @@ func (h *TaskHandler) ResolvingTask(logger lager.Logger, w http.ResponseWriter, request := &models.TaskGuidRequest{} response := &models.TaskLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() @@ -265,6 +304,10 @@ func (h *TaskHandler) DeleteTask(logger lager.Logger, w http.ResponseWriter, req request := &models.TaskGuidRequest{} response := &models.TaskLifecycleResponse{} + start := time.Now() + startMetrics(h.requestMetrics, h.metricsGroup) + defer stopMetrics(h.requestMetrics, h.metricsGroup, start, &err) + defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer func() { writeResponse(w, response) }() diff --git a/handlers/task_handlers_test.go b/handlers/task_handlers_test.go index 64b1eae4..31163b16 100644 --- a/handlers/task_handlers_test.go +++ b/handlers/task_handlers_test.go @@ -6,6 +6,7 @@ import ( "net/http" "net/http/httptest" "strings" + "time" "code.cloudfoundry.org/bbs/format" "code.cloudfoundry.org/bbs/handlers" @@ -14,8 +15,11 @@ import ( "code.cloudfoundry.org/bbs/models/test/model_helpers" . "code.cloudfoundry.org/bbs/test_helpers" "code.cloudfoundry.org/bbs/trace" + "code.cloudfoundry.org/clock" + mfakes "code.cloudfoundry.org/diego-logging-client/testhelpers" "code.cloudfoundry.org/lager/v3" "code.cloudfoundry.org/lager/v3/lagertest" + "code.cloudfoundry.org/locket/metrics/helpers" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/onsi/gomega/gbytes" @@ -29,6 +33,8 @@ var _ = Describe("Task Handlers", func() { responseRecorder *httptest.ResponseRecorder handler *handlers.TaskHandler exitCh chan struct{} + requestMetrics *helpers.RequestMetricsNotifier + fakeMetronClient *mfakes.FakeIngressClient requestBody interface{} request *http.Request @@ -42,7 +48,17 @@ var _ = Describe("Task Handlers", func() { responseRecorder = httptest.NewRecorder() exitCh = make(chan struct{}, 1) controller = &fake_controllers.FakeTaskController{} - handler = handlers.NewTaskHandler(controller, exitCh) + + clock := clock.NewClock() + requestMetrics = helpers.NewRequestMetricsNotifier( + logger, + clock, + fakeMetronClient, + 1*time.Minute, + []string{"TaskEndpoints"}, + ) + + handler = handlers.NewTaskHandler(controller, exitCh, requestMetrics) requestIdHeader = "e52a3684-8d05-4905-bfeb-f1d59d92eb1d" b3RequestIdHeader = fmt.Sprintf(`"trace-id":"%s"`, strings.Replace(requestIdHeader, "-", "", -1))