Skip to content

Commit

Permalink
feat: telemetry for api traces, database calls and runtime stats (#102)
Browse files Browse the repository at this point in the history
  • Loading branch information
kushsharma authored Nov 3, 2021
1 parent ecb37f5 commit d1001c8
Show file tree
Hide file tree
Showing 38 changed files with 839 additions and 337 deletions.
13 changes: 12 additions & 1 deletion .optimus.sample.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,4 +67,15 @@ log:
# name: airflow2
# # skip bootstrap step of scheduler required for proper functioning
# # but can be disabled for local development, default: false
# skip_init: true
# skip_init: true

# application telemetry
#telemetry:
#
# # configure the host:port used for
# # - exposing profiling metrics via pprof over /debug
# # - prometheus stats over /metrics
# profile_addr: ":9110"
#
# # jaeger collector address that application traces are sent to
# jaeger_addr: "http://localhost:14268/api/traces"
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ NAME = "github.com/odpf/optimus"
LAST_COMMIT := $(shell git rev-parse --short HEAD)
LAST_TAG := "$(shell git rev-list --tags --max-count=1)"
OPMS_VERSION := "$(shell git describe --tags ${LAST_TAG})-next"
PROTON_COMMIT := "e3b3bee44b27c5cdfb9276ccda86af4a462614c3"
PROTON_COMMIT := "4c091fe53834323a5f1cef2c96b955c92c522659"

all: build

Expand Down
27 changes: 27 additions & 0 deletions api/handler/v1/runtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import (
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"

"github.com/google/uuid"
"github.com/odpf/optimus/meta"

Expand All @@ -26,6 +29,17 @@ import (
"google.golang.org/protobuf/types/known/timestamppb"
)

// runtimeDeployJobSpecificationCounter is a Prometheus counter tracking the
// number of jobs requested for deployment by the runtime service. It is
// created through promauto, which registers it with the default registry.
var runtimeDeployJobSpecificationCounter = promauto.NewCounter(prometheus.CounterOpts{
	Name: "runtime_deploy_jobspec",
	Help: "Number of jobs requested for deployment by runtime",
})

// runtimeDeployResourceSpecificationCounter is a Prometheus counter tracking
// the number of resources requested for deployment by the runtime service. It
// is created through promauto, which registers it with the default registry.
var runtimeDeployResourceSpecificationCounter = promauto.NewCounter(prometheus.CounterOpts{
	Name: "runtime_deploy_resourcespec",
	Help: "Number of resources requested for deployment by runtime",
})

// ProjectRepoFactory supplies store.ProjectRepository values to the handlers
// in this package.
type ProjectRepoFactory interface {
// New returns a store.ProjectRepository.
New() store.ProjectRepository
}
Expand Down Expand Up @@ -136,6 +150,7 @@ func (sv *RuntimeServiceServer) DeployJobSpecification(req *pb.DeployJobSpecific
return status.Errorf(codes.Internal, "failed to sync jobs: \n%s", err.Error())
}

runtimeDeployJobSpecificationCounter.Add(float64(len(req.Jobs)))
sv.l.Info("finished job deployment", "time", time.Since(startTime))
return nil
}
Expand Down Expand Up @@ -316,6 +331,7 @@ func (sv *RuntimeServiceServer) CreateJobSpecification(ctx context.Context, req
return nil, status.Errorf(codes.Internal, "failed to sync jobs: \n%s", err.Error())
}

runtimeDeployJobSpecificationCounter.Inc()
return &pb.CreateJobSpecificationResponse{
Success: true,
Message: fmt.Sprintf("job %s is created and deployed successfully on project %s", jobSpec.Name, req.GetProjectName()),
Expand Down Expand Up @@ -418,6 +434,13 @@ func (sv *RuntimeServiceServer) ListProjectNamespaces(ctx context.Context, req *
}, nil
}

// RegisterInstance creates a new job run and a running instance in the
// persistent store, then returns the config/assets attached to the job spec
// used in running the instance.
// Note: this whole operation would need to run in a single transaction if
// multiple requests could arrive for the same instance at the same time.
// That should never happen in our use cases, so for performance reasons we
// deliberately do not wrap it in a transaction.
func (sv *RuntimeServiceServer) RegisterInstance(ctx context.Context, req *pb.RegisterInstanceRequest) (*pb.RegisterInstanceResponse, error) {
projectRepo := sv.projectRepoFactory.New()
projSpec, err := projectRepo.GetByName(ctx, req.GetProjectName())
Expand Down Expand Up @@ -629,6 +652,7 @@ func (sv *RuntimeServiceServer) CreateResource(ctx context.Context, req *pb.Crea
if err := sv.resourceSvc.CreateResource(ctx, namespaceSpec, []models.ResourceSpec{optResource}, sv.progressObserver); err != nil {
return nil, status.Errorf(codes.Internal, "%s: failed to create resource %s", err.Error(), req.Resource.GetName())
}
runtimeDeployResourceSpecificationCounter.Inc()
return &pb.CreateResourceResponse{
Success: true,
}, nil
Expand All @@ -655,6 +679,7 @@ func (sv *RuntimeServiceServer) UpdateResource(ctx context.Context, req *pb.Upda
if err := sv.resourceSvc.UpdateResource(ctx, namespaceSpec, []models.ResourceSpec{optResource}, sv.progressObserver); err != nil {
return nil, status.Errorf(codes.Internal, "%s: failed to create resource %s", err.Error(), req.Resource.GetName())
}
runtimeDeployResourceSpecificationCounter.Inc()
return &pb.UpdateResourceResponse{
Success: true,
}, nil
Expand Down Expand Up @@ -724,6 +749,8 @@ func (sv *RuntimeServiceServer) DeployResourceSpecification(req *pb.DeployResour
if err := sv.resourceSvc.UpdateResource(respStream.Context(), namespaceSpec, resourceSpecs, observers); err != nil {
return status.Errorf(codes.Internal, "failed to update resources: \n%s", err.Error())
}

runtimeDeployResourceSpecificationCounter.Add(float64(len(req.Resources)))
sv.l.Info("finished resource deployment in", "time", time.Since(startTime))
return nil
}
Expand Down
Loading

0 comments on commit d1001c8

Please sign in to comment.