Skip to content

Commit

Permalink
Add observability stack for SOLDR (#40)
Browse files Browse the repository at this point in the history
* Add observability stack for SOLDR

* fix: naming of services, containers, volumes and hostnames

* fix: add mapping device-id from host OS to the vx-containers to pinning generated service ID

* fix: reorder services in the docker compose file

* fix: default option of ES Java opts

* fix: bug with delivery ES metrics from exporter

* feat: add loki service support in compose

* fix: pinning observability components version

* fix: docker compose restart policy to unless-stopped

* feat: add collecting logs and sending it to loki via otel collector

* Update README.md

Co-authored-by: Dmitry Ng <[email protected]>
Co-authored-by: Denis Tarasov <[email protected]>
Co-authored-by: Mikhail Kochegarov <[email protected]>
  • Loading branch information
3 people authored Dec 30, 2022
1 parent 0e04087 commit 0f887a6
Show file tree
Hide file tree
Showing 14 changed files with 702 additions and 65 deletions.
9 changes: 9 additions & 0 deletions .env.template
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,12 @@ MINIO_ENDPOINT=http://127.0.0.1:9000

# internal services
INTERNAL_NET_HOST=127.0.0.1

# observability
OTEL_ADDR=otel.local:8148
GRAFANA_SERVER_HOST=0.0.0.0
GRAFANA_SERVER_PORT=3000
MASTER_PASSWORD=P@ssw0rd
ELK_VERSION=7.14.1
ES_JAVA_OPTS="-Xmx2g -Xms2g"
SPAN_STORAGE_TYPE=elasticsearch
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ docker compose pull
docker compose up -d
```

#### Run observability stack components

```bash
docker compose --profile obs up -d
```

#### Stop services

```bash
Expand Down Expand Up @@ -219,6 +225,23 @@ Launch on of the available debug tasks:
- launch vxagent
- launch web ui

### Observability stack

The observability stack collects metrics, traces, and logs from SOLDR components. The stack consists of:
- `Grafana` - querying and visualizing observability data
- `VictoriaMetrics` - datastore for server and SOLDR components metrics
- `node-exporter` and `elasticsearch-exporter` - scraping metrics
- `Jaeger` - storing and querying traces
- `Elasticsearch` - datastore for `Jaeger`
- `OpenTelemetry collector` - single entry point to receive, process and export all observability data

> For more information about the collector, visit [`https://opentelemetry.io/docs/collector`](https://opentelemetry.io/docs/collector).
Run the observability stack components and then open `Grafana` in a browser at [`https://localhost:3000`](https://localhost:3000). Default credentials: `admin/admin`. After the default password for the `admin` user has been changed, you can view the provisioned SOLDR dashboards by clicking the `Dashboards` icon on the menu bar. On these SOLDR dashboards you can view server, agent, and module resource utilization, event statistics, etc.
To check traces, click the `Explore` icon on the menu bar, then choose the `Jaeger` data source from the dropdown in the top left. You can then query and filter traces by `Service Name`, `Operation Name`, `Tags`, `duration` and `Time ranges`.

> The full observability stack requires more resources compared to a clean SOLDR installation.
### Clean up the project

Remove all build files and other security keys:
Expand Down
27 changes: 24 additions & 3 deletions cmd/agent/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,13 @@ func configureLogging(ctx context.Context, c *config.Config) (func(), error) {
}
}
tracerClient := observability.NewHookTracerClient(c.TracerConfigClient)
tracerProvider, err := observability.NewTracerProvider(ctx, tracerClient, serviceName, c.Version, attr)
tracerProvider, err := observability.NewTracerProvider(
ctx,
tracerClient,
serviceName,
c.Version,
attr,
)
if err != nil {
return nil, fmt.Errorf("failed to initialize a tracer provider for logging: %w", err)
}
Expand All @@ -305,12 +311,27 @@ func configureLogging(ctx context.Context, c *config.Config) (func(), error) {
}
}
meterClient := observability.NewHookMeterClient(c.MeterConfigClient)
meterProvider, err := observability.NewMeterProvider(ctx, meterClient, serviceName, c.Version, attr)
meterProvider, err := observability.NewMeterProvider(
ctx,
meterClient,
serviceName,
c.Version,
attr,
)
if err != nil {
return nil, fmt.Errorf("failed to initialized a metrics provider for logging")
}

observability.InitObserver(ctx, tracerProvider, meterProvider, tracerClient, meterClient, serviceName, logLevels)
observability.InitObserver(
ctx,
tracerProvider,
meterProvider,
tracerClient,
meterClient,
serviceName,
c.Version,
logLevels,
)
return func() {
observability.Observer.Close()
}, nil
Expand Down
35 changes: 29 additions & 6 deletions cmd/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ import (
"soldr/internal/version"
)

const serviceName = "vxapi"

type Config struct {
Debug bool `config:"debug"`
Develop bool `config:"is_develop"`
Expand Down Expand Up @@ -191,15 +193,21 @@ func main() {
serviceS3ConnectionStorage := mem.NewServiceS3ConnectionStorage()

tracerClient := observability.NewProxyTracerClient(
observability.NewOtlpTracerClient(cfg.Tracing.Addr),
observability.NewOtlpTracerAndLoggerClient(cfg.Tracing.Addr),
observability.NewHookTracerClient(&observability.HookClientConfig{
ResendTimeout: observability.DefaultResendTimeout,
QueueSizeLimit: observability.DefaultQueueSizeLimit,
PacketSizeLimit: observability.DefaultPacketSizeLimit,
}),
)
attr := attribute.String("api_server_id", system.MakeAgentID())
tracerProvider, err := observability.NewTracerProvider(ctx, tracerClient, "vxapi", version.GetBinaryVersion(), attr)
tracerProvider, err := observability.NewTracerProvider(
ctx,
tracerClient,
serviceName,
version.GetBinaryVersion(),
attr,
)
if err != nil {
logger.WithError(err).Error("could not create tracer provider")
return
Expand All @@ -216,7 +224,13 @@ func main() {
logger.WithError(err).Error("could not create meter client")
return
}
meterProvider, err := observability.NewMeterProvider(ctx, meterClient, "vxapi", version.GetBinaryVersion(), attr)
meterProvider, err := observability.NewMeterProvider(
ctx,
meterClient,
serviceName,
version.GetBinaryVersion(),
attr,
)
if err != nil {
logger.WithError(err).Error("could not create meter provider")
return
Expand All @@ -232,7 +246,16 @@ func main() {
if cfg.Debug {
logLevels = append(logLevels, logrus.DebugLevel)
}
observability.InitObserver(ctx, tracerProvider, meterProvider, tracerClient, meterClient, "vxapi", logLevels)
observability.InitObserver(
ctx,
tracerProvider,
meterProvider,
tracerClient,
meterClient,
serviceName,
version.GetBinaryVersion(),
logLevels,
)

gormMeter := meterProvider.Meter("vxapi-meter")
if err = meter.InitGormMetrics(gormMeter); err != nil {
Expand All @@ -241,8 +264,8 @@ func main() {
}

// initialize system metric collection in current observer instance
observability.Observer.StartProcessMetricCollect("vxapi", version.GetBinaryVersion(), attr)
observability.Observer.StartGoRuntimeMetricCollect("vxapi", version.GetBinaryVersion(), attr)
observability.Observer.StartProcessMetricCollect(serviceName, version.GetBinaryVersion(), attr)
observability.Observer.StartGoRuntimeMetricCollect(serviceName, version.GetBinaryVersion(), attr)
defer observability.Observer.Close()

exchanger := srvevents.NewExchanger()
Expand Down
29 changes: 25 additions & 4 deletions cmd/server/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ func getLogDir(configLogDir string) (string, error) {

func initObserver(server *Server, _ *logrus.Entry) (func(), error) {
server.tracerClient = observability.NewProxyTracerClient(
observability.NewOtlpTracerClient(server.config.OtelAddr),
observability.NewOtlpTracerAndLoggerClient(server.config.OtelAddr),
observability.NewHookTracerClient(&observability.HookClientConfig{
ResendTimeout: 100 * time.Millisecond,
QueueSizeLimit: 100 * 1024 * 1024, // 100 MB
Expand All @@ -502,11 +502,23 @@ func initObserver(server *Server, _ *logrus.Entry) (func(), error) {
)
attr := attribute.String("server_id", system.MakeAgentID())
ctx := context.Background()
tracerProvider, err := observability.NewTracerProvider(ctx, server.tracerClient, serviceName, server.version, attr)
tracerProvider, err := observability.NewTracerProvider(
ctx,
server.tracerClient,
serviceName,
server.version,
attr,
)
if err != nil {
return nil, fmt.Errorf("failed to initialize a tracer provider: %w", err)
}
meterProvider, err := observability.NewMeterProvider(ctx, server.metricsClient, serviceName, server.version, attr)
meterProvider, err := observability.NewMeterProvider(
ctx,
server.metricsClient,
serviceName,
server.version,
attr,
)
if err != nil {
return nil, fmt.Errorf("failed to initialize a metrics provider: %w", err)
}
Expand All @@ -524,7 +536,16 @@ func initObserver(server *Server, _ *logrus.Entry) (func(), error) {
} else {
logrus.SetLevel(logrus.InfoLevel)
}
observability.InitObserver(ctx, tracerProvider, meterProvider, server.tracerClient, server.metricsClient, serviceName, logLevels)
observability.InitObserver(
ctx,
tracerProvider,
meterProvider,
server.tracerClient,
server.metricsClient,
serviceName,
server.version,
logLevels,
)
return func() {
observability.Observer.Close()
}, nil
Expand Down
Loading

0 comments on commit 0f887a6

Please sign in to comment.