From 7e1001a72fedcdb18b75031dd41f49c2cf7a2403 Mon Sep 17 00:00:00 2001 From: Dan Jaglowski Date: Thu, 12 Dec 2024 10:57:51 -0500 Subject: [PATCH] Separate and internalize marshalers, and test exported surface only. --- exporter/chronicleexporter/grpc_exporter.go | 25 +- exporter/chronicleexporter/hostmetrics.go | 3 +- exporter/chronicleexporter/http_exporter.go | 30 +- .../chronicleexporter/internal/ccid/ccid.go | 22 + .../internal/marshal/grpc.go | 243 +++ .../internal/marshal/grpc_test.go | 785 ++++++++ .../internal/marshal/http.go | 240 +++ .../internal/marshal/http_test.go | 769 ++++++++ .../internal/marshal/marshal.go | 205 ++ .../internal/marshal/marshal_test.go | 225 +++ exporter/chronicleexporter/marshal.go | 573 ------ exporter/chronicleexporter/marshal_test.go | 1706 ----------------- 12 files changed, 2526 insertions(+), 2300 deletions(-) create mode 100644 exporter/chronicleexporter/internal/ccid/ccid.go create mode 100644 exporter/chronicleexporter/internal/marshal/grpc.go create mode 100644 exporter/chronicleexporter/internal/marshal/grpc_test.go create mode 100644 exporter/chronicleexporter/internal/marshal/http.go create mode 100644 exporter/chronicleexporter/internal/marshal/http_test.go create mode 100644 exporter/chronicleexporter/internal/marshal/marshal.go create mode 100644 exporter/chronicleexporter/internal/marshal/marshal_test.go delete mode 100644 exporter/chronicleexporter/marshal.go delete mode 100644 exporter/chronicleexporter/marshal_test.go diff --git a/exporter/chronicleexporter/grpc_exporter.go b/exporter/chronicleexporter/grpc_exporter.go index 17c127db0..52e565fa6 100644 --- a/exporter/chronicleexporter/grpc_exporter.go +++ b/exporter/chronicleexporter/grpc_exporter.go @@ -19,7 +19,7 @@ import ( "fmt" "net/http" - "github.com/google/uuid" + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/internal/marshal" "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" 
"go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/consumer" @@ -41,7 +41,7 @@ type grpcExporter struct { cfg *Config set component.TelemetrySettings id string - marshaler *protoMarshaler + marshaler *marshal.GRPC client api.IngestionServiceV2Client conn *grpc.ClientConn @@ -49,19 +49,24 @@ type grpcExporter struct { } func newGRPCExporter(cfg *Config, params exporter.Settings) (*grpcExporter, error) { - customerID, err := uuid.Parse(cfg.CustomerID) + marshaler, err := marshal.NewGRPC(marshal.Config{ + CustomerID: cfg.CustomerID, + Namespace: cfg.Namespace, + LogType: cfg.LogType, + RawLogField: cfg.RawLogField, + OverrideLogType: cfg.OverrideLogType, + IngestionLabels: cfg.IngestionLabels, + BatchRequestSizeLimit: cfg.BatchRequestSizeLimitGRPC, + BatchLogCountLimit: cfg.BatchLogCountLimitGRPC, + }, params.TelemetrySettings) if err != nil { - return nil, fmt.Errorf("parse customer ID: %w", err) - } - marshaller, err := newProtoMarshaler(*cfg, params.TelemetrySettings, customerID[:]) - if err != nil { - return nil, fmt.Errorf("create proto marshaller: %w", err) + return nil, fmt.Errorf("create proto marshaler: %w", err) } return &grpcExporter{ cfg: cfg, set: params.TelemetrySettings, id: params.ID.String(), - marshaler: marshaller, + marshaler: marshaler, }, nil } @@ -108,7 +113,7 @@ func (exp *grpcExporter) Shutdown(context.Context) error { } func (exp *grpcExporter) ConsumeLogs(ctx context.Context, ld plog.Logs) error { - payloads, err := exp.marshaler.MarshalRawLogs(ctx, ld) + payloads, err := exp.marshaler.MarshalLogs(ctx, ld) if err != nil { return fmt.Errorf("marshal logs: %w", err) } diff --git a/exporter/chronicleexporter/hostmetrics.go b/exporter/chronicleexporter/hostmetrics.go index ed5de40d9..d4f275c68 100644 --- a/exporter/chronicleexporter/hostmetrics.go +++ b/exporter/chronicleexporter/hostmetrics.go @@ -22,6 +22,7 @@ import ( "time" "github.com/google/uuid" + 
"github.com/observiq/bindplane-agent/exporter/chronicleexporter/internal/ccid" "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" "github.com/shirou/gopsutil/v3/process" "go.opentelemetry.io/collector/component" @@ -121,7 +122,7 @@ func (hmr *hostMetricsReporter) getAndReset() *api.BatchCreateEventsRequest { now := timestamppb.Now() batchID := uuid.New() source := &api.EventSource{ - CollectorId: chronicleCollectorID[:], + CollectorId: ccid.ChronicleCollectorID[:], Namespace: hmr.namespace, CustomerId: hmr.customerID, } diff --git a/exporter/chronicleexporter/http_exporter.go b/exporter/chronicleexporter/http_exporter.go index e0d1e62c5..e11be9699 100644 --- a/exporter/chronicleexporter/http_exporter.go +++ b/exporter/chronicleexporter/http_exporter.go @@ -23,7 +23,7 @@ import ( "io" "net/http" - "github.com/google/uuid" + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/internal/marshal" "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" "go.opentelemetry.io/collector/component" "go.opentelemetry.io/collector/consumer" @@ -41,23 +41,33 @@ const httpScope = "https://www.googleapis.com/auth/cloud-platform" type httpExporter struct { cfg *Config set component.TelemetrySettings - marshaler *protoMarshaler + marshaler *marshal.HTTP client *http.Client } func newHTTPExporter(cfg *Config, params exporter.Settings) (*httpExporter, error) { - customerID, err := uuid.Parse(cfg.CustomerID) + marshaler, err := marshal.NewHTTP(marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: cfg.CustomerID, + Namespace: cfg.Namespace, + LogType: cfg.LogType, + RawLogField: cfg.RawLogField, + OverrideLogType: cfg.OverrideLogType, + IngestionLabels: cfg.IngestionLabels, + BatchRequestSizeLimit: cfg.BatchRequestSizeLimitHTTP, + BatchLogCountLimit: cfg.BatchLogCountLimitHTTP, + }, + Project: cfg.Project, + Location: cfg.Location, + Forwarder: cfg.Forwarder, + }, params.TelemetrySettings) if err != nil { - return nil, 
fmt.Errorf("parse customer ID: %w", err) - } - marshaller, err := newProtoMarshaler(*cfg, params.TelemetrySettings, customerID[:]) - if err != nil { - return nil, fmt.Errorf("create proto marshaller: %w", err) + return nil, fmt.Errorf("create proto marshaler: %w", err) } return &httpExporter{ cfg: cfg, set: params.TelemetrySettings, - marshaler: marshaller, + marshaler: marshaler, }, nil } @@ -84,7 +94,7 @@ func (exp *httpExporter) Shutdown(context.Context) error { } func (exp *httpExporter) ConsumeLogs(ctx context.Context, ld plog.Logs) error { - payloads, err := exp.marshaler.MarshalRawLogsForHTTP(ctx, ld) + payloads, err := exp.marshaler.MarshalLogs(ctx, ld) if err != nil { return fmt.Errorf("marshal logs: %w", err) } diff --git a/exporter/chronicleexporter/internal/ccid/ccid.go b/exporter/chronicleexporter/internal/ccid/ccid.go new file mode 100644 index 000000000..38a6b6b95 --- /dev/null +++ b/exporter/chronicleexporter/internal/ccid/ccid.go @@ -0,0 +1,22 @@ +// Copyright observIQ, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package ccid + +import ( + "github.com/google/uuid" +) + +// This is a specific collector ID for Chronicle. It's used to identify bindplane agents in Chronicle. 
+var ChronicleCollectorID = uuid.MustParse("aaaa1111-aaaa-1111-aaaa-1111aaaa1111") diff --git a/exporter/chronicleexporter/internal/marshal/grpc.go b/exporter/chronicleexporter/internal/marshal/grpc.go new file mode 100644 index 000000000..7ec1ea295 --- /dev/null +++ b/exporter/chronicleexporter/internal/marshal/grpc.go @@ -0,0 +1,243 @@ +// Copyright observIQ, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package marshal + +import ( + "context" + "encoding/json" + "fmt" + "strings" + "time" + + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/plog" + "go.uber.org/zap" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/timestamppb" +) + +type GRPC struct { + protoMarshaler +} + +func NewGRPC(cfg Config, set component.TelemetrySettings) (*GRPC, error) { + m, err := newProtoMarshaler(cfg, set) + if err != nil { + return nil, err + } + return &GRPC{protoMarshaler: *m}, nil +} + +func (m *GRPC) MarshalLogs(ctx context.Context, ld plog.Logs) ([]*api.BatchCreateLogsRequest, error) { + rawLogs, namespace, ingestionLabels, err := m.extractRawLogs(ctx, ld) + if err != nil { + return nil, fmt.Errorf("extract raw logs: %w", err) + } + return m.constructPayloads(rawLogs, namespace, ingestionLabels), nil +} + +func (m *GRPC) extractRawLogs(ctx context.Context, ld plog.Logs) 
(map[string][]*api.LogEntry, map[string]string, map[string][]*api.Label, error) { + entries := make(map[string][]*api.LogEntry) + namespaceMap := make(map[string]string) + ingestionLabelsMap := make(map[string][]*api.Label) + + for i := 0; i < ld.ResourceLogs().Len(); i++ { + resourceLog := ld.ResourceLogs().At(i) + for j := 0; j < resourceLog.ScopeLogs().Len(); j++ { + scopeLog := resourceLog.ScopeLogs().At(j) + for k := 0; k < scopeLog.LogRecords().Len(); k++ { + logRecord := scopeLog.LogRecords().At(k) + rawLog, logType, namespace, ingestionLabels, err := m.processLogRecord(ctx, logRecord, scopeLog, resourceLog) + + if err != nil { + m.set.Logger.Error("Error processing log record", zap.Error(err)) + continue + } + + if rawLog == "" { + continue + } + + var timestamp time.Time + + if logRecord.Timestamp() != 0 { + timestamp = logRecord.Timestamp().AsTime() + } else { + timestamp = logRecord.ObservedTimestamp().AsTime() + } + + entry := &api.LogEntry{ + Timestamp: timestamppb.New(timestamp), + CollectionTime: timestamppb.New(logRecord.ObservedTimestamp().AsTime()), + Data: []byte(rawLog), + } + entries[logType] = append(entries[logType], entry) + // each logType maps to exactly 1 namespace value + if namespace != "" { + if _, ok := namespaceMap[logType]; !ok { + namespaceMap[logType] = namespace + } + } + if len(ingestionLabels) > 0 { + // each logType maps to a list of ingestion labels + if _, exists := ingestionLabelsMap[logType]; !exists { + ingestionLabelsMap[logType] = make([]*api.Label, 0) + } + existingLabels := make(map[string]struct{}) + for _, label := range ingestionLabelsMap[logType] { + existingLabels[label.Key] = struct{}{} + } + for _, label := range ingestionLabels { + // only add to ingestionLabelsMap if the label is unique + if _, ok := existingLabels[label.Key]; !ok { + ingestionLabelsMap[logType] = append(ingestionLabelsMap[logType], label) + existingLabels[label.Key] = struct{}{} + } + } + } + } + } + } + return entries, namespaceMap, 
ingestionLabelsMap, nil +} + +func (m *GRPC) processLogRecord(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, string, string, []*api.Label, error) { + rawLog, err := m.getRawLog(ctx, logRecord, scope, resource) + if err != nil { + return "", "", "", nil, err + } + logType, err := m.getLogType(ctx, logRecord, scope, resource) + if err != nil { + return "", "", "", nil, err + } + namespace, err := m.getNamespace(ctx, logRecord, scope, resource) + if err != nil { + return "", "", "", nil, err + } + ingestionLabels, err := m.getIngestionLabels(logRecord) + if err != nil { + return "", "", "", nil, err + } + return rawLog, logType, namespace, ingestionLabels, nil +} + +func (m *GRPC) getIngestionLabels(logRecord plog.LogRecord) ([]*api.Label, error) { + // check for labels in attributes["chronicle_ingestion_labels"] + ingestionLabels, err := m.getRawNestedFields(chronicleIngestionLabelsPrefix, logRecord) + if err != nil { + return []*api.Label{}, fmt.Errorf("get chronicle ingestion labels: %w", err) + } + + if len(ingestionLabels) != 0 { + return ingestionLabels, nil + } + // use labels defined in config if needed + configLabels := make([]*api.Label, 0) + for key, value := range m.cfg.IngestionLabels { + configLabels = append(configLabels, &api.Label{ + Key: key, + Value: value, + }) + } + return configLabels, nil +} + +func (m *GRPC) getRawNestedFields(field string, logRecord plog.LogRecord) ([]*api.Label, error) { + var nestedFields []*api.Label + logRecord.Attributes().Range(func(key string, value pcommon.Value) bool { + if !strings.HasPrefix(key, field) { + return true + } + // Extract the key name from the nested field + cleanKey := strings.Trim(key[len(field):], `[]"`) + var jsonMap map[string]string + + // If needs to be parsed as JSON + if err := json.Unmarshal([]byte(value.AsString()), &jsonMap); err == nil { + for k, v := range jsonMap { + nestedFields = append(nestedFields, &api.Label{Key: k, Value: 
v}) + } + } else { + nestedFields = append(nestedFields, &api.Label{Key: cleanKey, Value: value.AsString()}) + } + return true + }) + return nestedFields, nil +} + +func (m *GRPC) constructPayloads(rawLogs map[string][]*api.LogEntry, namespaceMap map[string]string, ingestionLabelsMap map[string][]*api.Label) []*api.BatchCreateLogsRequest { + payloads := make([]*api.BatchCreateLogsRequest, 0, len(rawLogs)) + for logType, entries := range rawLogs { + if len(entries) > 0 { + namespace, ok := namespaceMap[logType] + if !ok { + namespace = m.cfg.Namespace + } + ingestionLabels := ingestionLabelsMap[logType] + + request := m.buildGRPCRequest(entries, logType, namespace, ingestionLabels) + + payloads = append(payloads, m.enforceMaximumsGRPCRequest(request)...) + } + } + return payloads +} + +func (m *GRPC) enforceMaximumsGRPCRequest(request *api.BatchCreateLogsRequest) []*api.BatchCreateLogsRequest { + size := proto.Size(request) + entries := request.Batch.Entries + if size <= m.cfg.BatchRequestSizeLimit && len(entries) <= m.cfg.BatchLogCountLimit { + return []*api.BatchCreateLogsRequest{ + request, + } + } + + if len(entries) < 2 { + m.set.Logger.Error("Single entry exceeds max request size. Dropping entry", zap.Int("size", size)) + return []*api.BatchCreateLogsRequest{} + } + + // split request into two + mid := len(entries) / 2 + leftHalf := entries[:mid] + rightHalf := entries[mid:] + + request.Batch.Entries = leftHalf + otherHalfRequest := m.buildGRPCRequest(rightHalf, request.Batch.LogType, request.Batch.Source.Namespace, request.Batch.Source.Labels) + + // re-enforce max size restriction on each half + enforcedRequest := m.enforceMaximumsGRPCRequest(request) + enforcedOtherHalfRequest := m.enforceMaximumsGRPCRequest(otherHalfRequest) + + return append(enforcedRequest, enforcedOtherHalfRequest...) 
+} + +func (m *GRPC) buildGRPCRequest(entries []*api.LogEntry, logType, namespace string, ingestionLabels []*api.Label) *api.BatchCreateLogsRequest { + return &api.BatchCreateLogsRequest{ + Batch: &api.LogEntryBatch{ + StartTime: timestamppb.New(m.startTime), + Entries: entries, + LogType: logType, + Source: &api.EventSource{ + CollectorId: m.collectorID, + CustomerId: m.customerID, + Labels: ingestionLabels, + Namespace: namespace, + }, + }, + } +} diff --git a/exporter/chronicleexporter/internal/marshal/grpc_test.go b/exporter/chronicleexporter/internal/marshal/grpc_test.go new file mode 100644 index 000000000..9b5d6779b --- /dev/null +++ b/exporter/chronicleexporter/internal/marshal/grpc_test.go @@ -0,0 +1,785 @@ +// Copyright observIQ, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package marshal_test + +import ( + "context" + "testing" + "time" + + "github.com/google/uuid" + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/internal/marshal" + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/pdata/plog" + "go.uber.org/zap" + "google.golang.org/protobuf/types/known/timestamppb" +) + +func TestGRPC(t *testing.T) { + logger := zap.NewNop() + + tests := []struct { + name string + cfg marshal.Config + logRecords func() plog.Logs + expectations func(t *testing.T, requests []*api.BatchCreateLogsRequest, startTime time.Time) + }{ + { + name: "Single log record with expected data", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Test log message", map[string]any{"log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["env"]`: "prod"})) + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, startTime time.Time) { + require.Len(t, requests, 1) + batch := requests[0].Batch + require.Equal(t, "WINEVTLOG", batch.LogType) + require.Len(t, batch.Entries, 1) + + // Convert Data (byte slice) to string for comparison + logDataAsString := string(batch.Entries[0].Data) + expectedLogData := `Test log message` + require.Equal(t, expectedLogData, logDataAsString) + + require.NotNil(t, batch.StartTime) + require.True(t, timestamppb.New(startTime).AsTime().Equal(batch.StartTime.AsTime()), "Start time should be set correctly") + }, + }, + { + name: "Single log record with expected data, no log_type, namespace, or ingestion labels", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: 
true, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Test log message", nil)) + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, startTime time.Time) { + require.Len(t, requests, 1) + batch := requests[0].Batch + require.Equal(t, "WINEVTLOG", batch.LogType) + require.Equal(t, "", batch.Source.Namespace) + require.Equal(t, 0, len(batch.Source.Labels)) + require.Len(t, batch.Entries, 1) + + // Convert Data (byte slice) to string for comparison + logDataAsString := string(batch.Entries[0].Data) + expectedLogData := `Test log message` + require.Equal(t, expectedLogData, logDataAsString) + + require.NotNil(t, batch.StartTime) + require.True(t, timestamppb.New(startTime).AsTime().Equal(batch.StartTime.AsTime()), "Start time should be set correctly") + }, + }, + { + name: "Multiple log records", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + return logs + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + require.Len(t, requests, 1, "Expected a single batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") + // Verifying the first log entry data + require.Equal(t, "First log message", string(batch.Entries[0].Data)) + // Verifying the second log entry data + require.Equal(t, "Second log message", string(batch.Entries[1].Data)) + }, + }, + { + name: "Log record 
with attributes", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "attributes", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("", map[string]any{"key1": "value1", "log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"})) + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + require.Len(t, requests, 1) + batch := requests[0].Batch + require.Len(t, batch.Entries, 1) + + // Assuming the attributes are marshaled into the Data field as a JSON string + expectedData := `{"key1":"value1", "log_type":"WINEVTLOG", "namespace":"test", "chronicle_ingestion_label[\"key1\"]": "value1", "chronicle_ingestion_label[\"key2\"]": "value2"}` + actualData := string(batch.Entries[0].Data) + require.JSONEq(t, expectedData, actualData, "Log attributes should match expected") + }, + }, + { + name: "No log records", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "DEFAULT", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + return plog.NewLogs() // No log records added + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + require.Len(t, requests, 0, "Expected no requests due to no log records") + }, + }, + { + name: "No log type set in config or attributes", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + RawLogField: "body", + OverrideLogType: true, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Log without logType", map[string]any{"namespace": "test", `ingestion_label["realkey1"]`: "realvalue1", `ingestion_label["realkey2"]`: "realvalue2"})) + }, + 
expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + require.Len(t, requests, 1) + batch := requests[0].Batch + require.Equal(t, "", batch.LogType, "Expected log type to be empty") + }, + }, + { + name: "Multiple log records with duplicate data, no log type in attributes", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + record2.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + return logs + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify one request for log type in config + require.Len(t, requests, 1, "Expected a single batch request") + batch := requests[0].Batch + // verify batch source labels + require.Len(t, batch.Source.Labels, 2) + require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") + // Verifying the first log entry data + require.Equal(t, "First log message", string(batch.Entries[0].Data)) + // Verifying the second log entry data + require.Equal(t, "Second log message", string(batch.Entries[1].Data)) + }, + }, + { + name: "Multiple log records with different data, no log type in attributes", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: 
"WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + record2.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) + return logs + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify one request for one log type + require.Len(t, requests, 1, "Expected a single batch request") + batch := requests[0].Batch + require.Equal(t, "WINEVTLOG", batch.LogType) + require.Equal(t, "", batch.Source.Namespace) + // verify batch source labels + require.Len(t, batch.Source.Labels, 4) + require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") + // Verifying the first log entry data + require.Equal(t, "First log message", string(batch.Entries[0].Data)) + // Verifying the second log entry data + require.Equal(t, "Second log message", string(batch.Entries[1].Data)) + }, + }, + { + name: "Override log type with attribute", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "DEFAULT", // This should be overridden by the log_type attribute + RawLogField: "body", + OverrideLogType: true, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"log_type": "windows_event.application", "namespace": "test", `ingestion_label["realkey1"]`: "realvalue1", 
`ingestion_label["realkey2"]`: "realvalue2"})) + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + require.Len(t, requests, 1) + batch := requests[0].Batch + require.Equal(t, "WINEVTLOG", batch.LogType, "Expected log type to be overridden by attribute") + }, + }, + { + name: "Override log type with chronicle attribute", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "DEFAULT", // This should be overridden by the chronicle_log_type attribute + RawLogField: "body", + OverrideLogType: true, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"chronicle_log_type": "ASOC_ALERT", "chronicle_namespace": "test", `chronicle_ingestion_label["realkey1"]`: "realvalue1", `chronicle_ingestion_label["realkey2"]`: "realvalue2"})) + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + require.Len(t, requests, 1) + batch := requests[0].Batch + require.Equal(t, "ASOC_ALERT", batch.LogType, "Expected log type to be overridden by attribute") + require.Equal(t, "test", batch.Source.Namespace, "Expected namespace to be overridden by attribute") + expectedLabels := map[string]string{ + "realkey1": "realvalue1", + "realkey2": "realvalue2", + } + for _, label := range batch.Source.Labels { + require.Equal(t, expectedLabels[label.Key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Multiple log records with duplicate data, log type in attributes", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + 
record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + return logs + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify 1 request, 2 batches for same log type + require.Len(t, requests, 1, "Expected a single batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") + // verify batch for first log + require.Equal(t, "WINEVTLOGS", batch.LogType) + require.Equal(t, "test1", batch.Source.Namespace) + require.Len(t, batch.Source.Labels, 2) + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + } + for _, label := range batch.Source.Labels { + require.Equal(t, expectedLabels[label.Key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Multiple log records with different data, log type in attributes", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", 
`chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS2", "chronicle_namespace": "test2", `chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) + return logs + }, + + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify 2 requests, with 1 batch for different log types + require.Len(t, requests, 2, "Expected a two batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 1, "Expected one log entries in the batch") + // verify batch for first log + require.Contains(t, batch.LogType, "WINEVTLOGS") + require.Contains(t, batch.Source.Namespace, "test") + require.Len(t, batch.Source.Labels, 2) + + batch2 := requests[1].Batch + require.Len(t, batch2.Entries, 1, "Expected one log entries in the batch") + // verify batch for second log + require.Contains(t, batch2.LogType, "WINEVTLOGS") + require.Contains(t, batch2.Source.Namespace, "test") + require.Len(t, batch2.Source.Labels, 2) + // verify ingestion labels + for _, req := range requests { + for _, label := range req.Batch.Source.Labels { + require.Contains(t, []string{ + "key1", + "key2", + "key3", + "key4", + }, label.Key) + require.Contains(t, []string{ + "value1", + "value2", + "value3", + "value4", + }, label.Value) + } + } + }, + }, + { + name: "Many logs, all one batch", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + for i := 0; i < 1000; i++ { + 
record1 := logRecords.AppendEmpty() + record1.Body().SetStr("Log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + return logs + }, + + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify 1 request, with 1 batch + require.Len(t, requests, 1, "Expected a one-batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 1000, "Expected 1000 log entries in the batch") + // verify batch for first log + require.Contains(t, batch.LogType, "WINEVTLOGS") + require.Contains(t, batch.Source.Namespace, "test") + require.Len(t, batch.Source.Labels, 2) + + // verify ingestion labels + for _, req := range requests { + for _, label := range req.Batch.Source.Labels { + require.Contains(t, []string{ + "key1", + "key2", + "key3", + "key4", + }, label.Key) + require.Contains(t, []string{ + "value1", + "value2", + "value3", + "value4", + }, label.Value) + } + } + }, + }, + { + name: "Single batch split into multiple because more than 1000 logs", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + for i := 0; i < 1001; i++ { + record1 := logRecords.AppendEmpty() + record1.Body().SetStr("Log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + return logs + }, + + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify 1 request, with 1 batch + 
require.Len(t, requests, 2, "Expected a two-batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 500, "Expected 500 log entries in the first batch") + // verify batch for first log + require.Contains(t, batch.LogType, "WINEVTLOGS") + require.Contains(t, batch.Source.Namespace, "test") + require.Len(t, batch.Source.Labels, 2) + + batch2 := requests[1].Batch + require.Len(t, batch2.Entries, 501, "Expected 501 log entries in the second batch") + // verify batch for first log + require.Contains(t, batch2.LogType, "WINEVTLOGS") + require.Contains(t, batch2.Source.Namespace, "test") + require.Len(t, batch2.Source.Labels, 2) + + // verify ingestion labels + for _, req := range requests { + for _, label := range req.Batch.Source.Labels { + require.Contains(t, []string{ + "key1", + "key2", + "key3", + "key4", + }, label.Key) + require.Contains(t, []string{ + "value1", + "value2", + "value3", + "value4", + }, label.Value) + } + } + }, + }, + { + name: "Recursively split batch, exceeds 1000 entries multiple times", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + for i := 0; i < 2002; i++ { + record1 := logRecords.AppendEmpty() + record1.Body().SetStr("Log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + return logs + }, + + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify 1 request, with 1 batch + require.Len(t, requests, 4, "Expected a four-batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 500, "Expected 500 
log entries in the first batch") + // verify batch for first log + require.Contains(t, batch.LogType, "WINEVTLOGS") + require.Contains(t, batch.Source.Namespace, "test") + require.Len(t, batch.Source.Labels, 2) + + batch2 := requests[1].Batch + require.Len(t, batch2.Entries, 501, "Expected 501 log entries in the second batch") + // verify batch for first log + require.Contains(t, batch2.LogType, "WINEVTLOGS") + require.Contains(t, batch2.Source.Namespace, "test") + require.Len(t, batch2.Source.Labels, 2) + + batch3 := requests[2].Batch + require.Len(t, batch3.Entries, 500, "Expected 500 log entries in the third batch") + // verify batch for first log + require.Contains(t, batch3.LogType, "WINEVTLOGS") + require.Contains(t, batch3.Source.Namespace, "test") + require.Len(t, batch3.Source.Labels, 2) + + batch4 := requests[3].Batch + require.Len(t, batch4.Entries, 501, "Expected 501 log entries in the fourth batch") + // verify batch for first log + require.Contains(t, batch4.LogType, "WINEVTLOGS") + require.Contains(t, batch4.Source.Namespace, "test") + require.Len(t, batch4.Source.Labels, 2) + + // verify ingestion labels + for _, req := range requests { + for _, label := range req.Batch.Source.Labels { + require.Contains(t, []string{ + "key1", + "key2", + "key3", + "key4", + }, label.Key) + require.Contains(t, []string{ + "value1", + "value2", + "value3", + "value4", + }, label.Value) + } + } + }, + }, + { + name: "Single batch split into multiple because request size too large", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + // create 640 logs with size 8192 bytes each - totalling 5242880 bytes. 
non-body fields put us over limit + for i := 0; i < 640; i++ { + record1 := logRecords.AppendEmpty() + body := tokenWithLength(8192) + record1.Body().SetStr(string(body)) + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + return logs + }, + + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify request, with 1 batch + require.Len(t, requests, 2, "Expected a two-batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 320, "Expected 320 log entries in the first batch") + // verify batch for first log + require.Contains(t, batch.LogType, "WINEVTLOGS") + require.Contains(t, batch.Source.Namespace, "test") + require.Len(t, batch.Source.Labels, 2) + + batch2 := requests[1].Batch + require.Len(t, batch2.Entries, 320, "Expected 320 log entries in the second batch") + // verify batch for first log + require.Contains(t, batch2.LogType, "WINEVTLOGS") + require.Contains(t, batch2.Source.Namespace, "test") + require.Len(t, batch2.Source.Labels, 2) + + // verify ingestion labels + for _, req := range requests { + for _, label := range req.Batch.Source.Labels { + require.Contains(t, []string{ + "key1", + "key2", + "key3", + "key4", + }, label.Key) + require.Contains(t, []string{ + "value1", + "value2", + "value3", + "value4", + }, label.Value) + } + } + }, + }, + { + name: "Recursively split batch into multiple because request size too large", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + // create 1280 logs with size 8192 bytes each - totalling 5242880 * 
2 bytes. non-body fields put us over twice the limit + for i := 0; i < 1280; i++ { + record1 := logRecords.AppendEmpty() + body := tokenWithLength(8192) + record1.Body().SetStr(string(body)) + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + return logs + }, + + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify 1 request, with 1 batch + require.Len(t, requests, 4, "Expected a four-batch request") + batch := requests[0].Batch + require.Len(t, batch.Entries, 320, "Expected 320 log entries in the first batch") + // verify batch for first log + require.Contains(t, batch.LogType, "WINEVTLOGS") + require.Contains(t, batch.Source.Namespace, "test") + require.Len(t, batch.Source.Labels, 2) + + batch2 := requests[1].Batch + require.Len(t, batch2.Entries, 320, "Expected 320 log entries in the second batch") + // verify batch for first log + require.Contains(t, batch2.LogType, "WINEVTLOGS") + require.Contains(t, batch2.Source.Namespace, "test") + require.Len(t, batch2.Source.Labels, 2) + + batch3 := requests[2].Batch + require.Len(t, batch3.Entries, 320, "Expected 320 log entries in the third batch") + // verify batch for first log + require.Contains(t, batch3.LogType, "WINEVTLOGS") + require.Contains(t, batch3.Source.Namespace, "test") + require.Len(t, batch3.Source.Labels, 2) + + batch4 := requests[3].Batch + require.Len(t, batch4.Entries, 320, "Expected 320 log entries in the fourth batch") + // verify batch for first log + require.Contains(t, batch4.LogType, "WINEVTLOGS") + require.Contains(t, batch4.Source.Namespace, "test") + require.Len(t, batch4.Source.Labels, 2) + + // verify ingestion labels + for _, req := range requests { + for _, label := range req.Batch.Source.Labels { + require.Contains(t, []string{ + "key1", + "key2", + "key3", + "key4", + }, 
label.Key) + require.Contains(t, []string{ + "value1", + "value2", + "value3", + "value4", + }, label.Value) + } + } + }, + }, + { + name: "Unsplittable batch, single log exceeds max request size", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + body := tokenWithLength(5242881) + record1.Body().SetStr(string(body)) + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + return logs + }, + + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // verify 1 request, with 1 batch + require.Len(t, requests, 0, "Expected a zero requests") + }, + }, + { + name: "Multiple valid log records + unsplittable log entries", + cfg: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + tooLargeBody := string(tokenWithLength(5242881)) + // first normal log, then impossible to split log + logRecords1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + record1 := logRecords1.AppendEmpty() + record1.Body().SetStr("First log message") + tooLargeRecord1 := logRecords1.AppendEmpty() + tooLargeRecord1.Body().SetStr(tooLargeBody) + // first impossible to split log, then normal log + logRecords2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + tooLargeRecord2 := logRecords2.AppendEmpty() + tooLargeRecord2.Body().SetStr(tooLargeBody) + record2 := 
logRecords2.AppendEmpty() + record2.Body().SetStr("Second log message") + return logs + }, + expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest, _ time.Time) { + // this is a kind of weird edge case, the overly large logs makes the final requests quite inefficient, but it's going to be so rare that the inefficiency isn't a real concern + require.Len(t, requests, 2, "Expected two batch requests") + batch1 := requests[0].Batch + require.Len(t, batch1.Entries, 1, "Expected one log entry in the first batch") + // Verifying the first log entry data + require.Equal(t, "First log message", string(batch1.Entries[0].Data)) + + batch2 := requests[1].Batch + require.Len(t, batch2.Entries, 1, "Expected one log entry in the second batch") + // Verifying the second log entry data + require.Equal(t, "Second log message", string(batch2.Entries[0].Data)) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + marshaler, err := marshal.NewGRPC(tt.cfg, component.TelemetrySettings{Logger: logger}) + require.NoError(t, err) + + logs := tt.logRecords() + requests, err := marshaler.MarshalLogs(context.Background(), logs) + require.NoError(t, err) + + tt.expectations(t, requests, marshaler.StartTime()) + }) + } +} diff --git a/exporter/chronicleexporter/internal/marshal/http.go b/exporter/chronicleexporter/internal/marshal/http.go new file mode 100644 index 000000000..210c22d35 --- /dev/null +++ b/exporter/chronicleexporter/internal/marshal/http.go @@ -0,0 +1,240 @@ +// Copyright observIQ, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package marshal
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	"github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api"
+	"go.opentelemetry.io/collector/component"
+	"go.opentelemetry.io/collector/pdata/pcommon"
+	"go.opentelemetry.io/collector/pdata/plog"
+	"go.uber.org/zap"
+	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/types/known/timestamppb"
+)
+
+// HTTPConfig is the configuration for the HTTP marshaler. It extends the
+// shared Config with the values that are used to build the forwarder resource
+// name placed on every request.
+type HTTPConfig struct {
+	Config
+	// Project is the "projects/<Project>" segment of the forwarder name.
+	Project string
+	// Location is the "locations/<Location>" segment of the forwarder name.
+	Location string
+	// Forwarder is the "forwarders/<Forwarder>" segment of the forwarder name.
+	Forwarder string
+}
+
+// HTTP marshals plog.Logs into api.ImportLogsRequest payloads grouped by log
+// type.
+type HTTP struct {
+	protoMarshaler
+	project   string
+	location  string
+	forwarder string
+}
+
+// NewHTTP creates an HTTP marshaler from cfg. Any error returned by the
+// underlying proto marshaler constructor is returned unchanged.
+func NewHTTP(cfg HTTPConfig, set component.TelemetrySettings) (*HTTP, error) {
+	m, err := newProtoMarshaler(cfg.Config, set)
+	if err != nil {
+		return nil, err
+	}
+	return &HTTP{
+		protoMarshaler: *m,
+		project:        cfg.Project,
+		location:       cfg.Location,
+		forwarder:      cfg.Forwarder,
+	}, nil
+}
+
+// MarshalLogs converts ld into import requests keyed by log type. Each value
+// holds one or more requests that have been split so that none exceeds the
+// configured BatchRequestSizeLimit or BatchLogCountLimit.
+func (m *HTTP) MarshalLogs(ctx context.Context, ld plog.Logs) (map[string][]*api.ImportLogsRequest, error) {
+	rawLogs, err := m.extractRawHTTPLogs(ctx, ld)
+	if err != nil {
+		return nil, fmt.Errorf("extract raw logs: %w", err)
+	}
+	return m.constructHTTPPayloads(rawLogs), nil
+}
+
+// extractRawHTTPLogs walks every log record in ld and builds one api.Log per
+// record, grouped by the record's log type. Records that fail processing are
+// logged and skipped, and records that produce an empty raw log are skipped
+// silently. The returned error is currently always nil.
+func (m *HTTP) extractRawHTTPLogs(ctx context.Context, ld plog.Logs) (map[string][]*api.Log, error) {
+	entries := make(map[string][]*api.Log)
+	for i := 0; i < ld.ResourceLogs().Len(); i++ {
+		resourceLog := ld.ResourceLogs().At(i)
+		for j := 0; j < resourceLog.ScopeLogs().Len(); j++ {
+			scopeLog := resourceLog.ScopeLogs().At(j)
+			for k := 0; k < scopeLog.LogRecords().Len(); k++ {
+				logRecord := scopeLog.LogRecords().At(k)
+				rawLog, logType, namespace, ingestionLabels, err := m.processHTTPLogRecord(ctx, logRecord, scopeLog, resourceLog)
+				if err != nil {
+					m.set.Logger.Error("Error processing log record", zap.Error(err))
+					continue
+				}
+
+				if rawLog == "" {
+					continue
+				}
+
+				// Prefer the record's own timestamp; fall back to the observed
+				// timestamp when the record does not carry one.
+				var timestamp time.Time
+				if logRecord.Timestamp() != 0 {
+					timestamp = logRecord.Timestamp().AsTime()
+				} else {
+					timestamp = logRecord.ObservedTimestamp().AsTime()
+				}
+
+				entry := &api.Log{
+					LogEntryTime:         timestamppb.New(timestamp),
+					CollectionTime:       timestamppb.New(logRecord.ObservedTimestamp().AsTime()),
+					Data:                 []byte(rawLog),
+					EnvironmentNamespace: namespace,
+					Labels:               ingestionLabels,
+				}
+				entries[logType] = append(entries[logType], entry)
+			}
+		}
+	}
+
+	return entries, nil
+}
+
+// processHTTPLogRecord resolves, in order, the raw log, log type, namespace
+// and ingestion labels for a single record. It stops at the first lookup that
+// fails and returns that error with zero values for everything else.
+func (m *HTTP) processHTTPLogRecord(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, string, string, map[string]*api.Log_LogLabel, error) {
+	rawLog, err := m.getRawLog(ctx, logRecord, scope, resource)
+	if err != nil {
+		return "", "", "", nil, err
+	}
+
+	logType, err := m.getLogType(ctx, logRecord, scope, resource)
+	if err != nil {
+		return "", "", "", nil, err
+	}
+	namespace, err := m.getNamespace(ctx, logRecord, scope, resource)
+	if err != nil {
+		return "", "", "", nil, err
+	}
+	ingestionLabels, err := m.getHTTPIngestionLabels(logRecord)
+	if err != nil {
+		return "", "", "", nil, err
+	}
+
+	return rawLog, logType, namespace, ingestionLabels, nil
+}
+
+// getHTTPIngestionLabels returns the ingestion labels for logRecord. Labels
+// found on the record's attributes (keys starting with
+// chronicleIngestionLabelsPrefix) win; only when the record carries none are
+// the labels from the marshaler config used.
+func (m *HTTP) getHTTPIngestionLabels(logRecord plog.LogRecord) (map[string]*api.Log_LogLabel, error) {
+	// Check for labels carried on the record's attributes.
+	ingestionLabels, err := m.getHTTPRawNestedFields(chronicleIngestionLabelsPrefix, logRecord)
+	if err != nil {
+		return nil, fmt.Errorf("get chronicle ingestion labels: %w", err)
+	}
+
+	if len(ingestionLabels) != 0 {
+		return ingestionLabels, nil
+	}
+
+	// use labels defined in the config if needed
+	configLabels := make(map[string]*api.Log_LogLabel)
+	for key, value := range m.cfg.IngestionLabels {
+		configLabels[key] = &api.Log_LogLabel{
+			Value: value,
+		}
+	}
+	return configLabels, nil
+}
+
+// getHTTPRawNestedFields collects labels from every attribute of logRecord
+// whose key starts with field.
+//
+// If the attribute's string form decodes as a JSON object of string values,
+// each member of that object becomes a label. Otherwise the attribute itself
+// becomes a single label whose key is the remainder of the attribute key after
+// field, with any surrounding `[`, `]` and `"` characters trimmed.
+//
+// The returned error is currently always nil.
+func (m *HTTP) getHTTPRawNestedFields(field string, logRecord plog.LogRecord) (map[string]*api.Log_LogLabel, error) {
+	nestedFields := make(map[string]*api.Log_LogLabel) // Map with key as string and value as Log_LogLabel
+	logRecord.Attributes().Range(func(key string, value pcommon.Value) bool {
+		if !strings.HasPrefix(key, field) {
+			return true
+		}
+		// Extract the key name from the nested field
+		cleanKey := strings.Trim(key[len(field):], `[]"`)
+		raw := value.AsString()
+		var jsonMap map[string]string
+
+		// If needs to be parsed as JSON. json.Unmarshal also succeeds on the
+		// literal `null`, leaving jsonMap nil; requiring a non-nil map keeps a
+		// label whose value is the string "null" from being silently dropped.
+		if err := json.Unmarshal([]byte(raw), &jsonMap); err == nil && jsonMap != nil {
+			for k, v := range jsonMap {
+				nestedFields[k] = &api.Log_LogLabel{
+					Value: v,
+				}
+			}
+		} else {
+			nestedFields[cleanKey] = &api.Log_LogLabel{
+				Value: raw,
+			}
+		}
+		return true
+	})
+
+	return nestedFields, nil
+}
+
+// buildForwarderString returns the forwarder resource name in the form
+// "projects/<project>/locations/<location>/instances/<customerID>/forwarders/<forwarder>".
+//
+// NOTE(review): m.customerID is declared on protoMarshaler, outside this file;
+// confirm that it formats as the textual customer ID under %s.
+func (m *HTTP) buildForwarderString() string {
+	format := "projects/%s/locations/%s/instances/%s/forwarders/%s"
+	return fmt.Sprintf(format, m.project, m.location, m.customerID, m.forwarder)
+}
+
+// constructHTTPPayloads builds the import requests for each log type in
+// rawLogs. Log types with no entries are omitted from the result.
+func (m *HTTP) constructHTTPPayloads(rawLogs map[string][]*api.Log) map[string][]*api.ImportLogsRequest {
+	payloads := make(map[string][]*api.ImportLogsRequest, len(rawLogs))
+
+	for logType, entries := range rawLogs {
+		if len(entries) > 0 {
+			request := m.buildHTTPRequest(entries)
+
+			payloads[logType] = m.enforceMaximumsHTTPRequest(request)
+		}
+	}
+	return payloads
+}
+
+// enforceMaximumsHTTPRequest returns request unchanged when it is within both
+// the configured size limit and log count limit. Otherwise it halves the
+// request's logs and recurses on each half, so the result may contain any
+// number of requests. A request that is over a limit and has fewer than two
+// logs cannot be split; it is logged and dropped, yielding an empty slice.
+//
+// The passed request is modified in place: when a split happens it keeps only
+// the first half of its logs.
+func (m *HTTP) enforceMaximumsHTTPRequest(request *api.ImportLogsRequest) []*api.ImportLogsRequest {
+	size := proto.Size(request)
+	logs := request.GetInlineSource().Logs
+	if size <= m.cfg.BatchRequestSizeLimit && len(logs) <= m.cfg.BatchLogCountLimit {
+		return []*api.ImportLogsRequest{
+			request,
+		}
+	}
+
+	if len(logs) < 2 {
+		m.set.Logger.Error("Single entry exceeds max request size. Dropping entry", zap.Int("size", size))
+		return []*api.ImportLogsRequest{}
+	}
+
+	// split request into two
+	mid := len(logs) / 2
+	leftHalf := logs[:mid]
+	rightHalf := logs[mid:]
+
+	request.GetInlineSource().Logs = leftHalf
+	otherHalfRequest := m.buildHTTPRequest(rightHalf)
+
+	// re-enforce max size restriction on each half
+	enforcedRequest := m.enforceMaximumsHTTPRequest(request)
+	enforcedOtherHalfRequest := m.enforceMaximumsHTTPRequest(otherHalfRequest)
+
+	return append(enforcedRequest, enforcedOtherHalfRequest...)
+}
+
+// buildHTTPRequest wraps entries in an ImportLogsRequest whose inline source
+// names this marshaler's forwarder.
+func (m *HTTP) buildHTTPRequest(entries []*api.Log) *api.ImportLogsRequest {
+	return &api.ImportLogsRequest{
+		// TODO: Add parent and hint
+		// We don't yet have solid guidance on what these should be
+		Parent: "",
+		Hint:   "",
+
+		Source: &api.ImportLogsRequest_InlineSource{
+			InlineSource: &api.ImportLogsRequest_LogsInlineSource{
+				Forwarder: m.buildForwarderString(),
+				Logs:      entries,
+			},
+		},
+	}
+}
diff --git a/exporter/chronicleexporter/internal/marshal/http_test.go b/exporter/chronicleexporter/internal/marshal/http_test.go
new file mode 100644
index 000000000..ed1072957
--- /dev/null
+++ b/exporter/chronicleexporter/internal/marshal/http_test.go
@@ -0,0 +1,769 @@
+// Copyright observIQ, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +package marshal_test + +import ( + "context" + "testing" + "time" + + "github.com/google/uuid" + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/internal/marshal" + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/pdata/plog" + "go.uber.org/zap" +) + +func TestHTTP(t *testing.T) { + logger := zap.NewNop() + + tests := []struct { + name string + cfg marshal.HTTPConfig + labels []*api.Label + logRecords func() plog.Logs + expectations func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) + }{ + { + name: "Single log record with expected data", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + Project: "test-project", + Location: "us", + Forwarder: uuid.New().String(), + }, + labels: []*api.Label{ + {Key: "env", Value: "prod"}, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Test log message", map[string]any{"log_type": "WINEVTLOG", "namespace": "test"})) + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, _ time.Time) { + require.Len(t, requests, 1) + logs := requests["WINEVTLOG"][0].GetInlineSource().Logs + require.Len(t, logs, 1) + // Convert Data (byte slice) to string for comparison + logDataAsString := string(logs[0].Data) + expectedLogData := `Test log message` + require.Equal(t, expectedLogData, logDataAsString) + }, + }, + { + name: "Multiple log records", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + labels: []*api.Label{ + {Key: "env", Value: 
"staging"}, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + return logs + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 1, "Expected a single batch request") + logs := requests["WINEVTLOG"][0].GetInlineSource().Logs + require.Len(t, logs, 2, "Expected two log entries in the batch") + // Verifying the first log entry data + require.Equal(t, "First log message", string(logs[0].Data)) + // Verifying the second log entry data + require.Equal(t, "Second log message", string(logs[1].Data)) + }, + }, + { + name: "Log record with attributes", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "attributes", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + labels: []*api.Label{}, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("", map[string]any{"key1": "value1", "log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"})) + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 1) + logs := requests["WINEVTLOG"][0].GetInlineSource().Logs + // Assuming the attributes are marshaled into the Data field as a JSON string + expectedData := `{"key1":"value1", "log_type":"WINEVTLOG", "namespace":"test", "chronicle_ingestion_label[\"key1\"]": "value1", "chronicle_ingestion_label[\"key2\"]": "value2"}` + actualData := string(logs[0].Data) + require.JSONEq(t, expectedData, actualData, "Log 
attributes should match expected") + }, + }, + { + name: "No log records", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "DEFAULT", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + labels: []*api.Label{}, + logRecords: func() plog.Logs { + return plog.NewLogs() // No log records added + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 0, "Expected no requests due to no log records") + }, + }, + { + name: "No log type set in config or attributes", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "attributes", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + labels: []*api.Label{}, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("", map[string]any{"key1": "value1", "log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"})) + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 1) + logs := requests["WINEVTLOG"][0].GetInlineSource().Logs + // Assuming the attributes are marshaled into the Data field as a JSON string + expectedData := `{"key1":"value1", "log_type":"WINEVTLOG", "namespace":"test", "chronicle_ingestion_label[\"key1\"]": "value1", "chronicle_ingestion_label[\"key2\"]": "value2"}` + actualData := string(logs[0].Data) + require.JSONEq(t, expectedData, actualData, "Log attributes should match expected") + }, + }, + { + name: "Multiple log records with duplicate data, no log type in attributes", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: 
false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + record2.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + return logs + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + // verify one request for log type in config + require.Len(t, requests, 1, "Expected a single batch request") + logs := requests["WINEVTLOG"][0].GetInlineSource().Logs + // verify batch source labels + require.Len(t, logs[0].Labels, 2) + require.Len(t, logs, 2, "Expected two log entries in the batch") + // Verifying the first log entry data + require.Equal(t, "First log message", string(logs[0].Data)) + // Verifying the second log entry data + require.Equal(t, "Second log message", string(logs[1].Data)) + }, + }, + { + name: "Multiple log records with different data, no log type in attributes", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + 
record1.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + record2.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) + return logs + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + // verify one request for one log type + require.Len(t, requests, 1, "Expected a single batch request") + logs := requests["WINEVTLOG"][0].GetInlineSource().Logs + require.Len(t, logs, 2, "Expected two log entries in the batch") + require.Equal(t, "", logs[0].EnvironmentNamespace) + // verify batch source labels + require.Len(t, logs[0].Labels, 2) + require.Len(t, logs[1].Labels, 2) + // Verifying the first log entry data + require.Equal(t, "First log message", string(logs[0].Data)) + // Verifying the second log entry data + require.Equal(t, "Second log message", string(logs[1].Data)) + }, + }, + { + name: "Override log type with attribute", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "DEFAULT", // This should be overridden by the log_type attribute + RawLogField: "body", + OverrideLogType: true, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"log_type": "windows_event.application", "namespace": "test", `ingestion_label["realkey1"]`: "realvalue1", `ingestion_label["realkey2"]`: "realvalue2"})) + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 1) + logs := requests["WINEVTLOG"][0].GetInlineSource().Logs + require.NotEqual(t, len(logs), 0) + }, + }, + { + 
name: "Override log type with chronicle attribute", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "DEFAULT", // This should be overridden by the chronicle_log_type attribute + RawLogField: "body", + OverrideLogType: true, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"chronicle_log_type": "ASOC_ALERT", "chronicle_namespace": "test", `chronicle_ingestion_label["realkey1"]`: "realvalue1", `chronicle_ingestion_label["realkey2"]`: "realvalue2"})) + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 1) + logs := requests["ASOC_ALERT"][0].GetInlineSource().Logs + require.Equal(t, "test", logs[0].EnvironmentNamespace, "Expected namespace to be overridden by attribute") + expectedLabels := map[string]string{ + "realkey1": "realvalue1", + "realkey2": "realvalue2", + } + for key, label := range logs[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Multiple log records with duplicate data, log type in attributes", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + + record2 := 
logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + return logs + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + // verify 1 request, 2 batches for same log type + require.Len(t, requests, 1, "Expected a single batch request") + logs := requests["WINEVTLOGS"][0].GetInlineSource().Logs + require.Len(t, logs, 2, "Expected two log entries in the batch") + // verify variables + require.Equal(t, "test1", logs[0].EnvironmentNamespace) + require.Len(t, logs[0].Labels, 2) + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + } + for key, label := range logs[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Multiple log records with different data, log type in attributes", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + + record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record2.Body().SetStr("Second log message") + 
record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS2", "chronicle_namespace": "test2", `chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) + return logs + }, + + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + "key3": "value3", + "key4": "value4", + } + // verify 2 requests, with 1 batch for different log types + require.Len(t, requests, 2, "Expected a two batch request") + + logs1 := requests["WINEVTLOGS1"][0].GetInlineSource().Logs + require.Len(t, logs1, 1, "Expected one log entries in the batch") + // verify variables for first log + require.Equal(t, logs1[0].EnvironmentNamespace, "test1") + require.Len(t, logs1[0].Labels, 2) + for key, label := range logs1[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs2 := requests["WINEVTLOGS2"][0].GetInlineSource().Logs + require.Len(t, logs2, 1, "Expected one log entries in the batch") + // verify variables for second log + require.Equal(t, logs2[0].EnvironmentNamespace, "test2") + require.Len(t, logs2[0].Labels, 2) + for key, label := range logs2[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Many log records all one batch", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + for i := 0; i < 1000; i++ { + record1 := logRecords.AppendEmpty() + record1.Body().SetStr("First log message") + 
record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + + return logs + }, + + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + } + // verify 1 requests + require.Len(t, requests, 1, "Expected a one batch request") + + logs1 := requests["WINEVTLOGS1"][0].GetInlineSource().Logs + require.Len(t, logs1, 1000, "Expected one thousand log entries in the batch") + // verify variables for first log + require.Equal(t, logs1[0].EnvironmentNamespace, "test1") + require.Len(t, logs1[0].Labels, 2) + for key, label := range logs1[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Many log records split into two batches", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + for i := 0; i < 1001; i++ { + record1 := logRecords.AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + + return logs + }, + + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + } + // verify 1 request log type + require.Len(t, requests, 1, "Expected one 
log type for the requests") + winEvtLogRequests := requests["WINEVTLOGS1"] + require.Len(t, winEvtLogRequests, 2, "Expected two batches") + + logs1 := winEvtLogRequests[0].GetInlineSource().Logs + require.Len(t, logs1, 500, "Expected 500 log entries in the first batch") + // verify variables for first log + require.Equal(t, logs1[0].EnvironmentNamespace, "test1") + require.Len(t, logs1[0].Labels, 2) + for key, label := range logs1[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs2 := winEvtLogRequests[1].GetInlineSource().Logs + require.Len(t, logs2, 501, "Expected 501 log entries in the second batch") + // verify variables for first log + require.Equal(t, logs2[0].EnvironmentNamespace, "test1") + require.Len(t, logs2[0].Labels, 2) + for key, label := range logs2[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Recursively split batch multiple times because too many logs", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + for i := 0; i < 2002; i++ { + record1 := logRecords.AppendEmpty() + record1.Body().SetStr("First log message") + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + + return logs + }, + + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + } + // 
verify 1 request log type + require.Len(t, requests, 1, "Expected one log type for the requests") + winEvtLogRequests := requests["WINEVTLOGS1"] + require.Len(t, winEvtLogRequests, 4, "Expected four batches") + + logs1 := winEvtLogRequests[0].GetInlineSource().Logs + require.Len(t, logs1, 500, "Expected 500 log entries in the first batch") + // verify variables for first log + require.Equal(t, logs1[0].EnvironmentNamespace, "test1") + require.Len(t, logs1[0].Labels, 2) + for key, label := range logs1[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs2 := winEvtLogRequests[1].GetInlineSource().Logs + require.Len(t, logs2, 501, "Expected 501 log entries in the second batch") + // verify variables for first log + require.Equal(t, logs2[0].EnvironmentNamespace, "test1") + require.Len(t, logs2[0].Labels, 2) + for key, label := range logs2[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs3 := winEvtLogRequests[2].GetInlineSource().Logs + require.Len(t, logs3, 500, "Expected 500 log entries in the third batch") + // verify variables for first log + require.Equal(t, logs3[0].EnvironmentNamespace, "test1") + require.Len(t, logs3[0].Labels, 2) + for key, label := range logs3[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs4 := winEvtLogRequests[3].GetInlineSource().Logs + require.Len(t, logs4, 501, "Expected 501 log entries in the fourth batch") + // verify variables for first log + require.Equal(t, logs4[0].EnvironmentNamespace, "test1") + require.Len(t, logs4[0].Labels, 2) + for key, label := range logs4[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Many log records split into two batches because request size too 
large", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + // 8192 * 640 = 5242880 + body := tokenWithLength(8192) + for i := 0; i < 640; i++ { + record1 := logRecords.AppendEmpty() + record1.Body().SetStr(string(body)) + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + + return logs + }, + + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + } + // verify 1 request log type + require.Len(t, requests, 1, "Expected one log type for the requests") + winEvtLogRequests := requests["WINEVTLOGS1"] + require.Len(t, winEvtLogRequests, 2, "Expected two batches") + + logs1 := winEvtLogRequests[0].GetInlineSource().Logs + require.Len(t, logs1, 320, "Expected 320 log entries in the first batch") + // verify variables for first log + require.Equal(t, logs1[0].EnvironmentNamespace, "test1") + require.Len(t, logs1[0].Labels, 2) + for key, label := range logs1[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs2 := winEvtLogRequests[1].GetInlineSource().Logs + require.Len(t, logs2, 320, "Expected 320 log entries in the second batch") + // verify variables for first log + require.Equal(t, logs2[0].EnvironmentNamespace, "test1") + require.Len(t, logs2[0].Labels, 2) + for key, label := range logs2[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion 
label to be overridden by attribute") + } + }, + }, + { + name: "Recursively split into batches because request size too large", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 2000, + BatchRequestSizeLimit: 5242880, + }, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + // 8192 * 1280 = 5242880 * 2 + body := tokenWithLength(8192) + for i := 0; i < 1280; i++ { + record1 := logRecords.AppendEmpty() + record1.Body().SetStr(string(body)) + record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) + } + + return logs + }, + + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + expectedLabels := map[string]string{ + "key1": "value1", + "key2": "value2", + } + // verify 1 request log type + require.Len(t, requests, 1, "Expected one log type for the requests") + winEvtLogRequests := requests["WINEVTLOGS1"] + require.Len(t, winEvtLogRequests, 4, "Expected four batches") + + logs1 := winEvtLogRequests[0].GetInlineSource().Logs + require.Len(t, logs1, 320, "Expected 320 log entries in the first batch") + // verify variables for first log + require.Equal(t, logs1[0].EnvironmentNamespace, "test1") + require.Len(t, logs1[0].Labels, 2) + for key, label := range logs1[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs2 := winEvtLogRequests[1].GetInlineSource().Logs + require.Len(t, logs2, 320, "Expected 320 log entries in the second batch") + // verify variables for first log + require.Equal(t, logs2[0].EnvironmentNamespace, "test1") + require.Len(t, 
logs2[0].Labels, 2) + for key, label := range logs2[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs3 := winEvtLogRequests[2].GetInlineSource().Logs + require.Len(t, logs3, 320, "Expected 320 log entries in the third batch") + // verify variables for first log + require.Equal(t, logs3[0].EnvironmentNamespace, "test1") + require.Len(t, logs3[0].Labels, 2) + for key, label := range logs3[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + + logs4 := winEvtLogRequests[3].GetInlineSource().Logs + require.Len(t, logs4, 320, "Expected 320 log entries in the fourth batch") + // verify variables for first log + require.Equal(t, logs4[0].EnvironmentNamespace, "test1") + require.Len(t, logs4[0].Labels, 2) + for key, label := range logs4[0].Labels { + require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") + } + }, + }, + { + name: "Unsplittable log record, single log exceeds request size limit", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 100000, + }, + }, + labels: []*api.Label{ + {Key: "env", Value: "staging"}, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() + record1.Body().SetStr(string(tokenWithLength(100000))) + return logs + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 1, "Expected one log type") + require.Len(t, requests["WINEVTLOG"], 0, "Expected WINEVTLOG log type to have zero requests") + }, + }, + { + name: "Unsplittable log record, single log exceeds request size limit, mixed with 
okay logs", + cfg: marshal.HTTPConfig{ + Config: marshal.Config{ + CustomerID: uuid.New().String(), + LogType: "WINEVTLOG", + RawLogField: "body", + OverrideLogType: false, + BatchLogCountLimit: 1000, + BatchRequestSizeLimit: 100000, + }, + }, + labels: []*api.Label{ + {Key: "env", Value: "staging"}, + }, + logRecords: func() plog.Logs { + logs := plog.NewLogs() + tooLargeBody := string(tokenWithLength(100001)) + // first normal log, then impossible to split log + logRecords1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + record1 := logRecords1.AppendEmpty() + record1.Body().SetStr("First log message") + tooLargeRecord1 := logRecords1.AppendEmpty() + tooLargeRecord1.Body().SetStr(tooLargeBody) + // first impossible to split log, then normal log + logRecords2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() + tooLargeRecord2 := logRecords2.AppendEmpty() + tooLargeRecord2.Body().SetStr(tooLargeBody) + record2 := logRecords2.AppendEmpty() + record2.Body().SetStr("Second log message") + return logs + }, + expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest, startTime time.Time) { + require.Len(t, requests, 1, "Expected one log type") + winEvtLogRequests := requests["WINEVTLOG"] + require.Len(t, winEvtLogRequests, 2, "Expected WINEVTLOG log type to have zero requests") + + logs1 := winEvtLogRequests[0].GetInlineSource().Logs + require.Len(t, logs1, 1, "Expected 1 log entry in the first batch") + require.Equal(t, string(logs1[0].Data), "First log message") + + logs2 := winEvtLogRequests[1].GetInlineSource().Logs + require.Len(t, logs2, 1, "Expected 1 log entry in the second batch") + require.Equal(t, string(logs2[0].Data), "Second log message") + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + marshaler, err := marshal.NewHTTP(tt.cfg, component.TelemetrySettings{Logger: logger}) + require.NoError(t, err) + + logs := tt.logRecords() + requests, err := 
marshaler.MarshalLogs(context.Background(), logs) + require.NoError(t, err) + + tt.expectations(t, requests, marshaler.StartTime()) + }) + } +} diff --git a/exporter/chronicleexporter/internal/marshal/marshal.go b/exporter/chronicleexporter/internal/marshal/marshal.go new file mode 100644 index 000000000..a68336268 --- /dev/null +++ b/exporter/chronicleexporter/internal/marshal/marshal.go @@ -0,0 +1,205 @@ +// Copyright observIQ, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package marshal + +import ( + "context" + "encoding/json" + "fmt" + "time" + + "github.com/google/uuid" + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/internal/ccid" + "github.com/observiq/bindplane-agent/expr" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" + "go.opentelemetry.io/collector/component" + "go.opentelemetry.io/collector/pdata/pcommon" + "go.opentelemetry.io/collector/pdata/plog" +) + +const logTypeField = `attributes["log_type"]` +const chronicleLogTypeField = `attributes["chronicle_log_type"]` +const chronicleNamespaceField = `attributes["chronicle_namespace"]` +const chronicleIngestionLabelsPrefix = `chronicle_ingestion_label` + +var supportedLogTypes = map[string]string{ + "windows_event.security": "WINEVTLOG", + "windows_event.application": "WINEVTLOG", + "windows_event.system": "WINEVTLOG", + "sql_server": "MICROSOFT_SQL", +} + +// Config holds the settings common to both marshalers. It is used as-is for GRPC and is a subset of HTTPConfig. +// If GRPC ever needs its own fields, make this a shared unexported struct and embed it in both HTTPConfig and a new GRPCConfig. 
+type Config struct { + CustomerID string + Namespace string + LogType string + RawLogField string + OverrideLogType bool + IngestionLabels map[string]string + BatchRequestSizeLimit int + BatchLogCountLimit int +} + +type protoMarshaler struct { + cfg Config + set component.TelemetrySettings + startTime time.Time + customerID []byte + collectorID []byte +} + +func newProtoMarshaler(cfg Config, set component.TelemetrySettings) (*protoMarshaler, error) { + customerID, err := uuid.Parse(cfg.CustomerID) + if err != nil { + return nil, fmt.Errorf("parse customer ID: %w", err) + } + return &protoMarshaler{ + startTime: time.Now(), + cfg: cfg, + set: set, + customerID: customerID[:], + collectorID: ccid.ChronicleCollectorID[:], + }, nil +} + +func (m *protoMarshaler) StartTime() time.Time { + return m.startTime +} + +func (m *protoMarshaler) getRawLog(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, error) { + if m.cfg.RawLogField == "" { // no raw log field configured: send body, attributes and resource attributes as JSON + entireLogRecord := map[string]any{ + "body": logRecord.Body().Str(), + "attributes": logRecord.Attributes().AsRaw(), + "resource_attributes": resource.Resource().Attributes().AsRaw(), + } + + bytesLogRecord, err := json.Marshal(entireLogRecord) + if err != nil { + return "", fmt.Errorf("marshal log record: %w", err) + } + + return string(bytesLogRecord), nil + } + return GetRawField(ctx, m.set, m.cfg.RawLogField, logRecord, scope, resource) +} + +func (m *protoMarshaler) getLogType(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, error) { + // attributes["chronicle_log_type"] takes precedence over every other source when it is a non-empty string + logType, err := GetRawField(ctx, m.set, chronicleLogTypeField, logRecord, scope, resource) + if err != nil { + return "", fmt.Errorf("get chronicle log type: %w", err) + } + if logType != "" { + return 
logType, nil + } + + if m.cfg.OverrideLogType { // otherwise try attributes["log_type"], mapped through supportedLogTypes + logType, err := GetRawField(ctx, m.set, logTypeField, logRecord, scope, 
resource) + + if err != nil { + return "", fmt.Errorf("get log type: %w", err) + } + if logType != "" { + if chronicleLogType, ok := supportedLogTypes[logType]; ok { + return chronicleLogType, nil + } + } + } + + return m.cfg.LogType, nil // fall back to the configured log type +} + +func (m *protoMarshaler) getNamespace(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, error) { + // attributes["chronicle_namespace"] overrides the configured namespace when it is a non-empty string + namespace, err := GetRawField(ctx, m.set, chronicleNamespaceField, logRecord, scope, resource) + if err != nil { + return "", fmt.Errorf("get chronicle namespace: %w", err) + } + if namespace != "" { + return namespace, nil + } + return m.cfg.Namespace, nil +} + +func GetRawField(ctx context.Context, set component.TelemetrySettings, field string, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, error) { + switch field { // fast paths for well-known fields; anything else is evaluated as an OTTL expression below + case "body": + switch logRecord.Body().Type() { + case pcommon.ValueTypeStr: + return logRecord.Body().Str(), nil + case pcommon.ValueTypeMap: + bytes, err := json.Marshal(logRecord.Body().AsRaw()) + if err != nil { + return "", fmt.Errorf("marshal log body: %w", err) + } + return string(bytes), nil + } // any other body type is evaluated through the OTTL path below + case logTypeField: + attributes := logRecord.Attributes() // direct lookup; avoids converting the whole attribute map with AsRaw + if logType, ok := attributes.Get("log_type"); ok { + if logType.Type() == pcommon.ValueTypeStr { + return logType.Str(), nil + } + } + return "", nil + case chronicleLogTypeField: + attributes := logRecord.Attributes() + if logType, ok := attributes.Get("chronicle_log_type"); ok { + if logType.Type() == pcommon.ValueTypeStr { + return logType.Str(), nil + } + } + return "", nil + case 
chronicleNamespaceField: + attributes := logRecord.Attributes() + if namespace, ok := attributes.Get("chronicle_namespace"); ok { + if namespace.Type() == pcommon.ValueTypeStr { + return namespace.Str(), nil + } + } + return "", nil + } + + lrExpr, err := expr.NewOTTLLogRecordExpression(field, set) + if err != nil { + return "", fmt.Errorf("raw_log_field is invalid: %w", 
err) + } + tCtx := ottllog.NewTransformContext(logRecord, scope.Scope(), resource.Resource(), scope, resource) + + lrExprResult, err := lrExpr.Execute(ctx, tCtx) + if err != nil { + return "", fmt.Errorf("execute log record expression: %w", err) + } + + if lrExprResult == nil { + return "", nil + } + + switch result := lrExprResult.(type) { + case string: + return result, nil + case pcommon.Map: + bytes, err := json.Marshal(result.AsRaw()) + if err != nil { + return "", fmt.Errorf("marshal log record expression result: %w", err) + } + return string(bytes), nil + default: + return "", fmt.Errorf("unsupported log record expression result type: %T", lrExprResult) + } +} diff --git a/exporter/chronicleexporter/internal/marshal/marshal_test.go b/exporter/chronicleexporter/internal/marshal/marshal_test.go new file mode 100644 index 000000000..d78586fbb --- /dev/null +++ b/exporter/chronicleexporter/internal/marshal/marshal_test.go @@ -0,0 +1,225 @@ +// Copyright observIQ, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package marshal_test + +import ( + "context" + "testing" + + "github.com/observiq/bindplane-agent/exporter/chronicleexporter/internal/marshal" + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/component/componenttest" + "go.opentelemetry.io/collector/pdata/plog" + "golang.org/x/exp/rand" +) + +func Test_GetRawField(t *testing.T) { + for _, tc := range getRawFieldCases { + t.Run(tc.name, func(t *testing.T) { + ctx := context.Background() + set := componenttest.NewNopTelemetrySettings() + rawField, err := marshal.GetRawField(ctx, set, tc.field, tc.logRecord, tc.scope, tc.resource) + if tc.expectErrStr != "" { + require.ErrorContains(t, err, tc.expectErrStr) // fails cleanly (no nil dereference) when err is nil + return + } + require.NoError(t, err) + require.Equal(t, tc.expect, rawField) + }) + } +} + +func Benchmark_GetRawField(b *testing.B) { + ctx := context.Background() + set := componenttest.NewNopTelemetrySettings() + for _, tc := range getRawFieldCases { + b.Run(tc.name, func(b *testing.B) { + b.ResetTimer() // each sub-benchmark has its own timer, so reset it here rather than on the parent + for i := 0; i < b.N; i++ { + _, _ = marshal.GetRawField(ctx, set, tc.field, tc.logRecord, tc.scope, tc.resource) + } + }) + } +} + +func tokenWithLength(length int) []byte { + charset := "abcdefghijklmnopqrstuvwxyz" + b := make([]byte, length) + for i := range b { + b[i] = charset[rand.Intn(len(charset))] + } + return b +} + +func mockLogRecord(body string, attributes map[string]any) plog.LogRecord { + lr := plog.NewLogRecord() + lr.Body().SetStr(body) + for k, v := range attributes { + switch val := v.(type) { + case string: + lr.Attributes().PutStr(k, val) + default: // non-string attribute values are dropped + } + } + return lr +} + +func mockLogs(record plog.LogRecord) plog.Logs { + logs := plog.NewLogs() + rl := logs.ResourceLogs().AppendEmpty() + sl := rl.ScopeLogs().AppendEmpty() + 
record.CopyTo(sl.LogRecords().AppendEmpty()) + return logs +} + +type getRawFieldCase struct { + name string + field string + logRecord plog.LogRecord + scope plog.ScopeLogs + resource plog.ResourceLogs + expect string + expectErrStr 
string +} + +// getRawFieldCases is the table shared by Test_GetRawField and Benchmark_GetRawField. +var getRawFieldCases = []getRawFieldCase{ + { + name: "String body", + field: "body", + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Body().SetStr("703604000x80800000000000003562SystemWIN-L6PC55MPB98Print Spoolerstopped530070006F006F006C00650072002F0031000000") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "703604000x80800000000000003562SystemWIN-L6PC55MPB98Print Spoolerstopped530070006F006F006C00650072002F0031000000", + }, + { + name: "Empty body", + field: "body", + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Body().SetStr("") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "", + }, + { + name: "Map body", + field: "body", + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Body().SetEmptyMap() + lr.Body().Map().PutStr("param1", "Print Spooler") + lr.Body().Map().PutStr("param2", "stopped") + lr.Body().Map().PutStr("binary", "530070006F006F006C00650072002F0031000000") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: `{"binary":"530070006F006F006C00650072002F0031000000","param1":"Print Spooler","param2":"stopped"}`, + }, + { + name: "Map body field", + field: "body[\"param1\"]", + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Body().SetEmptyMap() + lr.Body().Map().PutStr("param1", "Print Spooler") + lr.Body().Map().PutStr("param2", "stopped") + lr.Body().Map().PutStr("binary", "530070006F006F006C00650072002F0031000000") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "Print Spooler", + }, + { + name: "Map body field missing", + field: "body[\"missing\"]", + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Body().SetEmptyMap() + lr.Body().Map().PutStr("param1", "Print Spooler") + 
lr.Body().Map().PutStr("param2", "stopped") + 
lr.Body().Map().PutStr("binary", "530070006F006F006C00650072002F0031000000") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "", + }, + { + name: "Attribute log_type", + field: `attributes["log_type"]`, + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Attributes().PutStr("status", "200") + lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") + lr.Attributes().PutStr("log_type", "WINEVTLOG") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "WINEVTLOG", + }, + { + name: "Attribute log_type missing", + field: `attributes["log_type"]`, + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Attributes().PutStr("status", "200") + lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "", + }, + { + name: "Attribute chronicle_log_type", + field: `attributes["chronicle_log_type"]`, + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Attributes().PutStr("status", "200") + lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") + lr.Attributes().PutStr("chronicle_log_type", "MICROSOFT_SQL") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "MICROSOFT_SQL", + }, + { + name: "Attribute chronicle_namespace", + field: `attributes["chronicle_namespace"]`, + logRecord: func() plog.LogRecord { + lr := plog.NewLogRecord() + lr.Attributes().PutStr("status", "200") + lr.Attributes().PutStr("log_type", "k8s-container") + lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") + lr.Attributes().PutStr("chronicle_log_type", "MICROSOFT_SQL") + lr.Attributes().PutStr("chronicle_namespace", "test") + return lr + }(), + scope: plog.NewScopeLogs(), + resource: plog.NewResourceLogs(), + expect: "test", + 
}, +} diff --git a/exporter/chronicleexporter/marshal.go b/exporter/chronicleexporter/marshal.go deleted file mode 100644 index f40a871ec..000000000 --- a/exporter/chronicleexporter/marshal.go +++ /dev/null @@ -1,573 +0,0 @@ -// Copyright observIQ, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package chronicleexporter - -import ( - "context" - "encoding/json" - "fmt" - "strings" - "time" - - "github.com/google/uuid" - "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" - "github.com/observiq/bindplane-agent/expr" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl/contexts/ottllog" - "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/pdata/pcommon" - "go.opentelemetry.io/collector/pdata/plog" - "go.uber.org/zap" - "google.golang.org/protobuf/proto" - "google.golang.org/protobuf/types/known/timestamppb" -) - -const logTypeField = `attributes["log_type"]` -const chronicleLogTypeField = `attributes["chronicle_log_type"]` -const chronicleNamespaceField = `attributes["chronicle_namespace"]` -const chronicleIngestionLabelsPrefix = `chronicle_ingestion_label` - -// This is a specific collector ID for Chronicle. It's used to identify bindplane agents in Chronicle. 
-var chronicleCollectorID = uuid.MustParse("aaaa1111-aaaa-1111-aaaa-1111aaaa1111") - -var supportedLogTypes = map[string]string{ - "windows_event.security": "WINEVTLOG", - "windows_event.application": "WINEVTLOG", - "windows_event.system": "WINEVTLOG", - "sql_server": "MICROSOFT_SQL", -} - -type protoMarshaler struct { - cfg Config - set component.TelemetrySettings - startTime time.Time - customerID []byte - collectorID []byte -} - -func newProtoMarshaler(cfg Config, set component.TelemetrySettings, customerID []byte) (*protoMarshaler, error) { - return &protoMarshaler{ - startTime: time.Now(), - cfg: cfg, - set: set, - customerID: customerID[:], - collectorID: chronicleCollectorID[:], - }, nil -} - -func (m *protoMarshaler) MarshalRawLogs(ctx context.Context, ld plog.Logs) ([]*api.BatchCreateLogsRequest, error) { - rawLogs, namespace, ingestionLabels, err := m.extractRawLogs(ctx, ld) - if err != nil { - return nil, fmt.Errorf("extract raw logs: %w", err) - } - return m.constructPayloads(rawLogs, namespace, ingestionLabels), nil -} - -func (m *protoMarshaler) extractRawLogs(ctx context.Context, ld plog.Logs) (map[string][]*api.LogEntry, map[string]string, map[string][]*api.Label, error) { - entries := make(map[string][]*api.LogEntry) - namespaceMap := make(map[string]string) - ingestionLabelsMap := make(map[string][]*api.Label) - - for i := 0; i < ld.ResourceLogs().Len(); i++ { - resourceLog := ld.ResourceLogs().At(i) - for j := 0; j < resourceLog.ScopeLogs().Len(); j++ { - scopeLog := resourceLog.ScopeLogs().At(j) - for k := 0; k < scopeLog.LogRecords().Len(); k++ { - logRecord := scopeLog.LogRecords().At(k) - rawLog, logType, namespace, ingestionLabels, err := m.processLogRecord(ctx, logRecord, scopeLog, resourceLog) - - if err != nil { - m.set.Logger.Error("Error processing log record", zap.Error(err)) - continue - } - - if rawLog == "" { - continue - } - - var timestamp time.Time - - if logRecord.Timestamp() != 0 { - timestamp = logRecord.Timestamp().AsTime() - 
} else { - timestamp = logRecord.ObservedTimestamp().AsTime() - } - - entry := &api.LogEntry{ - Timestamp: timestamppb.New(timestamp), - CollectionTime: timestamppb.New(logRecord.ObservedTimestamp().AsTime()), - Data: []byte(rawLog), - } - entries[logType] = append(entries[logType], entry) - // each logType maps to exactly 1 namespace value - if namespace != "" { - if _, ok := namespaceMap[logType]; !ok { - namespaceMap[logType] = namespace - } - } - if len(ingestionLabels) > 0 { - // each logType maps to a list of ingestion labels - if _, exists := ingestionLabelsMap[logType]; !exists { - ingestionLabelsMap[logType] = make([]*api.Label, 0) - } - existingLabels := make(map[string]struct{}) - for _, label := range ingestionLabelsMap[logType] { - existingLabels[label.Key] = struct{}{} - } - for _, label := range ingestionLabels { - // only add to ingestionLabelsMap if the label is unique - if _, ok := existingLabels[label.Key]; !ok { - ingestionLabelsMap[logType] = append(ingestionLabelsMap[logType], label) - existingLabels[label.Key] = struct{}{} - } - } - } - } - } - } - return entries, namespaceMap, ingestionLabelsMap, nil -} - -func (m *protoMarshaler) processLogRecord(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, string, string, []*api.Label, error) { - rawLog, err := m.getRawLog(ctx, logRecord, scope, resource) - if err != nil { - return "", "", "", nil, err - } - logType, err := m.getLogType(ctx, logRecord, scope, resource) - if err != nil { - return "", "", "", nil, err - } - namespace, err := m.getNamespace(ctx, logRecord, scope, resource) - if err != nil { - return "", "", "", nil, err - } - ingestionLabels, err := m.getIngestionLabels(logRecord) - if err != nil { - return "", "", "", nil, err - } - return rawLog, logType, namespace, ingestionLabels, nil -} - -func (m *protoMarshaler) processHTTPLogRecord(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource 
plog.ResourceLogs) (string, string, string, map[string]*api.Log_LogLabel, error) { - rawLog, err := m.getRawLog(ctx, logRecord, scope, resource) - if err != nil { - return "", "", "", nil, err - } - - logType, err := m.getLogType(ctx, logRecord, scope, resource) - if err != nil { - return "", "", "", nil, err - } - namespace, err := m.getNamespace(ctx, logRecord, scope, resource) - if err != nil { - return "", "", "", nil, err - } - ingestionLabels, err := m.getHTTPIngestionLabels(logRecord) - if err != nil { - return "", "", "", nil, err - } - - return rawLog, logType, namespace, ingestionLabels, nil -} - -func (m *protoMarshaler) getRawLog(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, error) { - if m.cfg.RawLogField == "" { - entireLogRecord := map[string]any{ - "body": logRecord.Body().Str(), - "attributes": logRecord.Attributes().AsRaw(), - "resource_attributes": resource.Resource().Attributes().AsRaw(), - } - - bytesLogRecord, err := json.Marshal(entireLogRecord) - if err != nil { - return "", fmt.Errorf("marshal log record: %w", err) - } - - return string(bytesLogRecord), nil - } - return m.getRawField(ctx, m.cfg.RawLogField, logRecord, scope, resource) -} - -func (m *protoMarshaler) getLogType(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, error) { - // check for attributes in attributes["chronicle_log_type"] - logType, err := m.getRawField(ctx, chronicleLogTypeField, logRecord, scope, resource) - if err != nil { - return "", fmt.Errorf("get chronicle log type: %w", err) - } - if logType != "" { - return logType, nil - } - - if m.cfg.OverrideLogType { - logType, err := m.getRawField(ctx, logTypeField, logRecord, scope, resource) - - if err != nil { - return "", fmt.Errorf("get log type: %w", err) - } - if logType != "" { - if chronicleLogType, ok := supportedLogTypes[logType]; ok { - return chronicleLogType, nil - } - } - } - - return 
m.cfg.LogType, nil -} - -func (m *protoMarshaler) getNamespace(ctx context.Context, logRecord plog.LogRecord, scope plog.ScopeLogs, resource plog.ResourceLogs) (string, error) { - // check for attributes in attributes["chronicle_namespace"] - namespace, err := m.getRawField(ctx, chronicleNamespaceField, logRecord, scope, resource) - if err != nil { - return "", fmt.Errorf("get chronicle log type: %w", err) - } - if namespace != "" { - return namespace, nil - } - return m.cfg.Namespace, nil -} - -func (m *protoMarshaler) getIngestionLabels(logRecord plog.LogRecord) ([]*api.Label, error) { - // check for labels in attributes["chronicle_ingestion_labels"] - ingestionLabels, err := m.getRawNestedFields(chronicleIngestionLabelsPrefix, logRecord) - if err != nil { - return []*api.Label{}, fmt.Errorf("get chronicle ingestion labels: %w", err) - } - - if len(ingestionLabels) != 0 { - return ingestionLabels, nil - } - // use labels defined in config if needed - configLabels := make([]*api.Label, 0) - for key, value := range m.cfg.IngestionLabels { - configLabels = append(configLabels, &api.Label{ - Key: key, - Value: value, - }) - } - return configLabels, nil -} - -func (m *protoMarshaler) getHTTPIngestionLabels(logRecord plog.LogRecord) (map[string]*api.Log_LogLabel, error) { - // Check for labels in attributes["chronicle_ingestion_labels"] - ingestionLabels, err := m.getHTTPRawNestedFields(chronicleIngestionLabelsPrefix, logRecord) - if err != nil { - return nil, fmt.Errorf("get chronicle ingestion labels: %w", err) - } - - if len(ingestionLabels) != 0 { - return ingestionLabels, nil - } - - // use labels defined in the config if needed - configLabels := make(map[string]*api.Log_LogLabel) - for key, value := range m.cfg.IngestionLabels { - configLabels[key] = &api.Log_LogLabel{ - Value: value, - } - } - return configLabels, nil -} - -func (m *protoMarshaler) getRawField(ctx context.Context, field string, logRecord plog.LogRecord, scope plog.ScopeLogs, resource 
plog.ResourceLogs) (string, error) { - switch field { - case "body": - switch logRecord.Body().Type() { - case pcommon.ValueTypeStr: - return logRecord.Body().Str(), nil - case pcommon.ValueTypeMap: - bytes, err := json.Marshal(logRecord.Body().AsRaw()) - if err != nil { - return "", fmt.Errorf("marshal log body: %w", err) - } - return string(bytes), nil - } - case logTypeField: - attributes := logRecord.Attributes().AsRaw() - if logType, ok := attributes["log_type"]; ok { - if v, ok := logType.(string); ok { - return v, nil - } - } - return "", nil - case chronicleLogTypeField: - attributes := logRecord.Attributes().AsRaw() - if logType, ok := attributes["chronicle_log_type"]; ok { - if v, ok := logType.(string); ok { - return v, nil - } - } - return "", nil - case chronicleNamespaceField: - attributes := logRecord.Attributes().AsRaw() - if namespace, ok := attributes["chronicle_namespace"]; ok { - if v, ok := namespace.(string); ok { - return v, nil - } - } - return "", nil - } - - lrExpr, err := expr.NewOTTLLogRecordExpression(field, m.set) - if err != nil { - return "", fmt.Errorf("raw_log_field is invalid: %s", err) - } - tCtx := ottllog.NewTransformContext(logRecord, scope.Scope(), resource.Resource(), scope, resource) - - lrExprResult, err := lrExpr.Execute(ctx, tCtx) - if err != nil { - return "", fmt.Errorf("execute log record expression: %w", err) - } - - if lrExprResult == nil { - return "", nil - } - - switch result := lrExprResult.(type) { - case string: - return result, nil - case pcommon.Map: - bytes, err := json.Marshal(result.AsRaw()) - if err != nil { - return "", fmt.Errorf("marshal log record expression result: %w", err) - } - return string(bytes), nil - default: - return "", fmt.Errorf("unsupported log record expression result type: %T", lrExprResult) - } -} - -func (m *protoMarshaler) getRawNestedFields(field string, logRecord plog.LogRecord) ([]*api.Label, error) { - var nestedFields []*api.Label - logRecord.Attributes().Range(func(key 
string, value pcommon.Value) bool { - if !strings.HasPrefix(key, field) { - return true - } - // Extract the key name from the nested field - cleanKey := strings.Trim(key[len(field):], `[]"`) - var jsonMap map[string]string - - // If needs to be parsed as JSON - if err := json.Unmarshal([]byte(value.AsString()), &jsonMap); err == nil { - for k, v := range jsonMap { - nestedFields = append(nestedFields, &api.Label{Key: k, Value: v}) - } - } else { - nestedFields = append(nestedFields, &api.Label{Key: cleanKey, Value: value.AsString()}) - } - return true - }) - return nestedFields, nil -} - -func (m *protoMarshaler) getHTTPRawNestedFields(field string, logRecord plog.LogRecord) (map[string]*api.Log_LogLabel, error) { - nestedFields := make(map[string]*api.Log_LogLabel) // Map with key as string and value as Log_LogLabel - logRecord.Attributes().Range(func(key string, value pcommon.Value) bool { - if !strings.HasPrefix(key, field) { - return true - } - // Extract the key name from the nested field - cleanKey := strings.Trim(key[len(field):], `[]"`) - var jsonMap map[string]string - - // If needs to be parsed as JSON - if err := json.Unmarshal([]byte(value.AsString()), &jsonMap); err == nil { - for k, v := range jsonMap { - nestedFields[k] = &api.Log_LogLabel{ - Value: v, - } - } - } else { - nestedFields[cleanKey] = &api.Log_LogLabel{ - Value: value.AsString(), - } - } - return true - }) - - return nestedFields, nil -} - -func (m *protoMarshaler) constructPayloads(rawLogs map[string][]*api.LogEntry, namespaceMap map[string]string, ingestionLabelsMap map[string][]*api.Label) []*api.BatchCreateLogsRequest { - payloads := make([]*api.BatchCreateLogsRequest, 0, len(rawLogs)) - for logType, entries := range rawLogs { - if len(entries) > 0 { - namespace, ok := namespaceMap[logType] - if !ok { - namespace = m.cfg.Namespace - } - ingestionLabels := ingestionLabelsMap[logType] - - request := m.buildGRPCRequest(entries, logType, namespace, ingestionLabels) - - payloads = 
append(payloads, m.enforceMaximumsGRPCRequest(request)...) - } - } - return payloads -} - -func (m *protoMarshaler) enforceMaximumsGRPCRequest(request *api.BatchCreateLogsRequest) []*api.BatchCreateLogsRequest { - size := proto.Size(request) - entries := request.Batch.Entries - if size <= m.cfg.BatchRequestSizeLimitGRPC && len(entries) <= m.cfg.BatchLogCountLimitGRPC { - return []*api.BatchCreateLogsRequest{ - request, - } - } - - if len(entries) < 2 { - m.set.Logger.Error("Single entry exceeds max request size. Dropping entry", zap.Int("size", size)) - return []*api.BatchCreateLogsRequest{} - } - - // split request into two - mid := len(entries) / 2 - leftHalf := entries[:mid] - rightHalf := entries[mid:] - - request.Batch.Entries = leftHalf - otherHalfRequest := m.buildGRPCRequest(rightHalf, request.Batch.LogType, request.Batch.Source.Namespace, request.Batch.Source.Labels) - - // re-enforce max size restriction on each half - enforcedRequest := m.enforceMaximumsGRPCRequest(request) - enforcedOtherHalfRequest := m.enforceMaximumsGRPCRequest(otherHalfRequest) - - return append(enforcedRequest, enforcedOtherHalfRequest...) 
-} - -func (m *protoMarshaler) buildGRPCRequest(entries []*api.LogEntry, logType, namespace string, ingestionLabels []*api.Label) *api.BatchCreateLogsRequest { - return &api.BatchCreateLogsRequest{ - Batch: &api.LogEntryBatch{ - StartTime: timestamppb.New(m.startTime), - Entries: entries, - LogType: logType, - Source: &api.EventSource{ - CollectorId: m.collectorID, - CustomerId: m.customerID, - Labels: ingestionLabels, - Namespace: namespace, - }, - }, - } -} - -func (m *protoMarshaler) MarshalRawLogsForHTTP(ctx context.Context, ld plog.Logs) (map[string][]*api.ImportLogsRequest, error) { - rawLogs, err := m.extractRawHTTPLogs(ctx, ld) - if err != nil { - return nil, fmt.Errorf("extract raw logs: %w", err) - } - return m.constructHTTPPayloads(rawLogs), nil -} - -func (m *protoMarshaler) extractRawHTTPLogs(ctx context.Context, ld plog.Logs) (map[string][]*api.Log, error) { - entries := make(map[string][]*api.Log) - for i := 0; i < ld.ResourceLogs().Len(); i++ { - resourceLog := ld.ResourceLogs().At(i) - for j := 0; j < resourceLog.ScopeLogs().Len(); j++ { - scopeLog := resourceLog.ScopeLogs().At(j) - for k := 0; k < scopeLog.LogRecords().Len(); k++ { - logRecord := scopeLog.LogRecords().At(k) - rawLog, logType, namespace, ingestionLabels, err := m.processHTTPLogRecord(ctx, logRecord, scopeLog, resourceLog) - if err != nil { - m.set.Logger.Error("Error processing log record", zap.Error(err)) - continue - } - - if rawLog == "" { - continue - } - - var timestamp time.Time - if logRecord.Timestamp() != 0 { - timestamp = logRecord.Timestamp().AsTime() - } else { - timestamp = logRecord.ObservedTimestamp().AsTime() - } - - entry := &api.Log{ - LogEntryTime: timestamppb.New(timestamp), - CollectionTime: timestamppb.New(logRecord.ObservedTimestamp().AsTime()), - Data: []byte(rawLog), - EnvironmentNamespace: namespace, - Labels: ingestionLabels, - } - entries[logType] = append(entries[logType], entry) - } - } - } - - return entries, nil -} - -func buildForwarderString(cfg 
Config) string { - format := "projects/%s/locations/%s/instances/%s/forwarders/%s" - return fmt.Sprintf(format, cfg.Project, cfg.Location, cfg.CustomerID, cfg.Forwarder) -} - -func (m *protoMarshaler) constructHTTPPayloads(rawLogs map[string][]*api.Log) map[string][]*api.ImportLogsRequest { - payloads := make(map[string][]*api.ImportLogsRequest, len(rawLogs)) - - for logType, entries := range rawLogs { - if len(entries) > 0 { - request := m.buildHTTPRequest(entries) - - payloads[logType] = m.enforceMaximumsHTTPRequest(request) - } - } - return payloads -} - -func (m *protoMarshaler) enforceMaximumsHTTPRequest(request *api.ImportLogsRequest) []*api.ImportLogsRequest { - size := proto.Size(request) - logs := request.GetInlineSource().Logs - if size <= m.cfg.BatchRequestSizeLimitHTTP && len(logs) <= m.cfg.BatchLogCountLimitHTTP { - return []*api.ImportLogsRequest{ - request, - } - } - - if len(logs) < 2 { - m.set.Logger.Error("Single entry exceeds max request size. Dropping entry", zap.Int("size", size)) - return []*api.ImportLogsRequest{} - } - - // split request into two - mid := len(logs) / 2 - leftHalf := logs[:mid] - rightHalf := logs[mid:] - - request.GetInlineSource().Logs = leftHalf - otherHalfRequest := m.buildHTTPRequest(rightHalf) - - // re-enforce max size restriction on each half - enforcedRequest := m.enforceMaximumsHTTPRequest(request) - enforcedOtherHalfRequest := m.enforceMaximumsHTTPRequest(otherHalfRequest) - - return append(enforcedRequest, enforcedOtherHalfRequest...) 
-} - -func (m *protoMarshaler) buildHTTPRequest(entries []*api.Log) *api.ImportLogsRequest { - return &api.ImportLogsRequest{ - // TODO: Add parent and hint - // We don't yet have solid guidance on what these should be - Parent: "", - Hint: "", - - Source: &api.ImportLogsRequest_InlineSource{ - InlineSource: &api.ImportLogsRequest_LogsInlineSource{ - Forwarder: buildForwarderString(m.cfg), - Logs: entries, - }, - }, - } -} diff --git a/exporter/chronicleexporter/marshal_test.go b/exporter/chronicleexporter/marshal_test.go deleted file mode 100644 index 903f2aec9..000000000 --- a/exporter/chronicleexporter/marshal_test.go +++ /dev/null @@ -1,1706 +0,0 @@ -// Copyright observIQ, Inc. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -package chronicleexporter - -import ( - "context" - "testing" - "time" - - "github.com/google/uuid" - "github.com/observiq/bindplane-agent/exporter/chronicleexporter/protos/api" - "github.com/stretchr/testify/require" - "go.opentelemetry.io/collector/component" - "go.opentelemetry.io/collector/pdata/plog" - "go.uber.org/zap" - "golang.org/x/exp/rand" - "google.golang.org/protobuf/types/known/timestamppb" -) - -func TestProtoMarshaler_MarshalRawLogs(t *testing.T) { - logger := zap.NewNop() - startTime := time.Now() - - tests := []struct { - name string - cfg Config - logRecords func() plog.Logs - expectations func(t *testing.T, requests []*api.BatchCreateLogsRequest) - }{ - { - name: "Single log record with expected data", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("Test log message", map[string]any{"log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["env"]`: "prod"})) - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - require.Len(t, requests, 1) - batch := requests[0].Batch - require.Equal(t, "WINEVTLOG", batch.LogType) - require.Len(t, batch.Entries, 1) - - // Convert Data (byte slice) to string for comparison - logDataAsString := string(batch.Entries[0].Data) - expectedLogData := `Test log message` - require.Equal(t, expectedLogData, logDataAsString) - - require.NotNil(t, batch.StartTime) - require.True(t, timestamppb.New(startTime).AsTime().Equal(batch.StartTime.AsTime()), "Start time should be set correctly") - }, - }, - { - name: "Single log record with expected data, no log_type, namespace, or ingestion labels", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: true, - BatchLogCountLimitGRPC: 1000, - 
BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("Test log message", nil)) - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - require.Len(t, requests, 1) - batch := requests[0].Batch - require.Equal(t, "WINEVTLOG", batch.LogType) - require.Equal(t, "", batch.Source.Namespace) - require.Equal(t, 0, len(batch.Source.Labels)) - require.Len(t, batch.Entries, 1) - - // Convert Data (byte slice) to string for comparison - logDataAsString := string(batch.Entries[0].Data) - expectedLogData := `Test log message` - require.Equal(t, expectedLogData, logDataAsString) - - require.NotNil(t, batch.StartTime) - require.True(t, timestamppb.New(startTime).AsTime().Equal(batch.StartTime.AsTime()), "Start time should be set correctly") - }, - }, - { - name: "Multiple log records", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - return logs - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - require.Len(t, requests, 1, "Expected a single batch request") - batch := requests[0].Batch - require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") - // Verifying the first log entry data - require.Equal(t, "First log message", string(batch.Entries[0].Data)) - // Verifying the second log entry data - require.Equal(t, "Second log message", string(batch.Entries[1].Data)) - }, - }, - { - name: "Log record with attributes", - cfg: Config{ - CustomerID: 
uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "attributes", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("", map[string]any{"key1": "value1", "log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"})) - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - require.Len(t, requests, 1) - batch := requests[0].Batch - require.Len(t, batch.Entries, 1) - - // Assuming the attributes are marshaled into the Data field as a JSON string - expectedData := `{"key1":"value1", "log_type":"WINEVTLOG", "namespace":"test", "chronicle_ingestion_label[\"key1\"]": "value1", "chronicle_ingestion_label[\"key2\"]": "value2"}` - actualData := string(batch.Entries[0].Data) - require.JSONEq(t, expectedData, actualData, "Log attributes should match expected") - }, - }, - { - name: "No log records", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "DEFAULT", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - return plog.NewLogs() // No log records added - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - require.Len(t, requests, 0, "Expected no requests due to no log records") - }, - }, - { - name: "No log type set in config or attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - RawLogField: "body", - OverrideLogType: true, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("Log without logType", map[string]any{"namespace": "test", `ingestion_label["realkey1"]`: "realvalue1", `ingestion_label["realkey2"]`: "realvalue2"})) - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { 
- require.Len(t, requests, 1) - batch := requests[0].Batch - require.Equal(t, "", batch.LogType, "Expected log type to be empty") - }, - }, - { - name: "Multiple log records with duplicate data, no log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - return logs - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify one request for log type in config - require.Len(t, requests, 1, "Expected a single batch request") - batch := requests[0].Batch - // verify batch source labels - require.Len(t, batch.Source.Labels, 2) - require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") - // Verifying the first log entry data - require.Equal(t, "First log message", string(batch.Entries[0].Data)) - // Verifying the second log entry data - require.Equal(t, "Second log message", string(batch.Entries[1].Data)) - }, - }, - { - name: "Multiple log records with different data, no log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - 
BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) - return logs - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify one request for one log type - require.Len(t, requests, 1, "Expected a single batch request") - batch := requests[0].Batch - require.Equal(t, "WINEVTLOG", batch.LogType) - require.Equal(t, "", batch.Source.Namespace) - // verify batch source labels - require.Len(t, batch.Source.Labels, 4) - require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") - // Verifying the first log entry data - require.Equal(t, "First log message", string(batch.Entries[0].Data)) - // Verifying the second log entry data - require.Equal(t, "Second log message", string(batch.Entries[1].Data)) - }, - }, - { - name: "Override log type with attribute", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "DEFAULT", // This should be overridden by the log_type attribute - RawLogField: "body", - OverrideLogType: true, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"log_type": "windows_event.application", "namespace": "test", `ingestion_label["realkey1"]`: "realvalue1", `ingestion_label["realkey2"]`: "realvalue2"})) - }, - expectations: func(t *testing.T, requests 
[]*api.BatchCreateLogsRequest) { - require.Len(t, requests, 1) - batch := requests[0].Batch - require.Equal(t, "WINEVTLOG", batch.LogType, "Expected log type to be overridden by attribute") - }, - }, - { - name: "Override log type with chronicle attribute", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "DEFAULT", // This should be overridden by the chronicle_log_type attribute - RawLogField: "body", - OverrideLogType: true, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"chronicle_log_type": "ASOC_ALERT", "chronicle_namespace": "test", `chronicle_ingestion_label["realkey1"]`: "realvalue1", `chronicle_ingestion_label["realkey2"]`: "realvalue2"})) - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - require.Len(t, requests, 1) - batch := requests[0].Batch - require.Equal(t, "ASOC_ALERT", batch.LogType, "Expected log type to be overridden by attribute") - require.Equal(t, "test", batch.Source.Namespace, "Expected namespace to be overridden by attribute") - expectedLabels := map[string]string{ - "realkey1": "realvalue1", - "realkey2": "realvalue2", - } - for _, label := range batch.Source.Labels { - require.Equal(t, expectedLabels[label.Key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Multiple log records with duplicate data, log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", 
"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - return logs - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify 1 request, 2 batches for same log type - require.Len(t, requests, 1, "Expected a single batch request") - batch := requests[0].Batch - require.Len(t, batch.Entries, 2, "Expected two log entries in the batch") - // verify batch for first log - require.Equal(t, "WINEVTLOGS", batch.LogType) - require.Equal(t, "test1", batch.Source.Namespace) - require.Len(t, batch.Source.Labels, 2) - expectedLabels := map[string]string{ - "key1": "value1", - "key2": "value2", - } - for _, label := range batch.Source.Labels { - require.Equal(t, expectedLabels[label.Key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Multiple log records with different data, log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - - record2 := 
logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS2", "chronicle_namespace": "test2", `chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) - return logs - }, - - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify 2 requests, with 1 batch for different log types - require.Len(t, requests, 2, "Expected a two batch request") - batch := requests[0].Batch - require.Len(t, batch.Entries, 1, "Expected one log entries in the batch") - // verify batch for first log - require.Contains(t, batch.LogType, "WINEVTLOGS") - require.Contains(t, batch.Source.Namespace, "test") - require.Len(t, batch.Source.Labels, 2) - - batch2 := requests[1].Batch - require.Len(t, batch2.Entries, 1, "Expected one log entries in the batch") - // verify batch for second log - require.Contains(t, batch2.LogType, "WINEVTLOGS") - require.Contains(t, batch2.Source.Namespace, "test") - require.Len(t, batch2.Source.Labels, 2) - // verify ingestion labels - for _, req := range requests { - for _, label := range req.Batch.Source.Labels { - require.Contains(t, []string{ - "key1", - "key2", - "key3", - "key4", - }, label.Key) - require.Contains(t, []string{ - "value1", - "value2", - "value3", - "value4", - }, label.Value) - } - } - }, - }, - { - name: "Many logs, all one batch", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - for i := 0; i < 1000; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr("Log message") - 
record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - return logs - }, - - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify 1 request, with 1 batch - require.Len(t, requests, 1, "Expected a one-batch request") - batch := requests[0].Batch - require.Len(t, batch.Entries, 1000, "Expected 1000 log entries in the batch") - // verify batch for first log - require.Contains(t, batch.LogType, "WINEVTLOGS") - require.Contains(t, batch.Source.Namespace, "test") - require.Len(t, batch.Source.Labels, 2) - - // verify ingestion labels - for _, req := range requests { - for _, label := range req.Batch.Source.Labels { - require.Contains(t, []string{ - "key1", - "key2", - "key3", - "key4", - }, label.Key) - require.Contains(t, []string{ - "value1", - "value2", - "value3", - "value4", - }, label.Value) - } - } - }, - }, - { - name: "Single batch split into multiple because more than 1000 logs", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - for i := 0; i < 1001; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr("Log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - return logs - }, - - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify 1 request, with 1 batch - require.Len(t, requests, 2, "Expected a two-batch request") - batch := requests[0].Batch - require.Len(t, 
batch.Entries, 500, "Expected 500 log entries in the first batch") - // verify batch for first log - require.Contains(t, batch.LogType, "WINEVTLOGS") - require.Contains(t, batch.Source.Namespace, "test") - require.Len(t, batch.Source.Labels, 2) - - batch2 := requests[1].Batch - require.Len(t, batch2.Entries, 501, "Expected 501 log entries in the second batch") - // verify batch for first log - require.Contains(t, batch2.LogType, "WINEVTLOGS") - require.Contains(t, batch2.Source.Namespace, "test") - require.Len(t, batch2.Source.Labels, 2) - - // verify ingestion labels - for _, req := range requests { - for _, label := range req.Batch.Source.Labels { - require.Contains(t, []string{ - "key1", - "key2", - "key3", - "key4", - }, label.Key) - require.Contains(t, []string{ - "value1", - "value2", - "value3", - "value4", - }, label.Value) - } - } - }, - }, - { - name: "Recursively split batch, exceeds 1000 entries multiple times", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - for i := 0; i < 2002; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr("Log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - return logs - }, - - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify 1 request, with 1 batch - require.Len(t, requests, 4, "Expected a four-batch request") - batch := requests[0].Batch - require.Len(t, batch.Entries, 500, "Expected 500 log entries in the first batch") - // verify batch for first log - require.Contains(t, batch.LogType, "WINEVTLOGS") - 
require.Contains(t, batch.Source.Namespace, "test") - require.Len(t, batch.Source.Labels, 2) - - batch2 := requests[1].Batch - require.Len(t, batch2.Entries, 501, "Expected 501 log entries in the second batch") - // verify batch for first log - require.Contains(t, batch2.LogType, "WINEVTLOGS") - require.Contains(t, batch2.Source.Namespace, "test") - require.Len(t, batch2.Source.Labels, 2) - - batch3 := requests[2].Batch - require.Len(t, batch3.Entries, 500, "Expected 500 log entries in the third batch") - // verify batch for first log - require.Contains(t, batch3.LogType, "WINEVTLOGS") - require.Contains(t, batch3.Source.Namespace, "test") - require.Len(t, batch3.Source.Labels, 2) - - batch4 := requests[3].Batch - require.Len(t, batch4.Entries, 501, "Expected 501 log entries in the fourth batch") - // verify batch for first log - require.Contains(t, batch4.LogType, "WINEVTLOGS") - require.Contains(t, batch4.Source.Namespace, "test") - require.Len(t, batch4.Source.Labels, 2) - - // verify ingestion labels - for _, req := range requests { - for _, label := range req.Batch.Source.Labels { - require.Contains(t, []string{ - "key1", - "key2", - "key3", - "key4", - }, label.Key) - require.Contains(t, []string{ - "value1", - "value2", - "value3", - "value4", - }, label.Value) - } - } - }, - }, - { - name: "Single batch split into multiple because request size too large", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - // create 640 logs with size 8192 bytes each - totalling 5242880 bytes. 
non-body fields put us over limit - for i := 0; i < 640; i++ { - record1 := logRecords.AppendEmpty() - body := tokenWithLength(8192) - record1.Body().SetStr(string(body)) - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - return logs - }, - - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify request, with 1 batch - require.Len(t, requests, 2, "Expected a two-batch request") - batch := requests[0].Batch - require.Len(t, batch.Entries, 320, "Expected 320 log entries in the first batch") - // verify batch for first log - require.Contains(t, batch.LogType, "WINEVTLOGS") - require.Contains(t, batch.Source.Namespace, "test") - require.Len(t, batch.Source.Labels, 2) - - batch2 := requests[1].Batch - require.Len(t, batch2.Entries, 320, "Expected 320 log entries in the second batch") - // verify batch for first log - require.Contains(t, batch2.LogType, "WINEVTLOGS") - require.Contains(t, batch2.Source.Namespace, "test") - require.Len(t, batch2.Source.Labels, 2) - - // verify ingestion labels - for _, req := range requests { - for _, label := range req.Batch.Source.Labels { - require.Contains(t, []string{ - "key1", - "key2", - "key3", - "key4", - }, label.Key) - require.Contains(t, []string{ - "value1", - "value2", - "value3", - "value4", - }, label.Value) - } - } - }, - }, - { - name: "Recursively split batch into multiple because request size too large", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - // create 1280 logs with size 8192 bytes each - totalling 5242880 * 2 bytes. 
non-body fields put us over twice the limit - for i := 0; i < 1280; i++ { - record1 := logRecords.AppendEmpty() - body := tokenWithLength(8192) - record1.Body().SetStr(string(body)) - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - return logs - }, - - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify 1 request, with 1 batch - require.Len(t, requests, 4, "Expected a four-batch request") - batch := requests[0].Batch - require.Len(t, batch.Entries, 320, "Expected 320 log entries in the first batch") - // verify batch for first log - require.Contains(t, batch.LogType, "WINEVTLOGS") - require.Contains(t, batch.Source.Namespace, "test") - require.Len(t, batch.Source.Labels, 2) - - batch2 := requests[1].Batch - require.Len(t, batch2.Entries, 320, "Expected 320 log entries in the second batch") - // verify batch for first log - require.Contains(t, batch2.LogType, "WINEVTLOGS") - require.Contains(t, batch2.Source.Namespace, "test") - require.Len(t, batch2.Source.Labels, 2) - - batch3 := requests[2].Batch - require.Len(t, batch3.Entries, 320, "Expected 320 log entries in the third batch") - // verify batch for first log - require.Contains(t, batch3.LogType, "WINEVTLOGS") - require.Contains(t, batch3.Source.Namespace, "test") - require.Len(t, batch3.Source.Labels, 2) - - batch4 := requests[3].Batch - require.Len(t, batch4.Entries, 320, "Expected 320 log entries in the fourth batch") - // verify batch for first log - require.Contains(t, batch4.LogType, "WINEVTLOGS") - require.Contains(t, batch4.Source.Namespace, "test") - require.Len(t, batch4.Source.Labels, 2) - - // verify ingestion labels - for _, req := range requests { - for _, label := range req.Batch.Source.Labels { - require.Contains(t, []string{ - "key1", - "key2", - "key3", - "key4", - }, label.Key) - 
require.Contains(t, []string{ - "value1", - "value2", - "value3", - "value4", - }, label.Value) - } - } - }, - }, - { - name: "Unsplittable batch, single log exceeds max request size", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - body := tokenWithLength(5242881) - record1.Body().SetStr(string(body)) - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - return logs - }, - - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // verify 1 request, with 1 batch - require.Len(t, requests, 0, "Expected a zero requests") - }, - }, - { - name: "Multiple valid log records + unsplittable log entries", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitGRPC: 1000, - BatchRequestSizeLimitGRPC: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - tooLargeBody := string(tokenWithLength(5242881)) - // first normal log, then impossible to split log - logRecords1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - record1 := logRecords1.AppendEmpty() - record1.Body().SetStr("First log message") - tooLargeRecord1 := logRecords1.AppendEmpty() - tooLargeRecord1.Body().SetStr(tooLargeBody) - // first impossible to split log, then normal log - logRecords2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - tooLargeRecord2 := logRecords2.AppendEmpty() - tooLargeRecord2.Body().SetStr(tooLargeBody) - record2 := logRecords2.AppendEmpty() - 
record2.Body().SetStr("Second log message") - return logs - }, - expectations: func(t *testing.T, requests []*api.BatchCreateLogsRequest) { - // this is a kind of weird edge case, the overly large logs makes the final requests quite inefficient, but it's going to be so rare that the inefficiency isn't a real concern - require.Len(t, requests, 2, "Expected two batch requests") - batch1 := requests[0].Batch - require.Len(t, batch1.Entries, 1, "Expected one log entry in the first batch") - // Verifying the first log entry data - require.Equal(t, "First log message", string(batch1.Entries[0].Data)) - - batch2 := requests[1].Batch - require.Len(t, batch2.Entries, 1, "Expected one log entry in the second batch") - // Verifying the second log entry data - require.Equal(t, "Second log message", string(batch2.Entries[0].Data)) - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - customerID, err := uuid.Parse(tt.cfg.CustomerID) - require.NoError(t, err) - - marshaler, err := newProtoMarshaler(tt.cfg, component.TelemetrySettings{Logger: logger}, customerID[:]) - marshaler.startTime = startTime - require.NoError(t, err) - - logs := tt.logRecords() - requests, err := marshaler.MarshalRawLogs(context.Background(), logs) - require.NoError(t, err) - - tt.expectations(t, requests) - }) - } -} - -func TestProtoMarshaler_MarshalRawLogsForHTTP(t *testing.T) { - logger := zap.NewNop() - startTime := time.Now() - - tests := []struct { - name string - cfg Config - labels []*api.Label - logRecords func() plog.Logs - expectations func(t *testing.T, requests map[string][]*api.ImportLogsRequest) - }{ - { - name: "Single log record with expected data", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - Protocol: protocolHTTPS, - Project: "test-project", - Location: "us", - Forwarder: uuid.New().String(), - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - labels: 
[]*api.Label{ - {Key: "env", Value: "prod"}, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("Test log message", map[string]any{"log_type": "WINEVTLOG", "namespace": "test"})) - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1) - logs := requests["WINEVTLOG"][0].GetInlineSource().Logs - require.Len(t, logs, 1) - // Convert Data (byte slice) to string for comparison - logDataAsString := string(logs[0].Data) - expectedLogData := `Test log message` - require.Equal(t, expectedLogData, logDataAsString) - }, - }, - { - name: "Multiple log records", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - labels: []*api.Label{ - {Key: "env", Value: "staging"}, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - return logs - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1, "Expected a single batch request") - logs := requests["WINEVTLOG"][0].GetInlineSource().Logs - require.Len(t, logs, 2, "Expected two log entries in the batch") - // Verifying the first log entry data - require.Equal(t, "First log message", string(logs[0].Data)) - // Verifying the second log entry data - require.Equal(t, "Second log message", string(logs[1].Data)) - }, - }, - { - name: "Log record with attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "attributes", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - 
}, - labels: []*api.Label{}, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("", map[string]any{"key1": "value1", "log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"})) - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1) - logs := requests["WINEVTLOG"][0].GetInlineSource().Logs - // Assuming the attributes are marshaled into the Data field as a JSON string - expectedData := `{"key1":"value1", "log_type":"WINEVTLOG", "namespace":"test", "chronicle_ingestion_label[\"key1\"]": "value1", "chronicle_ingestion_label[\"key2\"]": "value2"}` - actualData := string(logs[0].Data) - require.JSONEq(t, expectedData, actualData, "Log attributes should match expected") - }, - }, - { - name: "No log records", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "DEFAULT", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - labels: []*api.Label{}, - logRecords: func() plog.Logs { - return plog.NewLogs() // No log records added - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 0, "Expected no requests due to no log records") - }, - }, - { - name: "No log type set in config or attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "attributes", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - labels: []*api.Label{}, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("", map[string]any{"key1": "value1", "log_type": "WINEVTLOG", "namespace": "test", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"})) - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1) - 
logs := requests["WINEVTLOG"][0].GetInlineSource().Logs - // Assuming the attributes are marshaled into the Data field as a JSON string - expectedData := `{"key1":"value1", "log_type":"WINEVTLOG", "namespace":"test", "chronicle_ingestion_label[\"key1\"]": "value1", "chronicle_ingestion_label[\"key2\"]": "value2"}` - actualData := string(logs[0].Data) - require.JSONEq(t, expectedData, actualData, "Log attributes should match expected") - }, - }, - { - name: "Multiple log records with duplicate data, no log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{"chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - return logs - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - // verify one request for log type in config - require.Len(t, requests, 1, "Expected a single batch request") - logs := requests["WINEVTLOG"][0].GetInlineSource().Logs - // verify batch source labels - require.Len(t, logs[0].Labels, 2) - require.Len(t, logs, 2, "Expected two log entries in the batch") - // Verifying the first log entry data - require.Equal(t, "First log message", string(logs[0].Data)) - // Verifying the second log entry data - require.Equal(t, 
"Second log message", string(logs[1].Data)) - }, - }, - { - name: "Multiple log records with different data, no log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{`chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) - return logs - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - // verify one request for one log type - require.Len(t, requests, 1, "Expected a single batch request") - logs := requests["WINEVTLOG"][0].GetInlineSource().Logs - require.Len(t, logs, 2, "Expected two log entries in the batch") - require.Equal(t, "", logs[0].EnvironmentNamespace) - // verify batch source labels - require.Len(t, logs[0].Labels, 2) - require.Len(t, logs[1].Labels, 2) - // Verifying the first log entry data - require.Equal(t, "First log message", string(logs[0].Data)) - // Verifying the second log entry data - require.Equal(t, "Second log message", string(logs[1].Data)) - }, - }, - { - name: "Override log type with attribute", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "DEFAULT", // This should be overridden by the log_type attribute - RawLogField: "body", - OverrideLogType: true, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() 
plog.Logs { - return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"log_type": "windows_event.application", "namespace": "test", `ingestion_label["realkey1"]`: "realvalue1", `ingestion_label["realkey2"]`: "realvalue2"})) - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1) - logs := requests["WINEVTLOG"][0].GetInlineSource().Logs - require.NotEqual(t, len(logs), 0) - }, - }, - { - name: "Override log type with chronicle attribute", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "DEFAULT", // This should be overridden by the chronicle_log_type attribute - RawLogField: "body", - OverrideLogType: true, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - return mockLogs(mockLogRecord("Log with overridden type", map[string]any{"chronicle_log_type": "ASOC_ALERT", "chronicle_namespace": "test", `chronicle_ingestion_label["realkey1"]`: "realvalue1", `chronicle_ingestion_label["realkey2"]`: "realvalue2"})) - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1) - logs := requests["ASOC_ALERT"][0].GetInlineSource().Logs - require.Equal(t, "test", logs[0].EnvironmentNamespace, "Expected namespace to be overridden by attribute") - expectedLabels := map[string]string{ - "realkey1": "realvalue1", - "realkey2": "realvalue2", - } - for key, label := range logs[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Multiple log records with duplicate data, log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := 
logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - return logs - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - // verify 1 request, 2 batches for same log type - require.Len(t, requests, 1, "Expected a single batch request") - logs := requests["WINEVTLOGS"][0].GetInlineSource().Logs - require.Len(t, logs, 2, "Expected two log entries in the batch") - // verify variables - require.Equal(t, "test1", logs[0].EnvironmentNamespace) - require.Len(t, logs[0].Labels, 2) - expectedLabels := map[string]string{ - "key1": "value1", - "key2": "value2", - } - for key, label := range logs[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Multiple log records with different data, log type in attributes", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": 
"test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - - record2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record2.Body().SetStr("Second log message") - record2.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS2", "chronicle_namespace": "test2", `chronicle_ingestion_label["key3"]`: "value3", `chronicle_ingestion_label["key4"]`: "value4"}) - return logs - }, - - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - expectedLabels := map[string]string{ - "key1": "value1", - "key2": "value2", - "key3": "value3", - "key4": "value4", - } - // verify 2 requests, with 1 batch for different log types - require.Len(t, requests, 2, "Expected a two batch request") - - logs1 := requests["WINEVTLOGS1"][0].GetInlineSource().Logs - require.Len(t, logs1, 1, "Expected one log entries in the batch") - // verify variables for first log - require.Equal(t, logs1[0].EnvironmentNamespace, "test1") - require.Len(t, logs1[0].Labels, 2) - for key, label := range logs1[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs2 := requests["WINEVTLOGS2"][0].GetInlineSource().Logs - require.Len(t, logs2, 1, "Expected one log entries in the batch") - // verify variables for second log - require.Equal(t, logs2[0].EnvironmentNamespace, "test2") - require.Len(t, logs2[0].Labels, 2) - for key, label := range logs2[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Many log records all one batch", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := 
logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - for i := 0; i < 1000; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - - return logs - }, - - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - expectedLabels := map[string]string{ - "key1": "value1", - "key2": "value2", - } - // verify 1 requests - require.Len(t, requests, 1, "Expected a one batch request") - - logs1 := requests["WINEVTLOGS1"][0].GetInlineSource().Logs - require.Len(t, logs1, 1000, "Expected one thousand log entries in the batch") - // verify variables for first log - require.Equal(t, logs1[0].EnvironmentNamespace, "test1") - require.Len(t, logs1[0].Labels, 2) - for key, label := range logs1[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Many log records split into two batches", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - for i := 0; i < 1001; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - - return logs - }, - - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - expectedLabels := map[string]string{ - "key1": 
"value1", - "key2": "value2", - } - // verify 1 request log type - require.Len(t, requests, 1, "Expected one log type for the requests") - winEvtLogRequests := requests["WINEVTLOGS1"] - require.Len(t, winEvtLogRequests, 2, "Expected two batches") - - logs1 := winEvtLogRequests[0].GetInlineSource().Logs - require.Len(t, logs1, 500, "Expected 500 log entries in the first batch") - // verify variables for first log - require.Equal(t, logs1[0].EnvironmentNamespace, "test1") - require.Len(t, logs1[0].Labels, 2) - for key, label := range logs1[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs2 := winEvtLogRequests[1].GetInlineSource().Logs - require.Len(t, logs2, 501, "Expected 501 log entries in the second batch") - // verify variables for first log - require.Equal(t, logs2[0].EnvironmentNamespace, "test1") - require.Len(t, logs2[0].Labels, 2) - for key, label := range logs2[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Recursively split batch multiple times because too many logs", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - for i := 0; i < 2002; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr("First log message") - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - - return logs - }, - - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - expectedLabels := map[string]string{ 
- "key1": "value1", - "key2": "value2", - } - // verify 1 request log type - require.Len(t, requests, 1, "Expected one log type for the requests") - winEvtLogRequests := requests["WINEVTLOGS1"] - require.Len(t, winEvtLogRequests, 4, "Expected four batches") - - logs1 := winEvtLogRequests[0].GetInlineSource().Logs - require.Len(t, logs1, 500, "Expected 500 log entries in the first batch") - // verify variables for first log - require.Equal(t, logs1[0].EnvironmentNamespace, "test1") - require.Len(t, logs1[0].Labels, 2) - for key, label := range logs1[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs2 := winEvtLogRequests[1].GetInlineSource().Logs - require.Len(t, logs2, 501, "Expected 501 log entries in the second batch") - // verify variables for first log - require.Equal(t, logs2[0].EnvironmentNamespace, "test1") - require.Len(t, logs2[0].Labels, 2) - for key, label := range logs2[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs3 := winEvtLogRequests[2].GetInlineSource().Logs - require.Len(t, logs3, 500, "Expected 500 log entries in the third batch") - // verify variables for first log - require.Equal(t, logs3[0].EnvironmentNamespace, "test1") - require.Len(t, logs3[0].Labels, 2) - for key, label := range logs3[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs4 := winEvtLogRequests[3].GetInlineSource().Logs - require.Len(t, logs4, 501, "Expected 501 log entries in the fourth batch") - // verify variables for first log - require.Equal(t, logs4[0].EnvironmentNamespace, "test1") - require.Len(t, logs4[0].Labels, 2) - for key, label := range logs4[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Many log records 
split into two batches because request size too large", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - // 8192 * 640 = 5242880 - body := tokenWithLength(8192) - for i := 0; i < 640; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr(string(body)) - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - - return logs - }, - - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - expectedLabels := map[string]string{ - "key1": "value1", - "key2": "value2", - } - // verify 1 request log type - require.Len(t, requests, 1, "Expected one log type for the requests") - winEvtLogRequests := requests["WINEVTLOGS1"] - require.Len(t, winEvtLogRequests, 2, "Expected two batches") - - logs1 := winEvtLogRequests[0].GetInlineSource().Logs - require.Len(t, logs1, 320, "Expected 320 log entries in the first batch") - // verify variables for first log - require.Equal(t, logs1[0].EnvironmentNamespace, "test1") - require.Len(t, logs1[0].Labels, 2) - for key, label := range logs1[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs2 := winEvtLogRequests[1].GetInlineSource().Logs - require.Len(t, logs2, 320, "Expected 320 log entries in the second batch") - // verify variables for first log - require.Equal(t, logs2[0].EnvironmentNamespace, "test1") - require.Len(t, logs2[0].Labels, 2) - for key, label := range logs2[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be 
overridden by attribute") - } - }, - }, - { - name: "Recursively split into batches because request size too large", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 2000, - BatchRequestSizeLimitHTTP: 5242880, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - logRecords := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - // 8192 * 1280 = 5242880 * 2 - body := tokenWithLength(8192) - for i := 0; i < 1280; i++ { - record1 := logRecords.AppendEmpty() - record1.Body().SetStr(string(body)) - record1.Attributes().FromRaw(map[string]any{"chronicle_log_type": "WINEVTLOGS1", "chronicle_namespace": "test1", `chronicle_ingestion_label["key1"]`: "value1", `chronicle_ingestion_label["key2"]`: "value2"}) - } - - return logs - }, - - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - expectedLabels := map[string]string{ - "key1": "value1", - "key2": "value2", - } - // verify 1 request log type - require.Len(t, requests, 1, "Expected one log type for the requests") - winEvtLogRequests := requests["WINEVTLOGS1"] - require.Len(t, winEvtLogRequests, 4, "Expected four batches") - - logs1 := winEvtLogRequests[0].GetInlineSource().Logs - require.Len(t, logs1, 320, "Expected 320 log entries in the first batch") - // verify variables for first log - require.Equal(t, logs1[0].EnvironmentNamespace, "test1") - require.Len(t, logs1[0].Labels, 2) - for key, label := range logs1[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs2 := winEvtLogRequests[1].GetInlineSource().Logs - require.Len(t, logs2, 320, "Expected 320 log entries in the second batch") - // verify variables for first log - require.Equal(t, logs2[0].EnvironmentNamespace, "test1") - require.Len(t, logs2[0].Labels, 2) - for key, label := range logs2[0].Labels { - 
require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs3 := winEvtLogRequests[2].GetInlineSource().Logs - require.Len(t, logs3, 320, "Expected 320 log entries in the third batch") - // verify variables for first log - require.Equal(t, logs3[0].EnvironmentNamespace, "test1") - require.Len(t, logs3[0].Labels, 2) - for key, label := range logs3[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - - logs4 := winEvtLogRequests[3].GetInlineSource().Logs - require.Len(t, logs4, 320, "Expected 320 log entries in the fourth batch") - // verify variables for first log - require.Equal(t, logs4[0].EnvironmentNamespace, "test1") - require.Len(t, logs4[0].Labels, 2) - for key, label := range logs4[0].Labels { - require.Equal(t, expectedLabels[key], label.Value, "Expected ingestion label to be overridden by attribute") - } - }, - }, - { - name: "Unsplittable log record, single log exceeds request size limit", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 100000, - }, - labels: []*api.Label{ - {Key: "env", Value: "staging"}, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - record1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords().AppendEmpty() - record1.Body().SetStr(string(tokenWithLength(100000))) - return logs - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1, "Expected one log type") - require.Len(t, requests["WINEVTLOG"], 0, "Expected WINEVTLOG log type to have zero requests") - }, - }, - { - name: "Unsplittable log record, single log exceeds request size limit, mixed with okay logs", - cfg: Config{ - CustomerID: uuid.New().String(), - LogType: "WINEVTLOG", - RawLogField: "body", - 
OverrideLogType: false, - BatchLogCountLimitHTTP: 1000, - BatchRequestSizeLimitHTTP: 100000, - }, - labels: []*api.Label{ - {Key: "env", Value: "staging"}, - }, - logRecords: func() plog.Logs { - logs := plog.NewLogs() - tooLargeBody := string(tokenWithLength(100001)) - // first normal log, then impossible to split log - logRecords1 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - record1 := logRecords1.AppendEmpty() - record1.Body().SetStr("First log message") - tooLargeRecord1 := logRecords1.AppendEmpty() - tooLargeRecord1.Body().SetStr(tooLargeBody) - // first impossible to split log, then normal log - logRecords2 := logs.ResourceLogs().AppendEmpty().ScopeLogs().AppendEmpty().LogRecords() - tooLargeRecord2 := logRecords2.AppendEmpty() - tooLargeRecord2.Body().SetStr(tooLargeBody) - record2 := logRecords2.AppendEmpty() - record2.Body().SetStr("Second log message") - return logs - }, - expectations: func(t *testing.T, requests map[string][]*api.ImportLogsRequest) { - require.Len(t, requests, 1, "Expected one log type") - winEvtLogRequests := requests["WINEVTLOG"] - require.Len(t, winEvtLogRequests, 2, "Expected WINEVTLOG log type to have zero requests") - - logs1 := winEvtLogRequests[0].GetInlineSource().Logs - require.Len(t, logs1, 1, "Expected 1 log entry in the first batch") - require.Equal(t, string(logs1[0].Data), "First log message") - - logs2 := winEvtLogRequests[1].GetInlineSource().Logs - require.Len(t, logs2, 1, "Expected 1 log entry in the second batch") - require.Equal(t, string(logs2[0].Data), "Second log message") - }, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - customerID, err := uuid.Parse(tt.cfg.CustomerID) - require.NoError(t, err) - - marshaler, err := newProtoMarshaler(tt.cfg, component.TelemetrySettings{Logger: logger}, customerID[:]) - marshaler.startTime = startTime - require.NoError(t, err) - - logs := tt.logRecords() - requests, err := 
marshaler.MarshalRawLogsForHTTP(context.Background(), logs) - require.NoError(t, err) - - tt.expectations(t, requests) - }) - } -} - -func tokenWithLength(length int) []byte { - charset := "abcdefghijklmnopqrstuvwxyz" - b := make([]byte, length) - for i := range b { - b[i] = charset[rand.Intn(len(charset))] - } - return b -} - -func mockLogRecord(body string, attributes map[string]any) plog.LogRecord { - lr := plog.NewLogRecord() - lr.Body().SetStr(body) - for k, v := range attributes { - switch val := v.(type) { - case string: - lr.Attributes().PutStr(k, val) - default: - } - } - return lr -} - -func mockLogs(record plog.LogRecord) plog.Logs { - logs := plog.NewLogs() - rl := logs.ResourceLogs().AppendEmpty() - sl := rl.ScopeLogs().AppendEmpty() - record.CopyTo(sl.LogRecords().AppendEmpty()) - return logs -} - -type getRawFieldCase struct { - name string - field string - logRecord plog.LogRecord - scope plog.ScopeLogs - resource plog.ResourceLogs - expect string - expectErrStr string -} - -// Used by tests and benchmarks -var getRawFieldCases = []getRawFieldCase{ - { - name: "String body", - field: "body", - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Body().SetStr("703604000x80800000000000003562SystemWIN-L6PC55MPB98Print Spoolerstopped530070006F006F006C00650072002F0031000000") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "703604000x80800000000000003562SystemWIN-L6PC55MPB98Print Spoolerstopped530070006F006F006C00650072002F0031000000", - }, - { - name: "Empty body", - field: "body", - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Body().SetStr("") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "", - }, - { - name: "Map body", - field: "body", - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Body().SetEmptyMap() - lr.Body().Map().PutStr("param1", "Print Spooler") - lr.Body().Map().PutStr("param2", 
"stopped") - lr.Body().Map().PutStr("binary", "530070006F006F006C00650072002F0031000000") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: `{"binary":"530070006F006F006C00650072002F0031000000","param1":"Print Spooler","param2":"stopped"}`, - }, - { - name: "Map body field", - field: "body[\"param1\"]", - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Body().SetEmptyMap() - lr.Body().Map().PutStr("param1", "Print Spooler") - lr.Body().Map().PutStr("param2", "stopped") - lr.Body().Map().PutStr("binary", "530070006F006F006C00650072002F0031000000") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "Print Spooler", - }, - { - name: "Map body field missing", - field: "body[\"missing\"]", - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Body().SetEmptyMap() - lr.Body().Map().PutStr("param1", "Print Spooler") - lr.Body().Map().PutStr("param2", "stopped") - lr.Body().Map().PutStr("binary", "530070006F006F006C00650072002F0031000000") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "", - }, - { - name: "Attribute log_type", - field: `attributes["log_type"]`, - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Attributes().PutStr("status", "200") - lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") - lr.Attributes().PutStr("log_type", "WINEVTLOG") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "WINEVTLOG", - }, - { - name: "Attribute log_type missing", - field: `attributes["log_type"]`, - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Attributes().PutStr("status", "200") - lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "", - }, - { - name: "Attribute 
chronicle_log_type", - field: `attributes["chronicle_log_type"]`, - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Attributes().PutStr("status", "200") - lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") - lr.Attributes().PutStr("chronicle_log_type", "MICROSOFT_SQL") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "MICROSOFT_SQL", - }, - { - name: "Attribute chronicle_namespace", - field: `attributes["chronicle_namespace"]`, - logRecord: func() plog.LogRecord { - lr := plog.NewLogRecord() - lr.Attributes().PutStr("status", "200") - lr.Attributes().PutStr("log_type", "k8s-container") - lr.Attributes().PutStr("log.file.name", "/var/log/containers/agent_agent_ns.log") - lr.Attributes().PutStr("chronicle_log_type", "MICROSOFT_SQL") - lr.Attributes().PutStr("chronicle_namespace", "test") - return lr - }(), - scope: plog.NewScopeLogs(), - resource: plog.NewResourceLogs(), - expect: "test", - }, -} - -func Test_getRawField(t *testing.T) { - for _, tc := range getRawFieldCases { - t.Run(tc.name, func(t *testing.T) { - m := &protoMarshaler{} - m.set.Logger = zap.NewNop() - - ctx := context.Background() - - rawField, err := m.getRawField(ctx, tc.field, tc.logRecord, tc.scope, tc.resource) - if tc.expectErrStr != "" { - require.Contains(t, err.Error(), tc.expectErrStr) - return - } - - require.NoError(t, err) - require.Equal(t, tc.expect, rawField) - }) - } -} - -func Benchmark_getRawField(b *testing.B) { - m := &protoMarshaler{} - m.set.Logger = zap.NewNop() - - ctx := context.Background() - - for _, tc := range getRawFieldCases { - b.ResetTimer() - b.Run(tc.name, func(b *testing.B) { - for i := 0; i < b.N; i++ { - _, _ = m.getRawField(ctx, tc.field, tc.logRecord, tc.scope, tc.resource) - } - }) - } -}