Skip to content

Commit

Permalink
cache error group fingerprinting logic (highlight#9434)
Browse files Browse the repository at this point in the history
## Summary

Before performing expensive error object matching against an error
group, try to match the error object exactly by using the event +
stacktrace as a cache lookup key.

## How did you test this change?

Unit tests that exercise this logic; they also helped identify the need
to run logic that updates the error group based on the new error object.

## Are there any deployment considerations?

no

## Does this work require review from our design team?

no
  • Loading branch information
Vadman97 authored and giraffekey committed Nov 21, 2024
1 parent aedc095 commit 9620894
Show file tree
Hide file tree
Showing 18 changed files with 310 additions and 471 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ jobs:
e2e-cypress:
name: E2E Client Cypress
timeout-minutes: 60
runs-on: buildjet-4vcpu-ubuntu-2204
runs-on: buildjet-8vcpu-ubuntu-2204

steps:
- name: Checkout
Expand Down
3 changes: 3 additions & 0 deletions backend/embeddings/embeddings.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"encoding/json"
"fmt"
"github.com/highlight-run/highlight/backend/env"
"github.com/highlight-run/highlight/backend/util"
"io"
"math"
"net/http"
Expand Down Expand Up @@ -194,6 +195,8 @@ func GetErrorObjectQuery(errorObj *model.ErrorObject) string {
}

func (c *HuggingfaceModelClient) GetEmbeddings(ctx context.Context, errors []*model.ErrorObject) ([]*model.ErrorObjectEmbeddings, error) {
span, ctx := util.StartSpanFromContext(ctx, "huggingface.GetEmbeddings", util.Tag("num_errors", len(errors)))
defer span.Finish()
start := time.Now()
var combinedInputs []string
for _, errorObject := range errors {
Expand Down
2 changes: 1 addition & 1 deletion backend/env/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ func EnvironmentName() string {
// ConsumerSpanSamplingRate returns the sampling rate as 1/N, where N is
// Config.ConsumerFraction parsed as a base-10 int64.
// NOTE(review): a parsed value of 0 is not guarded against; the float
// division below would then yield +Inf rather than a usable rate.
func ConsumerSpanSamplingRate() float64 {
i, err := strconv.ParseInt(Config.ConsumerFraction, 10, 64)
if err != nil {
// Fallback used when ConsumerFraction is unset or not a valid integer.
// NOTE(review): two consecutive fallback assignments appear here; read
// sequentially, the second one (1_000_000) is the value that takes effect.
i = 1_000
i = 1_000_000
}
return 1. / float64(i)
}
11 changes: 11 additions & 0 deletions backend/errorgroups/fingerprint.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package errorgroups

import (
"fmt"
"github.com/aws/smithy-go/ptr"
"strconv"
"strings"

Expand Down Expand Up @@ -38,6 +40,15 @@ func GetFingerprints(projectID int, errorTraces []*privateModel.ErrorTrace) []*m
return fingerprints
}

// GetKey builds the cache lookup key used to recognise an error object that
// is an exact repeat of one seen before.
//
// The key combines the project ID, the error object's Event, its raw
// StackTrace, and a rendering of every fingerprint that GetFingerprints
// derives from structuredStackTrace (type, value and index, each entry
// followed by a single space).
func GetKey(projectID int, errorObj *model.ErrorObject, structuredStackTrace []*privateModel.ErrorTrace) string {
	// Accumulate into a builder: re-formatting the whole accumulated string on
	// every iteration is quadratic in the number of fingerprints.
	var fingerprintsStr strings.Builder
	for _, fp := range GetFingerprints(projectID, structuredStackTrace) {
		// Same verbs as before so the rendered key is byte-for-byte unchanged.
		fmt.Fprintf(&fingerprintsStr, "%s%s%d ", fp.Type, fp.Value, fp.Index)
	}
	stackBody := joinStringPtrs(errorObj.StackTrace, ptr.String(fingerprintsStr.String()))
	return fmt.Sprintf("error-object-group-%d-%s-%s", projectID, errorObj.Event, stackBody)
}

func joinStringPtrs(ptrs ...*string) string {
var sb strings.Builder
for _, ptr := range ptrs {
Expand Down
3 changes: 1 addition & 2 deletions backend/parser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,14 @@ import (
modelInputs "github.com/highlight-run/highlight/backend/private-graph/graph/model"
"github.com/highlight-run/highlight/backend/util"
"github.com/huandu/go-sqlbuilder"
"go.opentelemetry.io/otel/trace"
)

// GetSearchListener creates a SearchListener bound to the given SQL builder
// and table configuration. The query argument is accepted but not used when
// constructing the listener.
func GetSearchListener(sqlBuilder *sqlbuilder.SelectBuilder, query string, tableConfig model.TableConfig) *listener.SearchListener {
	searchListener := listener.NewSearchListener(sqlBuilder, tableConfig)
	return searchListener
}

func GetSearchFilters(query string, tableConfig model.TableConfig, listener *listener.SearchListener) listener.Filters {
s := util.StartSpan("GetSearchFilters", util.WithSpanKind(trace.SpanKindServer), util.Tag("query", query))
s := util.StartSpan("GetSearchFilters", util.Tag("query", query))
defer s.Finish()

if !strings.Contains(query, string(modelInputs.ReservedTraceKeyMetricName)) {
Expand Down
73 changes: 58 additions & 15 deletions backend/public-graph/graph/resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,9 @@ func (r *Resolver) tagErrorGroup(ctx context.Context, errorObj *model.ErrorObjec
}

func (r *Resolver) GetOrCreateErrorGroup(ctx context.Context, errorObj *model.ErrorObject, matchFn func() (*int, error), onCreateGroup func(int) error, tagGroup bool) (*model.ErrorGroup, error) {
span, ctx := util.StartSpanFromContext(ctx, "resolver.GetOrCreateErrorGroup", util.Tag("error_object_id", errorObj.ID))
defer span.Finish()

match, err := matchFn()
if err != nil {
return nil, err
Expand Down Expand Up @@ -557,7 +560,7 @@ func (r *Resolver) GetOrCreateErrorGroup(ctx context.Context, errorObj *model.Er
}

func (r *Resolver) GetTopErrorGroupMatchByEmbedding(ctx context.Context, projectID int, method model.ErrorGroupingMethod, embedding model.Vector, threshold float64) (*int, error) {
span, ctx := util.StartSpanFromContext(ctx, "public-resolver", util.ResourceName("GetTopErrorGroupMatchByEmbedding"), util.Tag("projectID", projectID))
span, ctx := util.StartSpanFromContext(ctx, "resolver.GetTopErrorGroupMatchByEmbedding", util.Tag("projectID", projectID), util.Tag("method", method))
defer span.Finish()

result := struct {
Expand Down Expand Up @@ -614,6 +617,9 @@ func (r *Resolver) GetTopErrorGroupMatchByEmbedding(ctx context.Context, project
}

func (r *Resolver) GetTopErrorGroupMatch(ctx context.Context, event string, projectID int, fingerprints []*model.ErrorFingerprint) (*int, error) {
span, ctx := util.StartSpanFromContext(ctx, "resolver.GetTopErrorGroupMatch", util.Tag("projectID", projectID), util.Tag("event", event), util.Tag("num_fingerprints", len(fingerprints)))
defer span.Finish()

firstCode := ""
firstMeta := ""
restCode := []string{}
Expand Down Expand Up @@ -748,7 +754,8 @@ func (r *Resolver) isWithinErrorQuota(ctx context.Context, workspace *model.Work
return withinBillingQuota
}

// Matches the ErrorObject with an existing ErrorGroup, or creates a new one if the group does not exist
// HandleErrorAndGroup caches the result of handleErrorAndGroup under the exact match of the error body + stacktrace.
// Improves performance of handleErrorAndGroup by first checking if the exact error object has been grouped before.
func (r *Resolver) HandleErrorAndGroup(ctx context.Context, errorObj *model.ErrorObject, structuredStackTrace []*privateModel.ErrorTrace, fields []*model.ErrorField, projectID int, workspace *model.Workspace) (*model.ErrorGroup, error) {
span, ctx := util.StartSpanFromContext(ctx, "HandleErrorAndGroup", util.Tag("projectID", projectID))
defer span.Finish()
Expand Down Expand Up @@ -789,7 +796,48 @@ func (r *Resolver) HandleErrorAndGroup(ctx context.Context, errorObj *model.Erro
}
}

fingerprints := []*model.ErrorFingerprint{}
key := errorgroups.GetKey(projectID, errorObj, structuredStackTrace)
var cacheMiss bool
eg, err := redis.CachedEval(ctx, r.Redis, key, 10*time.Second, time.Hour, func() (*model.ErrorGroup, error) {
cacheMiss = true
return r.handleErrorAndGroup(ctx, project, errorObj, structuredStackTrace, fields, projectID, workspace)
})
if eg == nil || err != nil {
log.WithContext(ctx).WithError(err).WithField("project_id", projectID).Error("failed to group error")
return eg, err
}

// on cache hit, we want to run logic to update error group based on new error object
if !cacheMiss {
// tagGroup is ignored when error group is matched
eg, err = r.GetOrCreateErrorGroup(ctx, errorObj, func() (*int, error) {
return ptr.Int(eg.ID), nil
}, nil, false)
if eg == nil || err != nil {
log.WithContext(ctx).WithError(err).WithField("project_id", projectID).Error("failed to group error on cache hit")
return eg, err
}
}

// save error object after grouping
errorObj.ErrorGroupID = eg.ID
if err := r.DB.WithContext(ctx).Create(errorObj).Error; err != nil {
return nil, e.Wrap(err, "Error performing error insert for error")
}

if err := r.DataSyncQueue.Submit(ctx, strconv.Itoa(errorObj.ID), &kafka_queue.Message{Type: kafka_queue.ErrorObjectDataSync, ErrorObjectDataSync: &kafka_queue.ErrorObjectDataSyncArgs{ErrorObjectID: errorObj.ID}}); err != nil {
return nil, err
}

return eg, err
}

// Matches the ErrorObject with an existing ErrorGroup, or creates a new one if the group does not exist
func (r *Resolver) handleErrorAndGroup(ctx context.Context, project *model.Project, errorObj *model.ErrorObject, structuredStackTrace []*privateModel.ErrorTrace, fields []*model.ErrorField, projectID int, workspace *model.Workspace) (*model.ErrorGroup, error) {
span, ctx := util.StartSpanFromContext(ctx, "handleErrorAndGroup", util.Tag("projectID", projectID))
defer span.Finish()

var fingerprints []*model.ErrorFingerprint
fingerprints = append(fingerprints, errorgroups.GetFingerprints(projectID, structuredStackTrace)...)

// Try unmarshalling the Event to JSON.
Expand All @@ -815,8 +863,8 @@ func (r *Resolver) HandleErrorAndGroup(ctx context.Context, errorObj *model.Erro
}
}

var err error
var errorGroup *model.ErrorGroup

var settings *model.AllWorkspaceSettings
if workspace != nil {
if settings, err = r.Store.GetAllWorkspaceSettings(ctx, workspace.ID); err != nil {
Expand All @@ -831,15 +879,15 @@ func (r *Resolver) HandleErrorAndGroup(ctx context.Context, errorObj *model.Erro
var emb []*model.ErrorObjectEmbeddings
emb, err = r.EmbeddingsClient.GetEmbeddings(eCtx, []*model.ErrorObject{errorObj})
if err != nil || len(emb) == 0 {
log.WithContext(ctx).WithError(err).WithField("error_object_id", errorObj.ID).Error("failed to get embeddings")
log.WithContext(ctx).WithError(err).Error("failed to get embeddings")
errorObj.ErrorGroupingMethod = model.ErrorGroupingMethodClassic
} else {
embedding = emb[0]
embeddingType := model.ErrorGroupingMethodGteLargeEmbeddingV3
errorGroup, err = r.GetOrCreateErrorGroup(ctx, errorObj, func() (*int, error) {
match, err := r.GetTopErrorGroupMatchByEmbedding(ctx, errorObj.ProjectID, embeddingType, embedding.GteLargeEmbedding, settings.ErrorEmbeddingsThreshold)
if err != nil {
log.WithContext(ctx).WithError(err).WithField("error_object_id", errorObj.ID).Error("failed to group error using embeddings")
log.WithContext(ctx).WithError(err).Error("failed to group error using embeddings")
}
return match, err
}, func(errorGroupId int) error {
Expand Down Expand Up @@ -873,15 +921,6 @@ func (r *Resolver) HandleErrorAndGroup(ctx context.Context, errorObj *model.Erro
return nil, e.Wrap(err, "Error getting or creating error group")
}
}
errorObj.ErrorGroupID = errorGroup.ID

if err := r.DB.WithContext(ctx).Create(errorObj).Error; err != nil {
return nil, e.Wrap(err, "Error performing error insert for error")
}

if err := r.DataSyncQueue.Submit(ctx, strconv.Itoa(errorObj.ID), &kafka_queue.Message{Type: kafka_queue.ErrorObjectDataSync, ErrorObjectDataSync: &kafka_queue.ErrorObjectDataSyncArgs{ErrorObjectID: errorObj.ID}}); err != nil {
return nil, err
}

if err := r.AppendErrorFields(ctx, fields, errorGroup); err != nil {
return nil, e.Wrap(err, "error appending error fields")
Expand Down Expand Up @@ -2255,6 +2294,10 @@ func (r *Resolver) ProcessBackendPayloadImpl(ctx context.Context, sessionSecureI
for _, errorInstances := range groupedErrors {
instance := errorInstances[len(errorInstances)-1]
data := groups[instance.ErrorGroupID]
if data.Group == nil || data.SessionObj == nil {
log.WithContext(ctx).WithField("error_group_id", instance.ErrorGroupID).Error("skipping error group alert")
continue
}
r.sendErrorAlert(ctx, data.Group.ProjectID, data.SessionObj, data.Group, instance, data.VisitedURL)
}

Expand Down
3 changes: 1 addition & 2 deletions backend/redis/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@ package redis

import (
"context"
"time"

"github.com/go-redis/cache/v9"
log "github.com/sirupsen/logrus"
"time"
)

type Config struct {
Expand Down
2 changes: 1 addition & 1 deletion e2e/dotnet/dotnet.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Highlight.ASPCore" Version="0.1.7" />
<PackageReference Include="Highlight.ASPCore" Version="0.2.8" />
</ItemGroup>

</Project>
2 changes: 2 additions & 0 deletions e2e/dotnet/dotnet.sln.DotSettings.user
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
<wpf:ResourceDictionary xml:space="preserve" xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml" xmlns:s="clr-namespace:System;assembly=mscorlib" xmlns:ss="urn:shemas-jetbrains-com:settings-storage-xaml" xmlns:wpf="http://schemas.microsoft.com/winfx/2006/xaml/presentation">
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AAction_00601_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003F_002Econfig_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Fdc37343e91a74fa08a5cd84f35e6e7f2b2d000_003F58_003Fb419d2a4_003FAction_00601_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003ABatchedOpenTelemetrySinkOptions_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003F_002Econfig_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F14fe666a9659405990afc8069f75911b1ea00_003F5c_003F17185c69_003FBatchedOpenTelemetrySinkOptions_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AExceptionDispatchInfo_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FSourcesCache_003Fbf9021a960b74107a7e141aa06bc9d8a0a53c929178c2fb95b1597be8af8dc_003FExceptionDispatchInfo_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AHighlightCollectionExtensions_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F279415144e514a2b9a2e2e2432c2bd385a00_003F46_003Fb69a93fc_003FHighlightCollectionExtensions_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AHighlightCollectionExtensions_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003FLibrary_003FApplication_0020Support_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003Ff4ba866c94ba49f7b01ce2b36f00ff5a5200_003F85_003F63e65f83_003FHighlightCollectionExtensions_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AHttpRequest_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003F_002Econfig_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FSourcesCache_003Fb34eef3a22e5b353fa75af531b179226d42b88d39fbefa3f0d8d4372def62a3_003FHttpRequest_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AIRazorComponentsBuilder_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003F_002Econfig_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FSourcesCache_003F65103367c25c6981c9abe8ef44a929e123ecd713d6c215922b5ad7628c6801b_003FIRazorComponentsBuilder_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
<s:String x:Key="/Default/CodeInspection/ExcludedFiles/FilesAndFoldersToSkip2/=7020124F_002D9FFC_002D4AC3_002D8F3D_002DAAB8E0240759_002Ff_003AOpenTelemetryLoggerConfigurationExtensions_002Ecs_002Fl_003A_002E_002E_003F_002E_002E_003F_002E_002E_003F_002E_002E_003F_002Econfig_003FJetBrains_003FRider2024_002E2_003Fresharper_002Dhost_003FDecompilerCache_003Fdecompiler_003F14fe666a9659405990afc8069f75911b1ea00_003F47_003Fd5f63043_003FOpenTelemetryLoggerConfigurationExtensions_002Ecs/@EntryIndexedValue">ForceIncluded</s:String>
Expand Down
2 changes: 1 addition & 1 deletion e2e/dotnet4/cs/packages.config
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,4 @@
<package id="Modernizr" version="2.8.3" targetFramework="net48" />
<package id="Newtonsoft.Json" version="13.0.1" targetFramework="net48" />
<package id="WebGrease" version="1.6.0" targetFramework="net48" />
</packages>
</packages>
21 changes: 9 additions & 12 deletions e2e/nextjs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
"start": "next start -p 3005",
"lint": "next lint"
},
"installConfig": {
"hoistingLimits": "workspaces"
},
"dependencies": {
"@highlight-run/next": "workspace:*",
"@highlight-run/pino": "workspace:*",
Expand All @@ -21,23 +18,23 @@
"@trpc/next": "^10.44.1",
"@trpc/react-query": "^10.38.5",
"@trpc/server": "^10.38.5",
"@types/node": "20.8.2",
"@types/node": "^20.8.2",
"@types/pg": "^8.10.9",
"@types/react": "18.2.25",
"@types/react-dom": "18.2.10",
"@types/react": "^18.2.25",
"@types/react-dom": "^18.2.10",
"babylonjs": "^6.23.0",
"classnames": "^2.3.2",
"eslint": "8.50.0",
"eslint-config-next": "13.5.4",
"eslint": "^8.50.0",
"eslint-config-next": "^13.5.4",
"ky": "^1.2.3",
"next": "14.1.1",
"next": "^14.1.1",
"next-build-id": "^3.0.0",
"pg": "^8.11.3",
"pino": "^8.19.0",
"pino-pretty": "^10.3.1",
"react": "18.2.0",
"react-dom": "18.2.0",
"typescript": "5.2.2",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"typescript": "^5.2.2",
"winston": "^3.10.0",
"zod": "^3.22.4"
},
Expand Down
13 changes: 6 additions & 7 deletions e2e/tests/src/fetch_session_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from query_gql import (
GET_SESSION,
GET_SESSION_INTERVALS,
GET_SESSIONS_CLICKHOUSE,
GET_SESSIONS,
GET_EVENT_CHUNKS,
GET_EVENT_CHUNK_URL,
)
Expand Down Expand Up @@ -47,12 +47,11 @@ def main():
r = requests.post(
API_URL,
json={
"operationName": "GetSessionsClickhouse",
"operationName": "GetSessions",
"variables": {
"query": {
"isAnd": True,
"rules": [],
"dateRange": {
"params": {
"query": "",
"date_range": {
"start_date": (datetime.now() - timedelta(days=90)).strftime(
"%Y-%m-%dT%H:%M:%S.%fZ"
),
Expand All @@ -64,7 +63,7 @@ def main():
"project_id": PROJECT_ID,
"sort_desc": True,
},
"query": GET_SESSIONS_CLICKHOUSE,
"query": GET_SESSIONS,
},
headers={"Authorization": f"Bearer {auth}"},
)
Expand Down
Loading

0 comments on commit 9620894

Please sign in to comment.