diff --git a/backend/background/jobs/event_embeddings.go b/backend/background/jobs/event_embeddings.go new file mode 100644 index 00000000..4c3da1a8 --- /dev/null +++ b/backend/background/jobs/event_embeddings.go @@ -0,0 +1,55 @@ +package jobs + +import ( + "context" + "fmt" + "log/slog" + "time" + + "github.com/GenerateNU/sac/backend/background" + "github.com/GenerateNU/sac/backend/entities/models" + "github.com/GenerateNU/sac/backend/search" + + "github.com/GenerateNU/sac/backend/constants" +) + +// Generate event embeddings for events that did not receive them when being created or updated. This could occur in the case of +// mock data (which is uploaded to postgres directly, doesn't go through the app), or in the case OpenAI API goes down (service outage, bad api key, etc) +func (j *Jobs) EventEmbeddings(ctx context.Context) background.JobFunc { + return func() { + t := time.NewTicker(constants.EMBEDDINGS_GENERATION_INTERVAL) + + for range t.C { + func() { + tx := j.db.WithContext(ctx).Begin() + defer func() { + if r := recover(); r != nil { + tx.Rollback() + } + }() + + var event models.Event + if err := tx.Raw("SELECT * FROM events WHERE embedding IS NULL FOR UPDATE SKIP LOCKED LIMIT 1").Scan(&event).Error; err != nil { + tx.Rollback() + return + } + + if event.Name == "" && event.Preview == "" && event.Description == "" { // empty club + tx.Rollback() + return + } + + slog.Info(fmt.Sprintf("Generating embeddings for event '%s' (%s)", event.Name, event.ID.String())) + + if err := search.UpsertEventEmbedding(tx, j.search, &event); err != nil { + tx.Rollback() + return + } + + if err := tx.Commit().Error; err != nil { + return + } + }() + } + } +} diff --git a/backend/main.go b/backend/main.go index eb005c6e..43f2a0a4 100644 --- a/backend/main.go +++ b/backend/main.go @@ -96,6 +96,7 @@ func startBackgroundJobs(ctx context.Context, db *gorm.DB, settings *config.Sett jobs := jobs.New(db, settings) background.Go(jobs.WelcomeSender(ctx)) background.Go(jobs.ClubEmbeddings(ctx)) + background.Go(jobs.EventEmbeddings(ctx)) } func configureIntegrations(config *config.Integrations) *integrations.Integrations { diff --git a/backend/search/embeddings.go b/backend/search/embeddings.go index 4e9698a1..e37a1615 100644 --- a/backend/search/embeddings.go +++ b/backend/search/embeddings.go @@ -13,6 +13,7 @@ import ( "github.com/goccy/go-json" "github.com/gofiber/fiber/v2" "gorm.io/gorm" + "gorm.io/gorm/logger" ) type CreateEmbeddingRequestBody struct { @@ -41,7 +42,10 @@ func UpsertClubEmbedding(db *gorm.DB, s *config.SearchSettings, club *models.Clu queryString := fmt.Sprintf( "UPDATE clubs SET embedding = '[%s]' WHERE id = '%s'", embeddingStr, club.ID.String()) - if err := db.Exec(queryString).Error; err != nil { + // Keep stdout/logs clean, don't output 512 floats + session := db.Session(&gorm.Session{Logger: logger.Default.LogMode(logger.Error)}) + + if err := session.Exec(queryString).Error; err != nil { return err } @@ -60,7 +64,10 @@ func UpsertEventEmbedding(db *gorm.DB, s *config.SearchSettings, event *models.E queryString := fmt.Sprintf( "UPDATE events SET embedding = '[%s]' WHERE id = '%s'", embeddingStr, event.ID.String()) - if err := db.Exec(queryString).Error; err != nil { + // Keep stdout/logs clean, don't output 512 floats + session := db.Session(&gorm.Session{Logger: logger.Default.LogMode(logger.Error)}) + + if err := session.Exec(queryString).Error; err != nil { return err }