Skip to content

Commit

Permalink
MB-57888: WIP: Index Update
Browse files Browse the repository at this point in the history
  • Loading branch information
Likith101 committed Nov 26, 2024
1 parent 902051d commit fb40760
Show file tree
Hide file tree
Showing 6 changed files with 487 additions and 12 deletions.
35 changes: 31 additions & 4 deletions index/scorch/persister.go
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,18 @@ func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
return nil, nil, err
}
}

// store updated field info
if segmentSnapshot.updatedFields != nil {
b, err := json.Marshal(segmentSnapshot.updatedFields)
if err != nil {
return nil, nil, err
}
err = snapshotSegmentBucket.Put(boltUpdatedFieldsKey, b)
if err != nil {
return nil, nil, err
}
}
}

return filenames, newSegmentPaths, nil
Expand Down Expand Up @@ -722,6 +734,7 @@ var boltMetaDataSegmentTypeKey = []byte("type")
var boltMetaDataSegmentVersionKey = []byte("version")
var boltMetaDataTimeStamp = []byte("timeStamp")
var boltStatsKey = []byte("stats")
var boltUpdatedFieldsKey = []byte("fields")
var TotBytesWrittenKey = []byte("TotBytesWritten")

func (s *Scorch) loadFromBolt() error {
Expand Down Expand Up @@ -860,6 +873,9 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
}
rv.segment = append(rv.segment, segmentSnapshot)
rv.offsets = append(rv.offsets, running)
if segmentSnapshot.updatedFields != nil {
rv.updatedFields = segmentSnapshot.updatedFields
}
running += segmentSnapshot.segment.Count()
}
}
Expand All @@ -872,13 +888,13 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
return nil, fmt.Errorf("segment path missing")
}
segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
segment, err := s.segPlugin.Open(segmentPath)
seg, err := s.segPlugin.Open(segmentPath)
if err != nil {
return nil, fmt.Errorf("error opening bolt segment: %v", err)
}

rv := &SegmentSnapshot{
segment: segment,
segment: seg,
cachedDocs: &cachedDocs{cache: nil},
cachedMeta: &cachedMeta{meta: nil},
}
Expand All @@ -888,7 +904,7 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
r := bytes.NewReader(deletedBytes)
_, err := deletedBitmap.ReadFrom(r)
if err != nil {
_ = segment.Close()
_ = seg.Close()
return nil, fmt.Errorf("error reading deleted bytes: %v", err)
}
if !deletedBitmap.IsEmpty() {
Expand All @@ -902,11 +918,22 @@ func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, erro
err := json.Unmarshal(statBytes, &statsMap)
stats := &fieldStats{statMap: statsMap}
if err != nil {
_ = segment.Close()
_ = seg.Close()
return nil, fmt.Errorf("error reading stat bytes: %v", err)
}
rv.stats = stats
}
updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey)
if updatedFieldBytes != nil {
var updatedFields map[string]index.FieldInfo

Check failure on line 928 in index/scorch/persister.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/persister.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/persister.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/persister.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/persister.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.FieldInfo

err := json.Unmarshal(updatedFieldBytes, &updatedFields)
if err != nil {
_ = seg.Close()
return nil, fmt.Errorf("error reading updated field bytes: %v", err)
}
rv.updatedFields = updatedFields
}

return rv, nil
}
Expand Down
71 changes: 71 additions & 0 deletions index/scorch/scorch.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package scorch
import (
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"sync"
Expand All @@ -36,6 +37,8 @@ const Version uint8 = 2

var ErrClosed = fmt.Errorf("scorch closed")

var mappingInternalKey = []byte("_mapping")

type Scorch struct {
nextSegmentID uint64
stats Stats
Expand Down Expand Up @@ -882,3 +885,71 @@ func (s *Scorch) CopyReader() index.CopyReader {
// FireIndexEvent fires an EventKindIndexStart event through s.fireEvent,
// passing 0 as the second argument.
func (s *Scorch) FireIndexEvent() {
s.fireEvent(EventKindIndexStart, 0)
}

func (s *Scorch) UpdateFields(fieldInfo map[string]*index.FieldInfo, mappingBytes []byte) error {

Check failure on line 889 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 889 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 889 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 889 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 889 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.FieldInfo
err := s.updateBolt(fieldInfo, mappingBytes)
if err != nil {
return err
}
return nil
}

func (s *Scorch) updateBolt(fieldInfo map[string]*index.FieldInfo, mappingBytes []byte) error {

Check failure on line 897 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 897 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 897 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 897 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 897 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.FieldInfo
return s.rootBolt.Update(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(boltSnapshotsBucket)
if snapshots == nil {
return nil
}

c := snapshots.Cursor()
for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
_, _, err := decodeUvarintAscending(k)
if err != nil {
log.Printf("unable to parse segment epoch %x, continuing", k)
continue
}
snapshot := snapshots.Bucket(k)
cc := snapshot.Cursor()
for kk, _ := cc.First(); kk != nil; kk, _ = c.Next() {
if k[0] == boltInternalKey[0] {
internalBucket := snapshot.Bucket(k)
if internalBucket == nil {
return fmt.Errorf("segment key, but bucket missing % x", k)
}
err = internalBucket.Put(mappingInternalKey, mappingBytes)
if err != nil {
return err
}
} else if k[0] != boltMetaDataKey[0] {
segmentBucket := snapshot.Bucket(k)
if segmentBucket == nil {
return fmt.Errorf("segment key, but bucket missing % x", k)
}
var updatedFields map[string]index.FieldInfo

Check failure on line 928 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 928 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.FieldInfo
updatedFieldBytes := segmentBucket.Get(boltUpdatedFieldsKey)
if updatedFieldBytes != nil {
err := json.Unmarshal(updatedFieldBytes, &updatedFields)
if err != nil {
return fmt.Errorf("error reading updated field bytes: %v", err)
}
} else {
updatedFields = make(map[string]index.FieldInfo)

Check failure on line 936 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 936 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 936 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 936 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 936 in index/scorch/scorch.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.FieldInfo
}
for field, info := range fieldInfo {
updatedFields[field] = *info
}
b, err := json.Marshal(updatedFields)
if err != nil {
return err
}
err = segmentBucket.Put(boltUpdatedFieldsKey, b)
if err != nil {
return err
}
}
}
}

return nil
})
}
28 changes: 26 additions & 2 deletions index/scorch/snapshot_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ type IndexSnapshot struct {

m2 sync.Mutex // Protects the fields that follow.
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's

updatedFields map[string]index.FieldInfo

Check failure on line 93 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 93 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 93 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 93 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 93 in index/scorch/snapshot_index.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.FieldInfo
}

func (i *IndexSnapshot) Segments() []*SegmentSnapshot {
Expand Down Expand Up @@ -441,6 +443,10 @@ func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) {
// Keeping that TODO for now until we have a cleaner way.
rvd.StoredFieldsSize += uint64(len(val))

if info, ok := is.updatedFields[name]; ok &&
(info.All || info.Store) {
return true
}
// copy value, array positions to preserve them beyond the scope of this callback
value := append([]byte(nil), val...)
arrayPos := append([]uint64(nil), pos...)
Expand Down Expand Up @@ -580,7 +586,15 @@ func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field
segBytesRead := s.segment.BytesRead()
rv.incrementBytesRead(segBytesRead)
}
dict, err := s.segment.Dictionary(field)

var dict segment.TermDictionary
var err error
if info, ok := is.updatedFields[field]; ok &&
(info.Index || info.All) {
dict = nil
} else {
dict, err = s.segment.Dictionary(field)
}
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -712,14 +726,24 @@ func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
}
}

var filteredFields []string
for _, field := range vFields {
if info, ok := is.updatedFields[field]; ok &&
(info.DocValues || info.All) {
continue
} else {
filteredFields = append(filteredFields, field)
}
}

var errCh chan error

// cFields represents the fields that we'll need from the
// cachedDocs, and might optionally be provided by the caller,
// if the caller happens to know we're on the same segmentIndex
// from a previous invocation
if cFields == nil {
cFields = subtractStrings(fields, vFields)
cFields = subtractStrings(fields, filteredFields)

if !ss.cachedDocs.hasFields(cFields) {
errCh = make(chan error, 1)
Expand Down
13 changes: 7 additions & 6 deletions index/scorch/snapshot_segment.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ type SegmentSnapshot struct {
// segment was mmaped recently, in which case
// we consider the loading cost of the metadata
// as part of IO stats.
mmaped uint32
id uint64
segment segment.Segment
deleted *roaring.Bitmap
creator string
stats *fieldStats
mmaped uint32
id uint64
segment segment.Segment
deleted *roaring.Bitmap
creator string
stats *fieldStats
updatedFields map[string]index.FieldInfo

Check failure on line 44 in index/scorch/snapshot_segment.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 44 in index/scorch/snapshot_segment.go

View workflow job for this annotation

GitHub Actions / test (1.22.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 44 in index/scorch/snapshot_segment.go

View workflow job for this annotation

GitHub Actions / test (1.20.x, macos-latest)

undefined: index.FieldInfo

Check failure on line 44 in index/scorch/snapshot_segment.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, ubuntu-latest)

undefined: index.FieldInfo

Check failure on line 44 in index/scorch/snapshot_segment.go

View workflow job for this annotation

GitHub Actions / test (1.21.x, macos-latest)

undefined: index.FieldInfo

cachedMeta *cachedMeta

Expand Down
37 changes: 37 additions & 0 deletions index_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,10 +163,25 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
storeConfig = map[string]interface{}{}
}

var um *mapping.IndexMappingImpl
var umBytes []byte

storeConfig["path"] = indexStorePath(path)
storeConfig["create_if_missing"] = false
storeConfig["error_if_exists"] = false
for rck, rcv := range runtimeConfig {
if rck == "mapping" {
if val, ok := rcv.([]byte); ok {
err = util.UnmarshalJSON(val, &um)
if err != nil {
return nil, fmt.Errorf("error parsing updated mapping JSON: %v\nmapping contents:\n%s", err, val)
}
umBytes = val
} else {
return nil, fmt.Errorf("error typecasting updated mapping JSON\nmapping contents: %v", rcv)
}
continue
}
storeConfig[rck] = rcv
}

Expand Down Expand Up @@ -225,6 +240,28 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
return rv, err
}

if um != nil {
ui, ok := rv.i.(index.UpdateIndex)
if !ok {
return rv, fmt.Errorf("updated mapping present for unupdatable index")
}

err = um.Validate()
if err != nil {
return rv, err
}

fieldInfo, err := deletedFields(im, um)
if err != nil {
return rv, err
}

err = ui.UpdateFields(fieldInfo, umBytes)
if err != nil {
return rv, err
}
}

rv.m = im
indexStats.Register(rv)
return rv, err
Expand Down
Loading

0 comments on commit fb40760

Please sign in to comment.