Skip to content

Commit

Permalink
fix(core): Fix tantivy column cache not releasing memory from deleted…
Browse files Browse the repository at this point in the history
… segments (#1864)

Columns in the column cache hold a reference to the mmaped file data that backs
the segment.  These segments can be deleted during segment merging, but if a column
for that segment is in the cache it prevents the mmap from closing and releasing RAM.

To fix this we subscribe for notifications on segment list changes and clear the
column cache when these occur so stale segments can be reclaimed.
  • Loading branch information
rfairfax authored Oct 7, 2024
1 parent 33e4656 commit ebee7ae
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 6 deletions.
2 changes: 1 addition & 1 deletion core/src/rust/filodb_core/src/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ pub extern "system" fn Java_filodb_core_memstore_TantivyNativeMethods_00024_newI
column_cache_size as u64,
query_cache_max_size as u64,
query_cache_estimated_item_size as u64,
))
)?)
})
}

Expand Down
20 changes: 15 additions & 5 deletions core/src/rust/filodb_core/src/state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ use std::{
use filesize::PathExt;
use jni::sys::jlong;
use tantivy::{
directory::MmapDirectory,
directory::{MmapDirectory, WatchCallback, WatchHandle},
schema::{Field, OwnedValue, Schema},
IndexReader, IndexWriter, Searcher, TantivyDocument, TantivyError,
Directory, IndexReader, IndexWriter, Searcher, TantivyDocument, TantivyError,
};
use tantivy_utils::{
collectors::{
Expand Down Expand Up @@ -40,6 +40,8 @@ pub struct IndexHandle {
pub column_cache: ColumnCache,
// Mmap dir - used for stats only
pub mmap_directory: MmapDirectory,
// Watch handle - notifies when to clear the column cache
_watch_handle: WatchHandle,

// Fields that need synchronization
//
Expand All @@ -59,8 +61,15 @@ impl IndexHandle {
column_cache_size: u64,
query_cache_max_size: u64,
query_cache_estimated_item_size: u64,
) -> jlong {
) -> tantivy::Result<jlong> {
let estimated_item_count: u64 = query_cache_max_size / query_cache_estimated_item_size;
let column_cache = ColumnCache::new(column_cache_size as usize);

let cache = column_cache.clone();
// When the index segment list changes, clear the column cache to release those mmaped files
let watch_handle = mmap_directory.watch(WatchCallback::new(move || {
cache.clear();
}))?;

let obj = Box::new(Self {
schema,
Expand All @@ -69,11 +78,12 @@ impl IndexHandle {
reader,
changes_pending: AtomicBool::new(false),
query_cache: QueryCache::new(estimated_item_count, query_cache_max_size),
column_cache: ColumnCache::new(column_cache_size as usize),
column_cache,
mmap_directory,
_watch_handle: watch_handle,
});

Box::into_raw(obj) as jlong
Ok(Box::into_raw(obj) as jlong)
}

/// Decode handle back into a reference
Expand Down
4 changes: 4 additions & 0 deletions core/src/rust/tantivy_utils/src/collectors/column_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ impl ColumnCache {
}
}

pub fn clear(&self) {
self.cache.clear();
}

pub fn stats(&self) -> (u64, u64) {
(self.cache.hits(), self.cache.misses())
}
Expand Down

0 comments on commit ebee7ae

Please sign in to comment.