Skip to content

Commit

Permalink
Merge pull request #4 from anyproto/go-4318-catch-tantivy-errors
Browse files Browse the repository at this point in the history
GO-4318 Add the ability to catch a panic
  • Loading branch information
fat-fellow authored Oct 23, 2024
2 parents 30eb2ba + 6d6df20 commit 809bab7
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 43 deletions.
2 changes: 1 addition & 1 deletion bindings.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,4 @@ void document_free(struct Document *doc_ptr);

void string_free(char *s);

void init_lib(const char *log_level_ptr, char **error_buffer);
void init_lib(const char *log_level_ptr, char **error_buffer, bool clear_on_panic);
2 changes: 1 addition & 1 deletion example/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ const NameTitle = "title"

func main() {
// Initialize the library
err := tantivy_go.LibInit("debug")
err := tantivy_go.LibInit(true, "debug")
if err != nil {
fmt.Println("Failed to initialize library:", err)
return
Expand Down
3 changes: 2 additions & 1 deletion rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ env_logger = "0.11.3"
serde_json = "1.0.117"
serde = { version = "1.0.203", features = ["derive"] }
unicode-segmentation = "1.11.0"
logcall = "0.1"
logcall = "0.1"
lazy_static = "1.5.0"
91 changes: 67 additions & 24 deletions rust/src/c_util/util.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use std::{fs, slice};
use std::{fs, panic, slice};
use std::collections::HashMap;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
use std::path::Path;
use std::sync::Mutex;
use lazy_static::lazy_static;
use log::debug;
use serde_json::json;
use tantivy::{Index, IndexWriter, TantivyDocument, TantivyError, Term};
Expand All @@ -12,6 +14,10 @@ use tantivy::schema::{Field, Schema};

use crate::tantivy_util::{convert_document_to_json, Document, TantivyContext, DOCUMENT_BUDGET_BYTES, find_highlights, get_string_field_entry, SearchResult};

// Absolute path of the full-text-search index directory, recorded by
// `create_context_with_schema` and read by the panic hook installed in
// `start_lib_init` (which deletes the directory on panic when requested).
// `Mutex::new` and `String::new` are const fns, so no lazy initialization
// (`lazy_static!`) is needed for this static.
static FTS_PATH: Mutex<String> = Mutex::new(String::new());

pub fn set_error(err: &str, error_buffer: *mut *mut c_char) {
let err_str = match CString::new(err) {
Ok(s) => s,
Expand All @@ -29,23 +35,31 @@ fn write_buffer(error_buffer: *mut *mut c_char, err_str: CString) {
}
}

pub fn assert_string<'a>(str_ptr: *const c_char, error_buffer: *mut *mut c_char) -> Option<&'a str> {
let result = unsafe {
fn process_c_str<'a>(str_ptr: *const c_char, error_buffer: *mut *mut c_char) -> Result<&'a str, String> {
unsafe {
if str_ptr.is_null() {
set_error(POINTER_IS_NULL, error_buffer);
return None;
return Err(POINTER_IS_NULL.to_owned());
}
CStr::from_ptr(str_ptr)
}.to_str();
match result {
Ok(str) => Some(str),
Err(err) => {
set_error(&err.to_string(), error_buffer);
return None;
match CStr::from_ptr(str_ptr).to_str() {
Ok(valid_str) => Ok(valid_str),
Err(err) => {
let error_message = err.to_string();
set_error(&error_message, error_buffer);
Err(error_message)
}
}
}
}

/// Converts a C string pointer into an owned Rust `String`.
///
/// Delegates validation (null check, UTF-8 check) to `process_c_str`;
/// on failure that helper has already written the error message into
/// `error_buffer`, so the `Err` payload is simply discarded here.
pub fn assert_string(str_ptr: *const c_char, error_buffer: *mut *mut c_char) -> Option<String> {
    process_c_str(str_ptr, error_buffer)
        .map(str::to_owned)
        .ok()
}


pub fn assert_pointer<'a, T>(ptr: *mut T, error_buffer: *mut *mut c_char) -> Option<&'a mut T> {
let result = unsafe {
if ptr.is_null() {
Expand Down Expand Up @@ -91,7 +105,7 @@ pub fn process_string_slice<'a, F>(
mut func: F,
) -> Result<(), ()>
where
F: FnMut(&'a str) -> Result<(), ()>,
F: FnMut(String) -> Result<(), ()>,
{
let slice = match assert_pointer(ptr, error_buffer) {
Some(ptr) => unsafe { slice::from_raw_parts(ptr, len) },
Expand All @@ -112,14 +126,14 @@ where
Ok(())
}

pub fn schema_apply_for_field<'a, T, K, F: FnMut(Field, &'a str) -> Result<T, ()>>(
pub fn schema_apply_for_field<'a, T, K, F: FnMut(Field, String) -> Result<T, ()>>(
error_buffer: *mut *mut c_char,
schema: Schema,
field_name: &'a str,
field_name: String,
mut func: F,
) -> Result<T, ()>
{
match schema.get_field(field_name) {
match schema.get_field(field_name.as_str()) {
Ok(field) => func(field, field_name),
Err(err) => {
set_error(&err.to_string(), error_buffer);
Expand Down Expand Up @@ -152,14 +166,43 @@ pub fn convert_document_as_json(
Ok(json!(doc_json).to_string())
}

pub fn start_lib_init(log_level: &str) {
/// Initializes the library: optionally installs a panic hook that wipes the
/// FTS index directory, then configures `env_logger` with `log_level` as the
/// default filter.
///
/// Fix: the previous version called `panic::take_hook()` unconditionally, so
/// when `clear_on_panic` was `false` any hook installed by the host
/// application was removed (and replaced with the default) as a side effect.
/// The hook is now only taken — and chained — when we actually install ours.
pub fn start_lib_init(log_level: String, clear_on_panic: bool) {
    if clear_on_panic {
        let old_hook = panic::take_hook();
        panic::set_hook(Box::new(move |panic_info| {
            match FTS_PATH.lock() {
                Ok(fts_path) => {
                    let fts_path = fts_path.as_str();
                    if fts_path.is_empty() {
                        debug!("fts path is empty");
                    } else {
                        // Best-effort cleanup of the index directory;
                        // removal errors are deliberately ignored.
                        let _ = fs::remove_dir_all(Path::new(fts_path));
                    }
                }
                Err(e) => {
                    debug!("Set hook err: {}", e);
                }
            }
            // Preserve the previously installed behavior (e.g. abort/backtrace).
            old_hook(panic_info)
        }));
    }

    // try_init: ignore the error if a logger was already installed.
    let _ = env_logger::Builder::from_env(
        env_logger::Env::default().default_filter_or(log_level)
    ).try_init();
}

pub fn create_context_with_schema(error_buffer: *mut *mut c_char, schema: Schema, path: &str) -> Result<*mut TantivyContext, ()> {
match fs::create_dir_all(Path::new(path)) {
pub fn create_context_with_schema(
error_buffer: *mut *mut c_char,
schema: Schema,
path: String,
) -> Result<*mut TantivyContext, ()> {
match FTS_PATH.lock() {
Ok(mut fts_path) => *fts_path = path.clone(),
Err(e) => debug!("Failed to set path: {}", e),
};

match fs::create_dir_all(Path::new(path.as_str())) {
Err(e) => {
debug!("Failed to create directories: {}", e);
set_error(&e.to_string(), error_buffer);
Expand Down Expand Up @@ -220,7 +263,7 @@ pub fn delete_docs(
delete_ids_len: usize,
error_buffer: *mut *mut c_char,
context: &mut TantivyContext,
field_name: &str,
field_name: String,
) {
let schema = context.index.schema();

Expand All @@ -239,7 +282,7 @@ pub fn delete_docs(
};

if process_string_slice(delete_ids_ptr, error_buffer, delete_ids_len, |id_value| {
let _ = context.writer.delete_term(Term::from_field_text(field, id_value));
let _ = context.writer.delete_term(Term::from_field_text(field, id_value.as_str()));
Ok(())
}).is_err() {
rollback(error_buffer, &mut context.writer, "Failed to process string slice");
Expand All @@ -254,7 +297,7 @@ pub fn delete_docs(
fn rollback(
error_buffer: *mut *mut c_char,
writer: &mut IndexWriter,
message: &str
message: &str,
) {
let _ = writer.rollback();
set_error(message, error_buffer);
Expand All @@ -278,8 +321,8 @@ pub fn add_field(
error_buffer: *mut *mut c_char,
doc: &mut Document,
index: &Index,
field_name: &str,
field_value: &str,
field_name: String,
field_value: String,
) {
let schema = index.schema();
let field = match schema_apply_for_field::<Field, (), _>
Expand Down Expand Up @@ -324,7 +367,7 @@ pub fn search(

let query_parser = QueryParser::for_index(&context.index, fields);

let query = match query_parser.parse_query(query) {
let query = match query_parser.parse_query(query.as_str()) {
Ok(query) => query,
Err(err) => {
set_error(&err.to_string(), error_buffer);
Expand Down
13 changes: 7 additions & 6 deletions rust/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ pub extern "C" fn schema_builder_add_text_field(
_ => return set_error("index_record_option_const is wrong", error_buffer)
};

add_text_field(stored, is_text, is_fast, builder, tokenizer_name, field_name, index_record_option);
add_text_field(stored, is_text, is_fast, builder, tokenizer_name.as_str(), field_name.as_str(), index_record_option);
}

#[logcall]
Expand Down Expand Up @@ -110,7 +110,7 @@ pub extern "C" fn context_register_text_analyzer_ngram(
None => return
};

match register_ngram_tokenizer(min_gram, max_gram, prefix_only, &context.index, tokenizer_name) {
match register_ngram_tokenizer(min_gram, max_gram, prefix_only, &context.index, tokenizer_name.as_str()) {
Err(err) => return set_error(&err.to_string(), error_buffer),
_ => return
};
Expand All @@ -136,7 +136,7 @@ pub extern "C" fn context_register_text_analyzer_edge_ngram(
None => return
};

register_edge_ngram_tokenizer(min_gram, max_gram, limit, &context.index, tokenizer_name);
register_edge_ngram_tokenizer(min_gram, max_gram, limit, &context.index, tokenizer_name.as_str());
}

#[logcall]
Expand All @@ -163,7 +163,7 @@ pub extern "C" fn context_register_text_analyzer_simple(
None => return
};

register_simple_tokenizer(text_limit, &context.index, tokenizer_name, lang);
register_simple_tokenizer(text_limit, &context.index, tokenizer_name.as_str(), lang.as_str());
}

#[logcall]
Expand All @@ -183,7 +183,7 @@ pub extern "C" fn context_register_text_analyzer_raw(
None => return
};

register_raw_tokenizer(&context.index, tokenizer_name);
register_raw_tokenizer(&context.index, tokenizer_name.as_str());
}

#[logcall]
Expand Down Expand Up @@ -401,10 +401,11 @@ pub extern "C" fn string_free(s: *mut c_char) {
/// C entry point: initializes logging and (optionally) the clear-on-panic hook.
///
/// On an invalid `log_level_ptr` (null or non-UTF-8), `assert_string` writes
/// the error into `error_buffer` and initialization is skipped.
///
/// # Safety
/// `log_level_ptr` must be null or point to a valid NUL-terminated C string;
/// `error_buffer` must be a valid location to store an error string pointer.
pub unsafe extern "C" fn init_lib(
    log_level_ptr: *const c_char,
    error_buffer: *mut *mut c_char,
    clear_on_panic: bool
) {
    if let Some(log_level) = assert_string(log_level_ptr, error_buffer) {
        start_lib_init(log_level, clear_on_panic);
    }
}
12 changes: 6 additions & 6 deletions rust/src/tantivy_util/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@ use crate::tantivy_util::{Document, extract_text_from_owned_value};

pub fn convert_document_to_json<'a>(
doc: &&mut Document,
field_to_name: HashMap<Field, &'a str>,
) -> HashMap<&'a str, serde_json::Value> {
let mut result_json: HashMap<&str, serde_json::Value> = HashMap::new();
field_to_name: HashMap<Field, String>,
) -> HashMap<String, serde_json::Value> {
let mut result_json: HashMap<String, serde_json::Value> = HashMap::new();

let _ = serde_json::to_value(doc.score).is_ok_and(
|score| result_json.insert("score", score).is_some()
|score| result_json.insert("score".to_string(), score).is_some()
);

let _ = serde_json::to_value(&doc.highlights).is_ok_and(
|highlights| result_json.insert("highlights", highlights).is_some()
|highlights| result_json.insert("highlights".to_string(), highlights).is_some()
);

let doc = &doc.tantivy_doc;
Expand All @@ -22,7 +22,7 @@ pub fn convert_document_to_json<'a>(
Some(key) => {
let _ = extract_text_from_owned_value(&field_value.value).is_some_and(
|value| serde_json::to_value(value).is_ok_and(
|value| result_json.insert(key, value).is_some())
|value| result_json.insert(key.to_string(), value).is_some())
);
}
None => {}
Expand Down
10 changes: 9 additions & 1 deletion rust/src/tantivy_util/scheme_builder.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
use tantivy::schema::{FAST, IndexRecordOption, SchemaBuilder, STORED, STRING, TEXT, TextFieldIndexing};

pub fn add_text_field(stored: bool, is_text: bool, is_fast: bool, builder: &mut SchemaBuilder, tokenizer_name: &str, field_name: &str, index_record_option: IndexRecordOption) {
pub fn add_text_field(
stored: bool,
is_text: bool,
is_fast: bool,
builder: &mut SchemaBuilder,
tokenizer_name: &str,
field_name: &str,
index_record_option: IndexRecordOption,
) {
let mut text_options = if is_text { TEXT } else { STRING };
text_options = if stored { text_options | STORED } else { text_options };
text_options = if is_fast { text_options | FAST } else { text_options };
Expand Down
5 changes: 3 additions & 2 deletions tantivy.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ var doOnce sync.Once
//
// Returns:
// - An error if the initialization fails.
func LibInit(directive ...string) error {
func LibInit(cleanOnPanic bool, directive ...string) error {
var initVal string
var err error
doOnce.Do(func() {
Expand All @@ -47,8 +47,9 @@ func LibInit(directive ...string) error {

cInitVal := C.CString(initVal)
defer C.string_free(cInitVal)
cCleanOnPanic := C.bool(cleanOnPanic)
var errBuffer *C.char
C.init_lib(cInitVal, &errBuffer)
C.init_lib(cInitVal, &errBuffer, cCleanOnPanic)

errorMessage := C.GoString(errBuffer)
defer C.string_free(errBuffer)
Expand Down
2 changes: 1 addition & 1 deletion tantivy_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ func fx(
minGram uintptr,
isFastId bool,
) (*tantivy_go.Schema, *tantivy_go.TantivyContext) {
err := tantivy_go.LibInit("debug")
err := tantivy_go.LibInit(true, "debug")
assert.NoError(t, err)
builder, err := tantivy_go.NewSchemaBuilder()
require.NoError(t, err)
Expand Down

0 comments on commit 809bab7

Please sign in to comment.