diff --git a/clang-tidy/.clang-tidy b/clang-tidy/.clang-tidy deleted file mode 100644 index 063df74f1..000000000 --- a/clang-tidy/.clang-tidy +++ /dev/null @@ -1,31 +0,0 @@ -Checks: 'clang-diagnostic-*, - clang-analyzer-*, - performance-*, - bugprone-*, - -bugprone-exception-escape, - -bugprone-branch-clone, - -bugprone-easily-swappable-parameters, - -bugprone-macro-parentheses, - -bugprone-signed-char-misuse, - -bugprone-narrowing-conversions, - -bugprone-reserved-identifier, - -bugprone-implicit-widening-of-multiplication-result, - -bugprone-assignment-in-if-condition, - -bugprone-parent-virtual-call, - -bugprone-integer-division, - -bugprone-unhandled-self-assignment, - -bugprone-inc-dec-in-conditions, - -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, - -performance-no-int-to-ptr, - -performance-enum-size, - -performance-avoid-endl' -# clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling - too many unnecessary warning in vendored code -# performance-no-int-to-ptr - consider how to fix this -# bugprone-macro-parentheses - consider fixing -WarningsAsErrors: '*' -HeaderFilterRegex: '.*(?= 4.0.0 are given under - # the top level key 'Diagnostics' in the output yaml files - mergekey = "Diagnostics" - merged=[] - for replacefile in glob.iglob(os.path.join(tmpdir, '*.yaml')): - content = yaml.safe_load(open(replacefile, 'r')) - if not content: - continue # Skip empty files. - merged.extend(content.get(mergekey, [])) - - if merged: - # MainSourceFile: The key is required by the definition inside - # include/clang/Tooling/ReplacementsYaml.h, but the value - # is actually never used inside clang-apply-replacements, - # so we set it to '' here. - output = {'MainSourceFile': '', mergekey: merged} - with open(mergefile, 'w') as out: - yaml.safe_dump(output, out) - else: - # Empty the file: - open(mergefile, 'w').close() - - -def find_binary(arg, name, build_path): - """Get the path for a binary or exit""" - if arg: - if shutil.which(arg): - return arg - else: - raise SystemExit( - "error: passed binary '{}' was not found or is not executable" - .format(arg)) - - built_path = os.path.join(build_path, "bin", name) - binary = shutil.which(name) or shutil.which(built_path) - if binary: - return binary - else: - raise SystemExit( - "error: failed to find {} in $PATH or at {}" - .format(name, built_path)) - - -def apply_fixes(args, clang_apply_replacements_binary, tmpdir): - """Calls clang-apply-fixes on a given directory.""" - invocation = [clang_apply_replacements_binary] - invocation.append('-ignore-insert-conflict') - if args.format: - invocation.append('-format') - if args.style: - invocation.append('-style=' + args.style) - invocation.append(tmpdir) - subprocess.call(invocation) - - -def run_tidy(args, clang_tidy_binary, tmpdir, build_path, queue, lock, - failed_files): - """Takes filenames out of queue and runs clang-tidy on them.""" - while True: - name = queue.get() - invocation = get_tidy_invocation(name, clang_tidy_binary, args.checks, - tmpdir, build_path, args.header_filter, - args.allow_enabling_alpha_checkers, - args.extra_arg, args.extra_arg_before, - args.quiet, args.config_file, args.config, - args.line_filter, args.use_color, - args.plugins) - - proc = subprocess.Popen(invocation, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - output, err = proc.communicate() - if proc.returncode != 0: - if proc.returncode < 0: - msg = "%s: terminated by signal %d\n" % (name, -proc.returncode) - err += msg.encode('utf-8') - failed_files.append(name) - with lock: 
- sys.stdout.write(' '.join(invocation) + '\n' + output.decode('utf-8')) - if len(err) > 0: - sys.stdout.flush() - sys.stderr.write(err.decode('utf-8')) - queue.task_done() - - -def main(): - parser = argparse.ArgumentParser(description='Runs clang-tidy over all files ' - 'in a compilation database. Requires ' - 'clang-tidy and clang-apply-replacements in ' - '$PATH or in your build directory.') - parser.add_argument('-allow-enabling-alpha-checkers', - action='store_true', help='allow alpha checkers from ' - 'clang-analyzer.') - parser.add_argument('-clang-tidy-binary', metavar='PATH', - default='clang-tidy-18', - help='path to clang-tidy binary') - parser.add_argument('-clang-apply-replacements-binary', metavar='PATH', - default='clang-apply-replacements-18', - help='path to clang-apply-replacements binary') - parser.add_argument('-checks', default=None, - help='checks filter, when not specified, use clang-tidy ' - 'default') - config_group = parser.add_mutually_exclusive_group() - config_group.add_argument('-config', default=None, - help='Specifies a configuration in YAML/JSON format: ' - ' -config="{Checks: \'*\', ' - ' CheckOptions: {x: y}}" ' - 'When the value is empty, clang-tidy will ' - 'attempt to find a file named .clang-tidy for ' - 'each source file in its parent directories.') - config_group.add_argument('-config-file', default=None, - help='Specify the path of .clang-tidy or custom config ' - 'file: e.g. -config-file=/some/path/myTidyConfigFile. ' - 'This option internally works exactly the same way as ' - '-config option after reading specified config file. ' - 'Use either -config-file or -config, not both.') - parser.add_argument('-header-filter', default=None, - help='regular expression matching the names of the ' - 'headers to output diagnostics from. Diagnostics from ' - 'the main file of each translation unit are always ' - 'displayed.') - parser.add_argument('-line-filter', default=None, - help='List of files with line ranges to filter the' - 'warnings.') - if yaml: - parser.add_argument('-export-fixes', metavar='filename', dest='export_fixes', - help='Create a yaml file to store suggested fixes in, ' - 'which can be applied with clang-apply-replacements.') - parser.add_argument('-j', type=int, default=0, - help='number of tidy instances to be run in parallel.') - parser.add_argument('files', nargs='*', default=['.*'], - help='files to be processed (regex on path)') - parser.add_argument('-fix', action='store_true', help='apply fix-its') - parser.add_argument('-format', action='store_true', help='Reformat code ' - 'after applying fixes') - parser.add_argument('-style', default='file', help='The style of reformat ' - 'code after applying fixes') - parser.add_argument('-use-color', type=strtobool, nargs='?', const=True, - help='Use colors in diagnostics, overriding clang-tidy\'s' - ' default behavior. 
This option overrides the \'UseColor' - '\' option in .clang-tidy file, if any.') - parser.add_argument('-p', dest='build_path', - help='Path used to read a compile command database.') - parser.add_argument('-extra-arg', dest='extra_arg', - action='append', default=[], - help='Additional argument to append to the compiler ' - 'command line.') - parser.add_argument('-extra-arg-before', dest='extra_arg_before', - action='append', default=[], - help='Additional argument to prepend to the compiler ' - 'command line.') - parser.add_argument('-ignore', default=DEFAULT_CLANG_TIDY_IGNORE, - help='File path to clang-tidy-ignore') - parser.add_argument('-quiet', action='store_true', - help='Run clang-tidy in quiet mode') - parser.add_argument('-load', dest='plugins', - action='append', default=[], - help='Load the specified plugin in clang-tidy.') - args = parser.parse_args() - - db_path = 'compile_commands.json' - - if args.build_path is not None: - build_path = args.build_path - else: - # Find our database - build_path = find_compilation_database(db_path) - - clang_tidy_binary = find_binary(args.clang_tidy_binary, "clang-tidy", - build_path) - - tmpdir = None - if args.fix or (yaml and args.export_fixes): - clang_apply_replacements_binary = find_binary( - args.clang_apply_replacements_binary, "clang-apply-replacements", - build_path) - tmpdir = tempfile.mkdtemp() - - try: - invocation = get_tidy_invocation("", clang_tidy_binary, args.checks, - None, build_path, args.header_filter, - args.allow_enabling_alpha_checkers, - args.extra_arg, args.extra_arg_before, - args.quiet, args.config_file, args.config, - args.line_filter, args.use_color, - args.plugins) - invocation.append('-list-checks') - invocation.append('-') - if args.quiet: - # Even with -quiet we still want to check if we can call clang-tidy. - with open(os.devnull, 'w') as dev_null: - subprocess.check_call(invocation, stdout=dev_null) - else: - subprocess.check_call(invocation) - except: - print("Unable to run clang-tidy.", file=sys.stderr) - sys.exit(1) - - # Load the database and extract all files. - database = json.load(open(os.path.join(build_path, db_path))) - files = set([make_absolute(entry['file'], entry['directory']) - for entry in database]) - files, excluded = filter_files(args.ignore, files) - if excluded: - print("Excluding the following files:\n" + "\n".join(excluded) + "\n") - - max_task = args.j - if max_task == 0: - max_task = multiprocessing.cpu_count() - - # Build up a big regexy filter from all command line arguments. - file_name_re = re.compile('|'.join(args.files)) - - return_code = 0 - try: - # Spin up a bunch of tidy-launching threads. - task_queue = queue.Queue(max_task) - # List of files with a non-zero return code. - failed_files = [] - lock = threading.Lock() - for _ in range(max_task): - t = threading.Thread(target=run_tidy, - args=(args, clang_tidy_binary, tmpdir, build_path, - task_queue, lock, failed_files)) - t.daemon = True - t.start() - - # Fill the queue with files. - for name in files: - if file_name_re.search(name): - task_queue.put(name) - - # Wait for all threads to be done. - task_queue.join() - if len(failed_files): - return_code = 1 - - except KeyboardInterrupt: - # This is a sad hack. Unfortunately subprocess goes - # bonkers with ctrl-c and we start forking merrily. 
- print('\nCtrl-C detected, goodbye.') - if tmpdir: - shutil.rmtree(tmpdir) - os.kill(0, 9) - - if yaml and args.export_fixes: - print('Writing fixes to ' + args.export_fixes + ' ...') - try: - merge_replacement_files(tmpdir, args.export_fixes) - except: - print('Error exporting fixes.\n', file=sys.stderr) - traceback.print_exc() - return_code=1 - - if args.fix: - print('Applying fixes ...') - try: - apply_fixes(args, clang_apply_replacements_binary, tmpdir) - except: - print('Error applying fixes.\n', file=sys.stderr) - traceback.print_exc() - return_code = 1 - - if tmpdir: - shutil.rmtree(tmpdir) - sys.exit(return_code) - - -if __name__ == '__main__': - main() diff --git a/cpp_src/CMakeLists.txt b/cpp_src/CMakeLists.txt index 8b287338f..f40d1fe2e 100644 --- a/cpp_src/CMakeLists.txt +++ b/cpp_src/CMakeLists.txt @@ -87,7 +87,7 @@ endif() if(MSVC) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -wd4244 -wd4267 -wd4996 -wd4717 -MP -MD") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -wd4244 -wd4267 -wd4996 -wd4717 -wd4800 -wd4396 -wd4503 -MP -MD") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -wd4244 -wd4267 -wd4996 -wd4717 -wd4800 -wd4396 -wd4503 -MP -MD /bigobj") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -SAFESEH:NO") else() set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Werror -Wswitch-enum") @@ -619,8 +619,3 @@ add_custom_target(collect_coverage COMMAND genhtml coverage_filtered.info -o coverage_output COMMENT "Collecting Reindexer coverage" ) - -# Configure compile options extra -if(MSVC) - target_compile_options(${TARGET} PRIVATE /bigobj) -endif() diff --git a/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh new file mode 100755 index 000000000..873181e20 --- /dev/null +++ b/cpp_src/cmd/reindexer_server/test/test_storage_compatibility.sh @@ -0,0 +1,197 @@ +#!/bin/bash +# Task: https://github.com/restream/reindexer/-/issues/1188 +set -e + +function KillAndRemoveServer { + local pid=$1 + kill $pid + wait $pid + yum remove -y 'reindexer*' > /dev/null +} + +function WaitForDB { + # wait until DB is loaded + set +e # disable "exit on error" so the script won't stop when DB's not loaded yet + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + while [[ $is_connected != "test" ]] + do + sleep 2 + is_connected=$(reindexer_tool --dsn $ADDRESS --command '\databases list'); + done + set -e +} + +function CompareNamespacesLists { + local ns_list_actual=$1 + local ns_list_expected=$2 + local pid=$3 + + diff=$(echo ${ns_list_actual[@]} ${ns_list_expected[@]} | tr ' ' '\n' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: namespaces list not changed" + else + echo "##### FAIL: namespaces list was changed" + echo "expected: $ns_list_expected" + echo "actual: $ns_list_actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + +function CompareMemstats { + local actual=$1 + local expected=$2 + local pid=$3 + diff=$(echo ${actual[@]} ${expected[@]} | tr ' ' '\n' | sed 's/\(.*\),$/\1/' | sort | uniq -u) # compare in any order + if [ "$diff" == "" ]; then + echo "## PASS: memstats not changed" + else + echo "##### FAIL: memstats was changed" + echo "expected: $expected" + echo "actual: $actual" + KillAndRemoveServer $pid; + exit 1 + fi +} + + +RX_SERVER_CURRENT_VERSION_RPM="$(basename build/reindexer-*server*.rpm)" +VERSION_FROM_RPM=$(echo "$RX_SERVER_CURRENT_VERSION_RPM" | grep -o '.*server-..') +VERSION=$(echo ${VERSION_FROM_RPM: -2:1}) # one-digit version + +echo "## 
choose latest release rpm file" +if [ $VERSION == 3 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 3) + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +elif [ $VERSION == 4 ]; then + LATEST_RELEASE=$(python3 cpp_src/cmd/reindexer_server/test/get_last_rx_version.py -v 4) + # replicationstats ns added for v4 + namespaces_list_expected=$'purchase_options_ext_dict\nchild_account_recommendations\n#config\n#activitystats\n#replicationstats\nradio_channels\ncollections\n#namespaces\nwp_imports_tasks\nepg_genres\nrecom_media_items_personal\nrecom_epg_archive_default\n#perfstats\nrecom_epg_live_default\nmedia_view_templates\nasset_video_servers\nwp_tasks_schedule\nadmin_roles\n#clientsstats\nrecom_epg_archive_personal\nrecom_media_items_similars\nmenu_items\naccount_recommendations\nkaraoke_items\nmedia_items\nbanners\n#queriesperfstats\nrecom_media_items_default\nrecom_epg_live_personal\nservices\n#memstats\nchannels\nmedia_item_recommendations\nwp_tasks_tasks\nepg' +else + echo "Unknown version" + exit 1 +fi + +echo "## downloading latest release rpm file: $LATEST_RELEASE" +curl "http://repo.itv.restr.im/itv-api-ng/7/x86_64/$LATEST_RELEASE" --output $LATEST_RELEASE; +echo "## downloading example DB" +curl "https://github.com/restream/reindexer_testdata/-/raw/main/dump_demo.zip" --output dump_demo.zip; +unzip -o dump_demo.zip # unzips into demo_test.rxdump; + +ADDRESS="cproto://127.0.0.1:6534/" +DB_NAME="test" + +memstats_expected=$'[ +{"name":"account_recommendations","replication":{"data_hash":6833710705,"data_count":1}}, +{"name":"admin_roles","replication":{"data_hash":1896088071,"data_count":2}}, +{"name":"asset_video_servers","replication":{"data_hash":7404222244,"data_count":97}}, +{"name":"banners","replication":{"data_hash":0,"data_count":0}}, +{"name":"channels","replication":{"data_hash":457292509431319,"data_count":3941}}, +{"name":"child_account_recommendations","replication":{"data_hash":6252344969,"data_count":1}}, +{"name":"collections","replication":{"data_hash":0,"data_count":0}}, +{"name":"epg","replication":{"data_hash":-7049751653258,"data_count":1623116}}, +{"name":"epg_genres","replication":{"data_hash":8373644068,"data_count":1315}}, +{"name":"karaoke_items","replication":{"data_hash":5858155773472,"data_count":4500}}, +{"name":"media_item_recommendations","replication":{"data_hash":-6520334670,"data_count":35886}}, +{"name":"media_items","replication":{"data_hash":-1824301168479972392,"data_count":65448}}, +{"name":"media_view_templates","replication":{"data_hash":0,"data_count":0}}, +{"name":"menu_items","replication":{"data_hash":0,"data_count":0}}, +{"name":"purchase_options_ext_dict","replication":{"data_hash":24651210926,"data_count":3}}, +{"name":"radio_channels","replication":{"data_hash":37734732881,"data_count":28}}, +{"name":"recom_epg_archive_default","replication":{"data_hash":0,"data_count":0}}, 
+{"name":"recom_epg_archive_personal","replication":{"data_hash":0,"data_count":0}}, +{"name":"recom_epg_live_default","replication":{"data_hash":0,"data_count":0}}, +{"name":"recom_epg_live_personal","replication":{"data_hash":0,"data_count":0}}, +{"name":"recom_media_items_default","replication":{"data_hash":8288213744,"data_count":3}}, +{"name":"recom_media_items_personal","replication":{"data_hash":0,"data_count":0}}, +{"name":"recom_media_items_similars","replication":{"data_hash":-672103903,"data_count":33538}}, +{"name":"services","replication":{"data_hash":0,"data_count":0}}, +{"name":"wp_imports_tasks","replication":{"data_hash":777859741066,"data_count":1145}}, +{"name":"wp_tasks_schedule","replication":{"data_hash":12595790956,"data_count":4}}, +{"name":"wp_tasks_tasks","replication":{"data_hash":28692716680,"data_count":281}} +] +Returned 27 rows' + +echo "##### Forward compatibility test #####" + +DB_PATH=$(pwd)"/rx_db" + +echo "Database: "$DB_PATH + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +# run RX server with disabled logging +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f demo_test.rxdump --createdb; +sleep 1; + +namespaces_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_1; +CompareNamespacesLists "${namespaces_1[@]}" "${namespaces_list_expected[@]}" $server_pid; + +memstats_1=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select name, replication.data_hash, replication.data_count from #memstats order by name'); +CompareMemstats "${memstats_1[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l0 --corelog=none --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +WaitForDB + +namespaces_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_2; +CompareNamespacesLists "${namespaces_2[@]}" "${namespaces_1[@]}" $server_pid; + + +memstats_2=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select name, replication.data_hash, replication.data_count from #memstats order by name'); +CompareMemstats "${memstats_2[@]}" "${memstats_1[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; +sleep 1; + +echo "##### Backward compatibility test #####" + +echo "## installing current version: $RX_SERVER_CURRENT_VERSION_RPM" +yum install -y build/*.rpm > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! +sleep 2; + +reindexer_tool --dsn $ADDRESS$DB_NAME -f demo_test.rxdump --createdb; +sleep 1; + +namespaces_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_3; +CompareNamespacesLists "${namespaces_3[@]}" "${namespaces_list_expected[@]}" $server_pid; + + +memstats_3=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select name, replication.data_hash, replication.data_count from #memstats order by name'); +CompareMemstats "${memstats_3[@]}" "${memstats_expected[@]}" $server_pid; + +KillAndRemoveServer $server_pid; + +echo "## installing latest release: $LATEST_RELEASE" +yum install -y $LATEST_RELEASE > /dev/null; +reindexer_server -l warning --httplog=none --rpclog=none --db $DB_PATH & +server_pid=$! 
+sleep 2; + +WaitForDB + +namespaces_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command '\namespaces list'); +echo $namespaces_4; +CompareNamespacesLists "${namespaces_4[@]}" "${namespaces_3[@]}" $server_pid; + +memstats_4=$(reindexer_tool --dsn $ADDRESS$DB_NAME --command 'select name, replication.data_hash, replication.data_count from #memstats order by name'); +CompareMemstats "${memstats_4[@]}" "${memstats_3[@]}" $server_pid; + +KillAndRemoveServer $server_pid; +rm -rf $DB_PATH; diff --git a/cpp_src/cmd/reindexer_tool/commandsexecutor.cc b/cpp_src/cmd/reindexer_tool/commandsexecutor.cc index 085741e3c..254242b65 100644 --- a/cpp_src/cmd/reindexer_tool/commandsexecutor.cc +++ b/cpp_src/cmd/reindexer_tool/commandsexecutor.cc @@ -25,7 +25,6 @@ const std::string kVariableOutput = "output"; const std::string kOutputModeJson = "json"; const std::string kOutputModeTable = "table"; const std::string kOutputModePretty = "pretty"; -const std::string kOutputModePrettyCollapsed = "collapsed"; const std::string kBenchNamespace = "rxtool_bench"; const std::string kBenchIndex = "id"; @@ -157,7 +156,10 @@ Error CommandsExecutor::fromFileImpl(std::istream& in) { while (GetStatus().running && std::getline(in, line.str)) { if (reindexer::checkIfStartsWith("\\upsert ", line.str) || reindexer::checkIfStartsWith("\\delete ", line.str)) { try { - cmdCh.push(line); + LineData l; + l.lineNum = line.lineNum; + reindexer::deepCopy(l.str, line.str); + cmdCh.push(std::move(l)); } catch (std::exception&) { break; } @@ -506,7 +508,7 @@ Error CommandsExecutor::processImpl(const std::string& command) noe } catch (std::exception& e) { return Error(errLogic, "std::exception during command's execution: %s", e.what()); } catch (...) { - return Error(errLogic, "Unknow exception during command's execution"); + return Error(errLogic, "Unknown exception during command's execution"); } } return Error(errParams, "Unknown command '%s'. 
Type '\\help' to list of available commands", token); @@ -683,7 +685,7 @@ Error CommandsExecutor::commandUpsert(const std::string& command) { status = db().Upsert(nsName, item); if (!fromFile_ && status.ok()) { - output_() << "Upserted successfuly: 1 items" << std::endl; + output_() << "Upserted successfully: 1 items" << std::endl; } return status; } @@ -809,7 +811,7 @@ Error CommandsExecutor::commandDump(const std::string& command) { } for (auto it : itemResults) { - if (auto err = it.Status(); !err.ok()) { + if (err = it.Status(); !err.ok()) { return err; } if (cancelCtx_.IsCancelled()) { @@ -1103,7 +1105,7 @@ Error CommandsExecutor::commandProcessDatabase err = db().Status(); } if (err.ok()) { - output_() << "Succesfully connected to " << currentDsn << std::endl; + output_() << "Successfully connected to " << currentDsn << std::endl; } return err; } else if (subCommand == "create"sv) { @@ -1119,7 +1121,7 @@ Error CommandsExecutor::commandProcessDatabase std::vector dbNames; err = db().EnumDatabases(dbNames); if (std::find(dbNames.begin(), dbNames.end(), std::string(dbName)) != dbNames.end()) { - output_() << "Succesfully created database '" << dbName << "'" << std::endl; + output_() << "Successfully created database '" << dbName << "'" << std::endl; } else { std::cerr << "Error on database '" << dbName << "' creation" << std::endl; } diff --git a/cpp_src/core/cjson/baseencoder.cc b/cpp_src/core/cjson/baseencoder.cc index 992a268a6..b3950919d 100644 --- a/cpp_src/core/cjson/baseencoder.cc +++ b/cpp_src/core/cjson/baseencoder.cc @@ -1,6 +1,5 @@ #include "baseencoder.h" #include -#include #include "cjsonbuilder.h" #include "cjsontools.h" #include "core/keyvalue/p_string.h" @@ -42,7 +41,7 @@ void BaseEncoder::Encode(ConstPayload& pl, Builder& builder, IAdditiona } objectScalarIndexes_.reset(); - std::fill_n(std::begin(fieldsoutcnt_), pl.NumFields(), 0); + std::fill(fieldsoutcnt_.begin(), fieldsoutcnt_.end(), 0); builder.SetTagsMatcher(tagsMatcher_); if constexpr (kWithTagsPathTracking) { builder.SetTagsPath(&curTagsPath_); @@ -167,11 +166,11 @@ bool BaseEncoder::encode(ConstPayload* pl, Serializer& rdser, Builder& [&](KeyValueType::String) { builder.Array(tagName, pl->GetArray(tagField).subspan(cnt, count), cnt); }, [&](KeyValueType::Uuid) { builder.Array(tagName, pl->GetArray(tagField).subspan(cnt, count), cnt); }, [](OneOf) noexcept { - assertrx(0); + assertrx(false); abort(); }); } - cnt += count; + cnt += int(count); break; } case TAG_NULL: diff --git a/cpp_src/core/cjson/baseencoder.h b/cpp_src/core/cjson/baseencoder.h index 9eafa4808..bb1cd818d 100644 --- a/cpp_src/core/cjson/baseencoder.h +++ b/cpp_src/core/cjson/baseencoder.h @@ -10,7 +10,6 @@ namespace reindexer { class TagsMatcher; -class JsonBuilder; class MsgPackBuilder; class ProtobufBuilder; class CsvBuilder; @@ -38,7 +37,7 @@ class IAdditionalDatasource { template class BaseEncoder { public: - BaseEncoder(const TagsMatcher* tagsMatcher, const FieldsSet* filter = nullptr); + explicit BaseEncoder(const TagsMatcher* tagsMatcher, const FieldsSet* filter = nullptr); void Encode(ConstPayload& pl, Builder& builder, IAdditionalDatasource* = nullptr); void Encode(std::string_view tuple, Builder& wrSer, IAdditionalDatasource*); @@ -61,9 +60,9 @@ class BaseEncoder { std::string_view getPlTuple(ConstPayload& pl); - const TagsMatcher* tagsMatcher_; - std::array fieldsoutcnt_{0}; - const FieldsSet* filter_; + const TagsMatcher* tagsMatcher_{nullptr}; + std::array fieldsoutcnt_; + const FieldsSet* filter_{nullptr}; WrSerializer tmpPlTuple_; 
TagsPath curTagsPath_; IndexedTagsPathInternalT indexedTagsPath_; diff --git a/cpp_src/core/cjson/cjsondecoder.cc b/cpp_src/core/cjson/cjsondecoder.cc index 26c7d8549..3064f14e8 100644 --- a/cpp_src/core/cjson/cjsondecoder.cc +++ b/cpp_src/core/cjson/cjsondecoder.cc @@ -11,7 +11,6 @@ bool CJsonDecoder::decodeCJson(Payload& pl, Serializer& rdser, WrSerializer& wrs const ctag tag = rdser.GetCTag(); TagType tagType = tag.Type(); if (tagType == TAG_END) { - recoder.Serialize(wrser); wrser.PutCTag(kCTagEnd); return false; } diff --git a/cpp_src/core/cjson/cjsondecoder.h b/cpp_src/core/cjson/cjsondecoder.h index 443fea4c7..747c2fd7e 100644 --- a/cpp_src/core/cjson/cjsondecoder.h +++ b/cpp_src/core/cjson/cjsondecoder.h @@ -17,10 +17,8 @@ class Recoder { [[nodiscard]] virtual TagType Type(TagType oldTagType) = 0; virtual void Recode(Serializer&, WrSerializer&) const = 0; virtual void Recode(Serializer&, Payload&, int tagName, WrSerializer&) = 0; - [[nodiscard]] virtual bool Match(int field) noexcept = 0; - [[nodiscard]] virtual bool Match(TagType, const TagsPath&) = 0; - virtual void Serialize(WrSerializer& wrser) = 0; - virtual bool Reset() = 0; + [[nodiscard]] virtual bool Match(int field) const noexcept = 0; + [[nodiscard]] virtual bool Match(const TagsPath&) const = 0; virtual ~Recoder() = default; }; @@ -49,7 +47,7 @@ class CJsonDecoder { class IndexedSkipFilter { public: - IndexedSkipFilter(const FieldsSet& f) noexcept : f_(&f) {} + explicit IndexedSkipFilter(const FieldsSet& f) noexcept : f_(&f) {} IndexedSkipFilter MakeCleanCopy() const noexcept { return IndexedSkipFilter(*f_); } IndexedSkipFilter MakeSkipFilter() const noexcept { return IndexedSkipFilter(*f_); } @@ -57,7 +55,7 @@ class CJsonDecoder { RX_ALWAYS_INLINE bool match(const TagsPath&) const noexcept { return false; } private: - const FieldsSet* f_; + const FieldsSet* f_{nullptr}; }; class RestrictingFilter { @@ -85,8 +83,8 @@ class CJsonDecoder { } private: - const FieldsSet* f_; - bool match_; + const FieldsSet* f_{nullptr}; + bool match_{false}; }; class DummyRecoder { @@ -96,7 +94,6 @@ class CJsonDecoder { RX_ALWAYS_INLINE bool Recode(Serializer&, Payload&, int, WrSerializer&) const noexcept { return false; } RX_ALWAYS_INLINE TagType RegisterTagType(TagType tagType, int) const noexcept { return tagType; } RX_ALWAYS_INLINE TagType RegisterTagType(TagType tagType, const TagsPath&) const noexcept { return tagType; } - RX_ALWAYS_INLINE void Serialize(WrSerializer&) const {} }; class DefaultRecoder { public: @@ -121,14 +118,13 @@ class CJsonDecoder { return needToRecode_ ? r_->Type(tagType) : tagType; } RX_ALWAYS_INLINE TagType RegisterTagType(TagType tagType, const TagsPath& tagsPath) { - needToRecode_ = r_->Match(tagType, tagsPath); + needToRecode_ = r_->Match(tagsPath); return needToRecode_ ? 
r_->Type(tagType) : tagType; } - RX_ALWAYS_INLINE void Serialize(WrSerializer& wser) const { r_->Serialize(wser); } private: - Recoder* r_; - bool needToRecode_; + Recoder* r_{nullptr}; + bool needToRecode_{false}; }; struct NamedTagOpt {}; struct NamelessTagOpt {}; @@ -166,7 +162,7 @@ class CJsonDecoder { TagsMatcher& tagsMatcher_; TagsPath tagsPath_; - int32_t arrayLevel_ = 0; + int32_t arrayLevel_{0}; ScalarIndexesSetT objectScalarIndexes_; // storage for owning strings obtained from numbers std::deque& storage_; diff --git a/cpp_src/core/cjson/defaultvaluecoder.cc b/cpp_src/core/cjson/defaultvaluecoder.cc deleted file mode 100644 index d33713150..000000000 --- a/cpp_src/core/cjson/defaultvaluecoder.cc +++ /dev/null @@ -1,171 +0,0 @@ -#include "defaultvaluecoder.h" - -namespace reindexer { - -DefaultValueCoder::DefaultValueCoder(std::string_view ns, const PayloadFieldType& fld, std::vector&& tps, int16_t fieldIdx) - : ns_(ns), - field_(fld.Name()), - tags_(std::move(tps)), - fieldIdx_(fieldIdx), - type_(fld.Type().ToTagType()), - array_(fld.IsArray()), - basePath_(&tags_.front()) {} - -bool DefaultValueCoder::Match(int field) noexcept { - // non-nested field present in tuple - if ((field == fieldIdx_) && ready()) { - state_ = State::found; - } - return false; // returned result is always same -} - -bool DefaultValueCoder::Match(TagType tt, const TagsPath& tp) { - static const bool result = false; // returned result is always same - - // nothing to look for (start tuple global object) - if (tp.empty()) { - state_ = State::wait; - // inArray_ = false; - // arrField_ = 0; - return result; - } - - // found\recorded earlier - if ((state_ == State::found) || ((state_ == State::write) /* && !inArray_*/)) { - return result; - } - - // check if active array has been processed - const bool arrayTag = (tt == TAG_ARRAY); - // if (inArray_) { - // inArray_ = ((tt == TAG_OBJECT) || arrayTag) ? (tp.back() == arrField_) : (tp[tp.size() - 2] == arrField_); // -2 pre-last item - // // recorded earlier - stop it - // if (!inArray_ && (state_ == State::write)) { - // return result; - // } - // } - - // try match nested field - if (tt == TAG_OBJECT) { - assertrx(state_ != State::found); - match(tp); - return result; - } - - // may be end element of adjacent nested field - if (arrayTag) { - if (tp.size() <= basePath_->size() && std::equal(tp.begin(), tp.end(), basePath_->begin())) { - // Do not create anything inside objects arrays #1819 - state_ = State::found; - return result; - } - // inArray_ = (tp.front() == basePath_->front()); - // arrField_ = tp.back(); - } - - // not nested - if (copyPos_ == 0) { - return result; - } - - // detect array insertion into array (not supported) - if (arrayTag && array_) { - state_ = State::found; // do nothing - } else if ((tp.front() == basePath_->front()) && (tp.size() > basePath_->size())) { - ++nestingLevel_; - } - - return result; -} - -void DefaultValueCoder::Serialize(WrSerializer& wrser) { - if (blocked()) { - return; // skip processing - } - - // skip nested levels - if ((basePath_->size() > 1) || (nestingLevel_ > 1)) { - assertrx(nestingLevel_ > 0); - --nestingLevel_; - - // new field - move to valid level - if (nestingLevel_ > copyPos_) { - return; - } - } - - const auto written = write(wrser); - Reset(); - state_ = written ? 
State::write : State::found; -} - -bool DefaultValueCoder::Reset() noexcept { - nestingLevel_ = 1; - copyPos_ = 0; - // NOTE: return true when updating tuple - return (state_ == State::write); -} - -void DefaultValueCoder::match(const TagsPath& tp) { - ++nestingLevel_; - - for (auto& path : tags_) { - if (path.front() != tp.front()) { - continue; - } - - copyPos_ = 1; - auto pathSize = path.size(); - auto sz = std::min(pathSize, tp.size()); - for (size_t idx = 1; idx < sz; ++idx) { - if (path[idx] != tp[idx]) { - break; - } - ++copyPos_; - - // we are trying to add field with non-nested paths, but an intersection was found in additional nested paths. - // Stop, throw an error - if (tags_.front().size() == 1) { - throw Error(errLogic, "Cannot add field with name '%s' to namespace '%s'. One of nested json paths is already in use", - field_, ns_); - } - } - state_ = State::match; - basePath_ = &path; - break; - } -} - -bool DefaultValueCoder::write(WrSerializer& wrser) const { - if (array_ && copyPos_ + 1 < basePath_->size()) { - // Do not create multiple levels for nested array indexes to avoid problems with decoding in Go/Java connectors. #1819 - return false; - } - int32_t nestedObjects = 0; - for (size_t idx = copyPos_, sz = basePath_->size(); idx < sz; ++idx) { - auto tagName = (*basePath_)[idx]; - // real index field in last tag - const bool finalTag = (idx == (sz - 1)); - if (finalTag) { - if (array_) { - wrser.PutCTag(ctag{TAG_ARRAY, tagName, fieldIdx_}); - wrser.PutVarUint(0); - } else { - wrser.PutCTag(ctag{type_, tagName, fieldIdx_}); - } - break; - } - - // start nested object - wrser.PutCTag(ctag{TAG_OBJECT, tagName}); - ++nestedObjects; - } - - // add end tags to all objects - while (nestedObjects-- > 0) { - wrser.PutCTag(kCTagEnd); - } - return true; -} - -} // namespace reindexer diff --git a/cpp_src/core/cjson/defaultvaluecoder.h b/cpp_src/core/cjson/defaultvaluecoder.h deleted file mode 100644 index 3f4276dc2..000000000 --- a/cpp_src/core/cjson/defaultvaluecoder.h +++ /dev/null @@ -1,41 +0,0 @@ -#pragma once - -#include "cjsondecoder.h" - -namespace reindexer { - -class DefaultValueCoder : public Recoder { -public: - DefaultValueCoder(std::string_view ns, const PayloadFieldType& fld, std::vector&& tps, int16_t fieldIdx); - RX_ALWAYS_INLINE TagType Type(TagType tt) noexcept override final { return tt; } - [[nodiscard]] bool Match(int f) noexcept override final; - [[nodiscard]] bool Match(TagType tt, const TagsPath& tp) override final; - RX_ALWAYS_INLINE void Recode(Serializer&, WrSerializer&) const noexcept override final { assertrx(false); } - RX_ALWAYS_INLINE void Recode(Serializer&, Payload&, int, WrSerializer&) noexcept override final { assertrx(false); } - void Serialize(WrSerializer& wrser) override final; - bool Reset() noexcept override final; - -private: - void match(const TagsPath& tp); - [[nodiscard]] bool write(WrSerializer& wrser) const; - [[nodiscard]] RX_ALWAYS_INLINE bool blocked() const noexcept { return ((state_ == State::found) || (state_ == State::write)); } - [[nodiscard]] RX_ALWAYS_INLINE bool ready() const noexcept { return ((state_ == State::wait) || (state_ == State::match)); } - -private: - const std::string ns_; - const std::string field_; - const std::vector tags_; - const int16_t fieldIdx_{0}; - const TagType type_; - const bool array_{false}; - - const TagsPath* basePath_{nullptr}; - enum class State { wait, found, match, write } state_{State::wait}; - uint32_t nestingLevel_{1}; - uint32_t copyPos_{0}; - - // bool inArray_{false}; - // int16_t 
arrField_{0}; -}; - -} // namespace reindexer diff --git a/cpp_src/core/cjson/jsondecoder.cc b/cpp_src/core/cjson/jsondecoder.cc index 85d3ac79d..7ee9a0e0e 100644 --- a/cpp_src/core/cjson/jsondecoder.cc +++ b/cpp_src/core/cjson/jsondecoder.cc @@ -115,13 +115,8 @@ void JsonDecoder::decodeJson(Payload* pl, CJsonBuilder& builder, const gason::Js builder.Null(tagName); break; case gason::JSON_ARRAY: { - ObjType type; CounterGuardIR32 g(arrayLevel_); - if (gason::isHomogeneousArray(v)) { - type = ObjType::TypeArray; - } else { - type = ObjType::TypeObjectArray; - } + ObjType type = (gason::isHomogeneousArray(v)) ? ObjType::TypeArray : ObjType::TypeObjectArray; auto arrNode = builder.Array(tagName, type); for (const auto& elem : v) { decodeJson(pl, arrNode, elem.value, 0, match); diff --git a/cpp_src/core/cjson/jsondecoder.h b/cpp_src/core/cjson/jsondecoder.h index 73dad50f2..066f6740a 100644 --- a/cpp_src/core/cjson/jsondecoder.h +++ b/cpp_src/core/cjson/jsondecoder.h @@ -8,8 +8,8 @@ namespace reindexer { class JsonDecoder { public: - explicit JsonDecoder(TagsMatcher& tagsMatcher) noexcept : tagsMatcher_(tagsMatcher), filter_(nullptr) {} - JsonDecoder(TagsMatcher& tagsMatcher, const FieldsSet* filter) noexcept : tagsMatcher_(tagsMatcher), filter_(filter) {} + explicit JsonDecoder(TagsMatcher& tagsMatcher, const FieldsSet* filter = nullptr) noexcept + : tagsMatcher_(tagsMatcher), filter_(filter) {} Error Decode(Payload& pl, WrSerializer& wrSer, const gason::JsonValue& v); void Decode(std::string_view json, CJsonBuilder& builder, const TagsPath& fieldPath); @@ -21,8 +21,8 @@ class JsonDecoder { TagsMatcher& tagsMatcher_; TagsPath tagsPath_; - const FieldsSet* filter_; - int32_t arrayLevel_ = 0; + const FieldsSet* filter_{nullptr}; + int32_t arrayLevel_{0}; ScalarIndexesSetT objectScalarIndexes_; }; diff --git a/cpp_src/core/cjson/msgpackdecoder.cc b/cpp_src/core/cjson/msgpackdecoder.cc index e08e55ab5..8b72c9f21 100644 --- a/cpp_src/core/cjson/msgpackdecoder.cc +++ b/cpp_src/core/cjson/msgpackdecoder.cc @@ -145,13 +145,13 @@ Error MsgPackDecoder::Decode(std::string_view buf, Payload& pl, WrSerializer& wr } catch (const Error& err) { return err; } catch (const std::exception& ex) { - return Error(errNotValid, "%s", ex.what()); + return {errNotValid, "%s", ex.what()}; } catch (...) 
{ // all internal errors shall be handled and converted to Error - return Error(errNotValid, "Unexpected exception"); + return {errNotValid, "Unexpected exception"}; } - return Error(); + return {}; } constexpr std::string_view ToString(msgpack_object_type type) { diff --git a/cpp_src/core/cjson/protobufdecoder.cc b/cpp_src/core/cjson/protobufdecoder.cc index a8313ab01..7d2dcc04a 100644 --- a/cpp_src/core/cjson/protobufdecoder.cc +++ b/cpp_src/core/cjson/protobufdecoder.cc @@ -1,14 +1,13 @@ #include "protobufdecoder.h" #include "core/schema.h" #include "estl/protobufparser.h" -#include "protobufbuilder.h" namespace reindexer { void ArraysStorage::UpdateArraySize(int tagName, int field) { GetArray(tagName, field); } CJsonBuilder& ArraysStorage::GetArray(int tagName, int field) { - assertrx(indexes_.size() > 0); + assertrx(!indexes_.empty()); auto it = data_.find(tagName); if (it == data_.end()) { indexes_.back().emplace_back(tagName); @@ -25,7 +24,7 @@ CJsonBuilder& ArraysStorage::GetArray(int tagName, int field) { void ArraysStorage::onAddObject() { indexes_.emplace_back(h_vector()); } void ArraysStorage::onObjectBuilt(CJsonBuilder& parent) { - assertrx(indexes_.size() > 0); + assertrx(!indexes_.empty()); for (int tagName : indexes_.back()) { auto it = data_.find(tagName); assertrx(it != data_.end()); @@ -69,10 +68,10 @@ void ProtobufDecoder::setValue(Payload& pl, CJsonBuilder& builder, ProtobufValue Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const ProtobufValue& item) { ProtobufObject object(item.As(), *schema_, tagsPath_, tm_); ProtobufParser parser(object); - bool packed = item.IsOfPrimitiveType(); - int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); + const bool packed = item.IsOfPrimitiveType(); + const int field = tm_.tags2field(tagsPath_.data(), tagsPath_.size()); if (field > 0) { - auto& f = pl.Type().Field(field); + const auto& f = pl.Type().Field(field); if rx_unlikely (!f.IsArray()) { throw Error(errLogic, "Error parsing protobuf field '%s' - got array, expected scalar %s", f.Name(), f.Type().Name()); } @@ -104,7 +103,7 @@ Error ProtobufDecoder::decodeArray(Payload& pl, CJsonBuilder& builder, const Pro } } } - return Error(); + return {}; } Error ProtobufDecoder::decode(Payload& pl, CJsonBuilder& builder, const ProtobufValue& item) { diff --git a/cpp_src/core/cjson/uuid_recoders.h b/cpp_src/core/cjson/uuid_recoders.h index cdc264ac3..9bb51c701 100644 --- a/cpp_src/core/cjson/uuid_recoders.h +++ b/cpp_src/core/cjson/uuid_recoders.h @@ -19,10 +19,8 @@ class RecoderUuidToString : public Recoder { } void Recode(Serializer&, WrSerializer&) const override final; void Recode(Serializer&, Payload&, int, WrSerializer&) override final { assertrx(false); } - [[nodiscard]] bool Match(int) noexcept override final { return false; } - [[nodiscard]] bool Match(TagType, const TagsPath& tp) noexcept override final { return tagsPath_ == tp; } - void Serialize(WrSerializer&) override final {} - bool Reset() override final { return false; } + [[nodiscard]] bool Match(int) const noexcept final { return false; } + [[nodiscard]] bool Match(const TagsPath& tp) const noexcept final { return tagsPath_ == tp; } private: TagsPath tagsPath_; @@ -54,8 +52,8 @@ class RecoderStringToUuidArray : public Recoder { } return TAG_ARRAY; } - [[nodiscard]] bool Match(int f) noexcept override final { return f == field_; } - [[nodiscard]] bool Match(TagType, const TagsPath&) noexcept override final { return false; } + [[nodiscard]] bool Match(int f) const noexcept final { return f 
== field_; } + [[nodiscard]] bool Match(const TagsPath&) const noexcept final { return false; } void Recode(Serializer&, WrSerializer&) const override final { assertrx(false); } void Recode(Serializer& rdser, Payload& pl, int tagName, WrSerializer& wrser) override final { if (fromNotArrayField_) { @@ -78,8 +76,6 @@ class RecoderStringToUuidArray : public Recoder { wrser.PutVarUint(count); } } - void Serialize(WrSerializer&) override final {} - bool Reset() override final { return false; } private: const int field_{std::numeric_limits::max()}; @@ -98,15 +94,13 @@ class RecoderStringToUuid : public Recoder { } return TAG_UUID; } - [[nodiscard]] bool Match(int f) noexcept override final { return f == field_; } - [[nodiscard]] bool Match(TagType, const TagsPath&) noexcept override final { return false; } + [[nodiscard]] bool Match(int f) const noexcept final { return f == field_; } + [[nodiscard]] bool Match(const TagsPath&) const noexcept final { return false; } void Recode(Serializer&, WrSerializer&) const override final { assertrx(false); } void Recode(Serializer& rdser, Payload& pl, int tagName, WrSerializer& wrser) override final { pl.Set(field_, Variant{rdser.GetStrUuid()}, true); wrser.PutCTag(ctag{TAG_UUID, tagName, field_}); } - void Serialize(WrSerializer&) override final {} - bool Reset() override final { return false; } private: const int field_{std::numeric_limits::max()}; diff --git a/cpp_src/core/formatters/key_string_fmt.h b/cpp_src/core/formatters/key_string_fmt.h index 37869b917..59d0086df 100644 --- a/cpp_src/core/formatters/key_string_fmt.h +++ b/cpp_src/core/formatters/key_string_fmt.h @@ -1,7 +1,7 @@ #pragma once -#include "fmt/format.h" #include "core/keyvalue/key_string.h" +#include "fmt/format.h" template <> struct fmt::printf_formatter { diff --git a/cpp_src/core/formatters/uuid_fmt.h b/cpp_src/core/formatters/uuid_fmt.h index e77ec886b..e249d4b55 100644 --- a/cpp_src/core/formatters/uuid_fmt.h +++ b/cpp_src/core/formatters/uuid_fmt.h @@ -1,7 +1,7 @@ #pragma once -#include "fmt/format.h" #include "core/keyvalue/uuid.h" +#include "fmt/format.h" template <> struct fmt::printf_formatter { diff --git a/cpp_src/core/ft/areaholder.h b/cpp_src/core/ft/areaholder.h index 53bac35b1..3bb410fa8 100644 --- a/cpp_src/core/ft/areaholder.h +++ b/cpp_src/core/ft/areaholder.h @@ -14,10 +14,8 @@ struct Area { Area() noexcept : start(0), end(0) {} Area(int s, int e) noexcept : start(s), end(e) {} - [[nodiscard]] bool inline IsIn(int pos) noexcept { return pos <= end && pos >= start; } - - [[nodiscard]] bool inline Concat(const Area& rhs) noexcept { - if (IsIn(rhs.start) || IsIn(rhs.end) || (start > rhs.start && end < rhs.end)) { + [[nodiscard]] bool Concat(const Area& rhs) noexcept { + if (isIn(rhs.start) || isIn(rhs.end) || (start > rhs.start && end < rhs.end)) { if (start > rhs.start) { start = rhs.start; } @@ -31,19 +29,35 @@ struct Area { int start; int end; + +private: + [[nodiscard]] bool inline isIn(int pos) noexcept { return pos <= end && pos >= start; } }; -class AreaHolder; +struct AreaDebug { + enum class PhraseMode { None, Start, End }; + AreaDebug() {} + AreaDebug(int s, int e, std::string&& p, PhraseMode phMode) noexcept : start(s), end(e), props(p), phraseMode(phMode) {} + [[nodiscard]] RX_ALWAYS_INLINE bool Concat(const AreaDebug&) noexcept { return false; } + int start = 0; + int end = 0; + std::string props; + PhraseMode phraseMode = PhraseMode::None; +}; -class AreaBuffer { +template +class AreasInDocument; + +template +class AreasInField { public: - AreaBuffer() = 
default; + AreasInField() = default; [[nodiscard]] size_t Size() const noexcept { return data_.size(); } [[nodiscard]] bool Empty() const noexcept { return data_.empty(); } void Commit() { if (!data_.empty()) { boost::sort::pdqsort_branchless(data_.begin(), data_.end(), - [](const Area& rhs, const Area& lhs) noexcept { return rhs.start < lhs.start; }); + [](const AreaType& rhs, const AreaType& lhs) noexcept { return rhs.start < lhs.start; }); for (auto vit = data_.begin() + 1; vit != data_.end(); ++vit) { auto prev = vit - 1; if (vit->Concat(*prev)) { @@ -52,7 +66,7 @@ class AreaBuffer { } } } - [[nodiscard]] bool Insert(Area&& area, float termRank, int maxAreasInDoc, float maxTermRank) { + [[nodiscard]] bool Insert(AreaType&& area, float termRank, int maxAreasInDoc, float maxTermRank) { if (!data_.empty() && data_.back().Concat(area)) { return true; } else { @@ -71,22 +85,26 @@ class AreaBuffer { return false; } - [[nodiscard]] const RVector& GetData() const noexcept { return data_; } - void MoveAreas(AreaHolder& to, int field, int32_t rank, int maxAreasInDoc); + [[nodiscard]] const RVector& GetData() const noexcept { return data_; } + void MoveAreas(AreasInDocument& to, int field, int32_t rank, int maxAreasInDoc) { + for (auto& v : data_) { + [[maybe_unused]] bool r = to.InsertArea(std::move(v), field, rank, maxAreasInDoc); + } + to.UpdateRank(rank); + data_.resize(0); + } private: - RVector data_; + RVector data_; int index_ = 0; }; -class AreaHolder { +template +class AreasInDocument { public: - typedef std::shared_ptr Ptr; - typedef std::unique_ptr UniquePtr; - - AreaHolder() = default; - ~AreaHolder() = default; - AreaHolder(AreaHolder&&) = default; + AreasInDocument() = default; + ~AreasInDocument() = default; + AreasInDocument(AreasInDocument&&) = default; void Reserve(int size) { areas_.reserve(size); } void ReserveField(int size) { areas_.resize(size); } void Commit() { @@ -95,21 +113,24 @@ class AreaHolder { area.Commit(); } } - [[nodiscard]] bool AddWord(int pos, int field, int32_t rank, int maxAreasInDoc) { - return InsertArea(Area{pos, pos + 1}, field, rank, maxAreasInDoc); + [[nodiscard]] bool AddWord(AreaType&& area, int field, int32_t rank, int maxAreasInDoc) { + return InsertArea(std::move(area), field, rank, maxAreasInDoc); } void UpdateRank(int32_t rank) noexcept { if (rank > maxTermRank_) { maxTermRank_ = rank; } } - [[nodiscard]] AreaBuffer* GetAreas(int field) { + + [[nodiscard]] AreasInField* GetAreas(int field) { if (!commited_) { Commit(); } return (areas_.size() <= size_t(field)) ? nullptr : &areas_[field]; } - [[nodiscard]] AreaBuffer* GetAreasRaw(int field) noexcept { return (areas_.size() <= size_t(field)) ? nullptr : &areas_[field]; } + [[nodiscard]] AreasInField* GetAreasRaw(int field) noexcept { + return (areas_.size() <= size_t(field)) ? 
nullptr : &areas_[field]; + } [[nodiscard]] bool IsCommited() const noexcept { return commited_; } [[nodiscard]] size_t GetAreasCount() const noexcept { size_t size = 0; @@ -118,7 +139,7 @@ class AreaHolder { } return size; } - [[nodiscard]] bool InsertArea(Area&& area, int field, int32_t rank, int maxAreasInDoc) { + [[nodiscard]] bool InsertArea(AreaType&& area, int field, int32_t rank, int maxAreasInDoc) { commited_ = false; if (areas_.size() <= size_t(field)) { areas_.resize(field + 1); @@ -129,16 +150,8 @@ class AreaHolder { private: bool commited_ = false; - RVector areas_; + RVector, 3> areas_; int32_t maxTermRank_ = 0; }; -inline void AreaBuffer::MoveAreas(AreaHolder& to, int field, int32_t rank, int maxAreasInDoc) { - for (auto& v : data_) { - [[maybe_unused]] bool r = to.InsertArea(std::move(v), field, rank, maxAreasInDoc); - } - to.UpdateRank(rank); - data_.resize(0); -} - } // namespace reindexer diff --git a/cpp_src/core/ft/ft_fast/dataholder.cc b/cpp_src/core/ft/ft_fast/dataholder.cc index 4dc826ce9..e41d4f0b7 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.cc +++ b/cpp_src/core/ft/ft_fast/dataholder.cc @@ -104,8 +104,8 @@ void DataHolder::Clear() { } template -void DataHolder::StartCommit(bool complte_updated) { - if (NeedRebuild(complte_updated)) { +void DataHolder::StartCommit(bool complete_updated) { + if (NeedRebuild(complete_updated)) { status_ = FullRebuild; Clear(); diff --git a/cpp_src/core/ft/ft_fast/dataholder.h b/cpp_src/core/ft/ft_fast/dataholder.h index 5926fc491..943f02b65 100644 --- a/cpp_src/core/ft/ft_fast/dataholder.h +++ b/cpp_src/core/ft/ft_fast/dataholder.h @@ -109,7 +109,7 @@ class IDataHolder { virtual void Process(size_t fieldSize, bool multithread) = 0; virtual size_t GetMemStat() = 0; virtual void Clear() = 0; - virtual void StartCommit(bool complte_updated) = 0; + virtual void StartCommit(bool complete_updated) = 0; void SetConfig(FtFastConfig* cfg); CommitStep& GetStep(WordIdType id) noexcept { assertrx(id.b.step_num < steps.size()); diff --git a/cpp_src/core/ft/ft_fast/selecter.cc b/cpp_src/core/ft/ft_fast/selecter.cc index e0bef67a1..ad247f35e 100644 --- a/cpp_src/core/ft/ft_fast/selecter.cc +++ b/cpp_src/core/ft/ft_fast/selecter.cc @@ -119,9 +119,9 @@ void Selecter::prepareVariants(std::vector& variants, RV // RX_NO_INLINE just for build test purpose. 
Do not expect any effect here template -template -RX_NO_INLINE MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, - FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext& rdxCtx) { +template +MergeType Selecter::Process(FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses::Statuses&& mergeStatuses, + const RdxContext& rdxCtx) { FtSelectContext ctx; ctx.rawResults.reserve(dsl.size()); // STEP 2: Search dsl terms for each variant @@ -210,38 +210,38 @@ RX_NO_INLINE MergeData Selecter::Process(FtDSLQuery&& dsl, bool inTransa for (auto& res : ctx.rawResults) { results.emplace_back(std::move(res)); } - const auto maxMergedSize = std::min(size_t(holder_.cfg_->mergeLimit), ctx.totalORVids); + const auto maxMergedSize = std::min(size_t(holder_.cfg_->mergeLimit), ctx.totalORVids); if (maxMergedSize < 0xFFFF) { - return mergeResultsBmType(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, ftSortType, - std::move(mergeStatuses), rdxCtx); + return mergeResultsBmType(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, ftSortType, + std::move(mergeStatuses), rdxCtx); } else if (maxMergedSize < 0xFFFFFFFF) { - return mergeResultsBmType(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, ftSortType, - std::move(mergeStatuses), rdxCtx); + return mergeResultsBmType(std::move(results), ctx.totalORVids, synonymsBounds, inTransaction, ftSortType, + std::move(mergeStatuses), rdxCtx); } else { assertrx_throw(false); } - return MergeData(); + return MergeType(); } template -template -MergeData Selecter::mergeResultsBmType(std::vector&& results, size_t totalORVids, +template +MergeType Selecter::mergeResultsBmType(std::vector&& results, size_t totalORVids, const std::vector& synonymsBounds, bool inTransaction, FtSortType ftSortType, FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext& rdxCtx) { switch (holder_.cfg_->bm25Config.bm25Type) { case FtFastConfig::Bm25Config::Bm25Type::rx: - return mergeResults(std::move(results), totalORVids, synonymsBounds, inTransaction, ftSortType, - std::move(mergeStatuses), rdxCtx); + return mergeResults(std::move(results), totalORVids, synonymsBounds, inTransaction, + ftSortType, std::move(mergeStatuses), rdxCtx); case FtFastConfig::Bm25Config::Bm25Type::classic: - return mergeResults(std::move(results), totalORVids, synonymsBounds, inTransaction, ftSortType, - std::move(mergeStatuses), rdxCtx); + return mergeResults(std::move(results), totalORVids, synonymsBounds, inTransaction, + ftSortType, std::move(mergeStatuses), rdxCtx); case FtFastConfig::Bm25Config::Bm25Type::wordCount: - return mergeResults(std::move(results), totalORVids, synonymsBounds, inTransaction, ftSortType, - std::move(mergeStatuses), rdxCtx); + return mergeResults(std::move(results), totalORVids, synonymsBounds, inTransaction, + ftSortType, std::move(mergeStatuses), rdxCtx); } assertrx_throw(false); - return MergeData(); + return MergeType(); } template @@ -449,33 +449,32 @@ RX_ALWAYS_INLINE void Selecter::debugMergeStep(const char* msg, int vid, template template RX_ALWAYS_INLINE void Selecter::calcFieldBoost(const Calculator& bm25Calc, unsigned long long f, const IdRelType& relid, - const FtDslOpts& opts, int termProc, double& termRank, double& normBm25, - bool& dontSkipCurTermRank, h_vector& ranksInFields, int& field) { + const FtDslOpts& opts, TermRankInfo& termInf, bool& dontSkipCurTermRank, + h_vector& ranksInFields, int& field) { assertrx(f < holder_.cfg_->fieldsCfg.size()); const 
auto& fldCfg = holder_.cfg_->fieldsCfg[f]; // raw bm25 const double bm25 = bm25Calc.Get(relid.WordsInField(f), holder_.vdocs_[relid.Id()].wordsCount[f], holder_.avgWordsCount_[f]); - // normalized bm25 const double normBm25Tmp = bound(bm25, fldCfg.bm25Weight, fldCfg.bm25Boost); + termInf.positionRank = bound(::pos2rank(relid.MinPositionInField(f)), fldCfg.positionWeight, fldCfg.positionBoost); + termInf.termLenBoost = bound(opts.termLenBoost, fldCfg.termLenWeight, fldCfg.termLenBoost); - const double positionRank = bound(::pos2rank(relid.MinPositionInField(f)), fldCfg.positionWeight, fldCfg.positionBoost); - - float termLenBoost = bound(opts.termLenBoost, fldCfg.termLenWeight, fldCfg.termLenBoost); // final term rank calculation - const double termRankTmp = opts.fieldsOpts[f].boost * termProc * normBm25Tmp * opts.boost * termLenBoost * positionRank; + const double termRankTmp = + opts.fieldsOpts[f].boost * termInf.proc * normBm25Tmp * opts.boost * termInf.termLenBoost * termInf.positionRank; const bool needSumRank = opts.fieldsOpts[f].needSumRank; - if (termRankTmp > termRank) { + if (termRankTmp > termInf.termRank) { if (dontSkipCurTermRank) { - ranksInFields.push_back(termRank); + ranksInFields.push_back(termInf.termRank); } field = f; - normBm25 = normBm25Tmp; - termRank = termRankTmp; + termInf.termRank = termRankTmp; + termInf.bm25Norm = normBm25Tmp; dontSkipCurTermRank = needSumRank; - } else if (!dontSkipCurTermRank && needSumRank && termRank == termRankTmp) { + } else if (!dontSkipCurTermRank && needSumRank && termInf.termRank == termRankTmp) { field = f; - normBm25 = normBm25Tmp; + termInf.bm25Norm = normBm25Tmp; dontSkipCurTermRank = true; } else if (termRankTmp && needSumRank) { ranksInFields.push_back(termRankTmp); @@ -483,44 +482,71 @@ RX_ALWAYS_INLINE void Selecter::calcFieldBoost(const Calculator& bm25Cal } template -AreaHolder Selecter::createAreaFromSubMerge(const MergedIdRelExArea& posInfo) { - AreaHolder area; - if (posInfo.wordPosForChain.empty()) { - return area; +template +void Selecter::insertSubMergeArea(const MergedIdRelGroupArea& posInfo, PosT cur, int prevIndex, AreasInDocument& area) { + PosT last = cur, first = cur; + int indx = int(posInfo.wordPosForChain.size()) - 2; + while (indx >= 0 && prevIndex != -1) { + auto pos = posInfo.wordPosForChain[indx][prevIndex].first; + prevIndex = posInfo.wordPosForChain[indx][prevIndex].second; + first = pos; + indx--; } + assertrx_throw(first.field() == last.field()); + if (area.InsertArea(Area(first.pos(), last.pos() + 1), cur.field(), posInfo.rank, maxAreasInDoc_)) { + area.UpdateRank(float(posInfo.rank)); + } +} - for (const auto& v : posInfo.wordPosForChain.back()) { - IdRelType::PosType last = v.first; - IdRelType::PosType first = v.first; - int indx = int(posInfo.wordPosForChain.size()) - 2; - int prevIndex = v.second; - while (indx >= 0 && prevIndex != -1) { - auto pos = posInfo.wordPosForChain[indx][prevIndex].first; - prevIndex = posInfo.wordPosForChain[indx][prevIndex].second; - first = pos; - indx--; +template +template +void Selecter::insertSubMergeArea(const MergedIdRelGroupArea& posInfo, PosT cur, int prevIndex, + AreasInDocument& area) { + int indx = int(posInfo.wordPosForChain.size()) - 1; + while (indx >= 0 && prevIndex != -1) { + PosT pos = posInfo.wordPosForChain[indx][prevIndex].first; + prevIndex = posInfo.wordPosForChain[indx][prevIndex].second; + AreaDebug::PhraseMode mode = AreaDebug::PhraseMode::None; + if (indx == int(posInfo.wordPosForChain.size()) - 1) { + mode = AreaDebug::PhraseMode::End; + } 
else if (indx == 0) { + mode = AreaDebug::PhraseMode::Start; } - assertrx(first.field() == last.field()); - if (area.InsertArea(Area(first.pos(), last.pos() + 1), v.first.field(), posInfo.rank, maxAreasInDoc_)) { + if (area.InsertArea(AreaDebug(pos.pos(), pos.pos() + 1, std::move(pos.info), mode), cur.field(), posInfo.rank, -1)) { area.UpdateRank(float(posInfo.rank)); } + + indx--; + } +} + +template +template +AreasInDocument Selecter::createAreaFromSubMerge(const MergedIdRelGroupArea& posInfo) { + AreasInDocument area; + if (posInfo.wordPosForChain.empty()) { + return area; + } + for (const auto& v : posInfo.wordPosForChain.back()) { + insertSubMergeArea(posInfo, v.first, v.second, area); } return area; } template -void Selecter::copyAreas(AreaHolder& subMerged, AreaHolder& merged, int32_t rank) { +template +void Selecter::copyAreas(AreasInDocument& subMerged, AreasInDocument& merged, int32_t rank) { for (size_t f = 0; f < fieldSize_; f++) { auto areas = subMerged.GetAreas(f); if (areas) { - areas->MoveAreas(merged, f, rank, maxAreasInDoc_); + areas->MoveAreas(merged, f, rank, std::is_same_v ? -1 : maxAreasInDoc_); } } } template -template -void Selecter::subMergeLoop(std::vector& subMerged, std::vector& subMergedPos, MergeData& merged, +template +void Selecter::subMergeLoop(MergeType& subMerged, std::vector& subMergedPos, MergeType& merged, std::vector& merged_rd, FtMergeStatuses::Statuses& mergeStatuses, std::vector& idoffsets, std::vector* checkAndOpMerge, const bool hasBeenAnd) { for (size_t subMergedIndex = 0, sz = subMerged.size(); subMergedIndex < sz; subMergedIndex++) { @@ -543,7 +569,7 @@ void Selecter::subMergeLoop(std::vector& subMerged, std::vect for (const auto& p : smPos.posTmp) { mPos.next.Add(p.first); } - merged.vectorAreas.emplace_back(createAreaFromSubMerge(smPos)); + merged.vectorAreas.emplace_back(createAreaFromSubMerge(smPos)); m.areaIndex = merged.vectorAreas.size() - 1; } else { mPos.next = std::move(smPos.posTmp); @@ -565,14 +591,13 @@ void Selecter::subMergeLoop(std::vector& subMerged, std::vect for (const auto& p : subPos.posTmp) { subPos.next.Add(p.first); } - AreaHolder area = createAreaFromSubMerge(subPos); + AreasInDocument area = createAreaFromSubMerge(subPos); int32_t areaIndex = merged[mergedIndex].areaIndex; if (areaIndex != -1 && areaIndex >= int(merged.vectorAreas.size())) { throw Error(errLogic, "FT merge: Incorrect area index %d (areas vector size is %d)", areaIndex, merged.vectorAreas.size()); } - AreaHolder& areaTo = merged.vectorAreas[areaIndex]; - copyAreas(area, areaTo, subMergeInfo.proc); + copyAreas(area, merged.vectorAreas[areaIndex], subMergeInfo.proc); } else { subPos.next = std::move(subPos.posTmp); } @@ -589,9 +614,9 @@ void Selecter::subMergeLoop(std::vector& subMerged, std::vect } template -template +template void Selecter::mergeGroupResult(std::vector& rawResults, size_t from, size_t to, - FtMergeStatuses::Statuses& mergeStatuses, MergeData& merged, std::vector& merged_rd, + FtMergeStatuses::Statuses& mergeStatuses, MergeType& merged, std::vector& merged_rd, OpType op, const bool hasBeenAnd, std::vector& idoffsets, const bool inTransaction, const RdxContext& rdxCtx) { // And - MustPresent @@ -599,14 +624,16 @@ void Selecter::mergeGroupResult(std::vector& rawResul // Not - NotPresent // hasBeenAnd shows whether it is possible to expand the set of documents (if there was already And, then the set of documents is not // expandable) - MergeData subMerged; - std::vector subMergedPositionData; + MergeType subMerged; + std::vector 
subMergedPositionData; - mergeResultsPart(rawResults, from, to, subMerged, subMergedPositionData, inTransaction, rdxCtx); + mergeResultsPart(rawResults, from, to, subMerged, subMergedPositionData, + inTransaction, rdxCtx); switch (op) { case OpOr: { - subMergeLoop(subMerged, subMergedPositionData, merged, merged_rd, mergeStatuses, idoffsets, nullptr, hasBeenAnd); + subMergeLoop(subMerged, subMergedPositionData, merged, merged_rd, mergeStatuses, + idoffsets, nullptr, hasBeenAnd); break; } case OpAnd: { @@ -614,7 +641,8 @@ void Selecter::mergeGroupResult(std::vector& rawResul // To do this, we intersect the checkAndOpMerge array with the merged array std::vector checkAndOpMerge; checkAndOpMerge.resize(holder_.vdocs_.size(), false); - subMergeLoop(subMerged, subMergedPositionData, merged, merged_rd, mergeStatuses, idoffsets, &checkAndOpMerge, hasBeenAnd); + subMergeLoop(subMerged, subMergedPositionData, merged, merged_rd, mergeStatuses, + idoffsets, &checkAndOpMerge, hasBeenAnd); // intersect checkAndOpMerge with merged for (auto& mergedDocInfo : merged) { if (!checkAndOpMerge[mergedDocInfo.id]) { @@ -639,22 +667,19 @@ void Selecter::mergeGroupResult(std::vector& rawResul } } template -template -void Selecter::addNewTerm(FtMergeStatuses::Statuses& mergeStatuses, MergeData& merged, std::vector& idoffsets, +template +void Selecter::addNewTerm(FtMergeStatuses::Statuses& mergeStatuses, MergeType& merged, std::vector& idoffsets, std::vector& curExists, const IdRelType& relid, index_t rawResIndex, int32_t termRank, int field) { const int vid = relid.Id(); MergeInfo info; info.id = vid; info.proc = termRank; info.field = field; - if (needArea_) { + + if constexpr (std::is_same_v&, decltype(merged)> || std::is_same_v&, decltype(merged)>) { auto& area = merged.vectorAreas.emplace_back(); info.areaIndex = merged.vectorAreas.size() - 1; area.ReserveField(fieldSize_); - for (auto pos : relid.Pos()) { - [[maybe_unused]] bool r = area.AddWord(pos.pos(), pos.field(), termRank, maxAreasInDoc_); - } - area.UpdateRank(termRank); } merged.push_back(std::move(info)); mergeStatuses[vid] = rawResIndex + 1; @@ -665,16 +690,27 @@ void Selecter::addNewTerm(FtMergeStatuses::Statuses& mergeStatuses, Merg } template -void Selecter::addAreas(MergeData& merged, int32_t areaIndex, const IdRelType& relid, int32_t termRank) { - if (needArea_) { - AreaHolder& area = merged.vectorAreas[areaIndex]; - for (auto pos : relid.Pos()) { - if (!area.AddWord(pos.pos(), pos.field(), termRank, maxAreasInDoc_)) { - break; - } +void Selecter::addAreas(AreasInDocument& area, const IdRelType& relid, int32_t termRank, + [[maybe_unused]] const TermRankInfo& termInf, [[maybe_unused]] const std::wstring& pattern) { + for (auto pos : relid.Pos()) { + if (!area.AddWord(Area(pos.pos(), pos.pos() + 1), pos.field(), termRank, maxAreasInDoc_)) { + break; } - area.UpdateRank(termRank); } + area.UpdateRank(termRank); +} + +template +void Selecter::addAreas(AreasInDocument& area, const IdRelType& relid, [[maybe_unused]] int32_t termRank, + const TermRankInfo& termInf, const std::wstring& pattern) { + utf16_to_utf8(pattern, const_cast(termInf.ftDslTerm)); + for (auto pos : relid.Pos()) { + if (!area.AddWord(AreaDebug(pos.pos(), pos.pos() + 1, termInf.ToString(), AreaDebug::PhraseMode::None), pos.field(), + termInf.termRank, -1)) { + break; + } + } + area.UpdateRank(termInf.termRank); } // idf=max(0.2, log((N-M+1)/M)/log(1+N)) @@ -692,9 +728,9 @@ void Selecter::addAreas(MergeData& merged, int32_t areaIndex, const IdRe // 
docRank=summ(max(subTermRank))*255/allmax // allmax=max(docRank) template -template +template void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, - MergeData& merged, std::vector& merged_rd, std::vector& idoffsets, + MergedType& merged, std::vector& merged_rd, std::vector& idoffsets, std::vector& curExists, const bool hasBeenAnd, const bool inTransaction, const RdxContext& rdxCtx) { const auto& vdocs = holder_.vdocs_; @@ -753,7 +789,10 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI } // Find field with max rank - auto [termRank, field] = calcTermRank(rawRes, bm25, relid, r.proc); + TermRankInfo termInf; + termInf.proc = r.proc; + termInf.pattern = r.pattern; + auto [termRank, field] = calcTermRank(rawRes, bm25, relid, termInf); if (!termRank) { continue; } @@ -764,13 +803,18 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI if (simple) { // one term if (vidStatus) { MergeInfo& info = merged[idoffsets[vid]]; - addAreas(merged, info.areaIndex, relid, termRank); + if constexpr (std::is_same_v, MergedType> || std::is_same_v, MergedType>) { + addAreas(merged.vectorAreas[info.areaIndex], relid, termRank, termInf, rawRes.term.pattern); + } if (info.proc < static_cast(termRank)) { info.proc = termRank; info.field = field; } } else if (merged.size() < holder_.cfg_->mergeLimit) { // add new addNewTerm(mergeStatuses, merged, idoffsets, curExists, relid, rawResIndex, int32_t(termRank), field); + if constexpr (std::is_same_v, MergedType> || std::is_same_v, MergedType>) { + addAreas(merged.vectorAreas.back(), relid, termRank, termInf, rawRes.term.pattern); + } } } else { if (vidStatus) { @@ -784,8 +828,9 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI normDist = bound(1.0 / double(std::max(distance, 1)), holder_.cfg_->distanceWeight, holder_.cfg_->distanceBoost); } int finalRank = normDist * termRank; - - addAreas(merged, info.areaIndex, relid, termRank); + if constexpr (std::is_same_v, MergedType> || std::is_same_v, MergedType>) { + addAreas(merged.vectorAreas[info.areaIndex], relid, termRank, termInf, rawRes.term.pattern); + } if (finalRank > curMerged_rd.rank) { info.proc -= curMerged_rd.rank; info.proc += finalRank; @@ -795,6 +840,10 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI curExists[vid] = true; } else if (merged.size() < holder_.cfg_->mergeLimit && !hasBeenAnd) { // add new addNewTerm(mergeStatuses, merged, idoffsets, curExists, relid, rawResIndex, termRank, field); + MergeInfo& info = merged[idoffsets[vid]]; + if constexpr (std::is_same_v, MergedType> || std::is_same_v, MergedType>) { + addAreas(merged.vectorAreas[info.areaIndex], relid, termRank, termInf, rawRes.term.pattern); + } merged_rd.emplace_back(IdRelType(std::move(relid)), int32_t(termRank), rawRes.term.opts.qpos); } } @@ -802,14 +851,13 @@ void Selecter::mergeIteration(TextSearchResults& rawRes, index_t rawResI } } template + template std::pair Selecter::calcTermRank(const TextSearchResults& rawRes, Calculator bm25Calc, const IdRelType& relid, - int proc) { + TermRankInfo& termInf) { // Find field with max rank int field = 0; - double termRank = 0.0; bool dontSkipCurTermRank = false; - double normBm25 = 0.0; h_vector ranksInFields; for (unsigned long long fieldsMask = relid.UsedFieldsMask(), f = 0; fieldsMask; ++f, fieldsMask >>= 1) { @@ -827,31 +875,31 @@ std::pair Selecter::calcTermRank(const TextSearchResults& r assertrx(f < 
rawRes.term.opts.fieldsOpts.size()); const auto fboost = rawRes.term.opts.fieldsOpts[f].boost; if (fboost) { - calcFieldBoost(bm25Calc, f, relid, rawRes.term.opts, proc, termRank, normBm25, dontSkipCurTermRank, ranksInFields, field); + calcFieldBoost(bm25Calc, f, relid, rawRes.term.opts, termInf, dontSkipCurTermRank, ranksInFields, field); } } - if (!termRank) { - return std::make_pair(termRank, field); + if (!termInf.termRank) { + return std::make_pair(termInf.termRank, field); } if (holder_.cfg_->summationRanksByFieldsRatio > 0) { boost::sort::pdqsort_branchless(ranksInFields.begin(), ranksInFields.end()); double k = holder_.cfg_->summationRanksByFieldsRatio; for (auto rank : ranksInFields) { - termRank += (k * rank); + termInf.termRank += (k * rank); k *= holder_.cfg_->summationRanksByFieldsRatio; } } - return std::make_pair(termRank, field); + return std::make_pair(termInf.termRank, field); } template -template +template void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, - MergeData& merged, std::vector
& merged_rd, std::vector& idoffsets, - std::vector& present, const bool firstTerm, const bool inTransaction, - const RdxContext& rdxCtx) { + MergeType& merged, std::vector& mergedPos, + std::vector& idoffsets, std::vector& present, const bool firstTerm, + const bool inTransaction, const RdxContext& rdxCtx) { const auto& vdocs = holder_.vdocs_; const size_t totalDocsCount = vdocs.size(); @@ -886,7 +934,10 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra } // Find field with max rank - auto [termRank, field] = calcTermRank(rawRes, bm25, relid, r.proc); + TermRankInfo termInf; + termInf.proc = r.proc; + termInf.pattern = r.pattern; + auto [termRank, field] = calcTermRank(rawRes, bm25, relid, termInf); if (!termRank) { continue; } @@ -898,8 +949,18 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra // match of 2-rd, and next terms if (!firstTerm) { auto& curMerged = merged[idoffsets[vid]]; - auto& curMergedPos = merged_rd[idoffsets[vid]]; - int minDist = curMergedPos.cur.MergeWithDist(relid, rawRes.term.opts.distance, curMergedPos.posTmp); + auto& curMergedPos = mergedPos[idoffsets[vid]]; + int minDist = -1; + if constexpr (isGroupMergeWithAreas()) { + if constexpr (std::is_same_v) { + utf16_to_utf8(rawRes.term.pattern, termInf.ftDslTerm); + minDist = curMergedPos.cur.MergeWithDist(relid, rawRes.term.opts.distance, curMergedPos.posTmp, termInf.ToString()); + } else { + minDist = curMergedPos.cur.MergeWithDist(relid, rawRes.term.opts.distance, curMergedPos.posTmp, ""); + } + } else { + minDist = curMergedPos.cur.MergeWithDist(relid, rawRes.term.opts.distance, curMergedPos.posTmp, ""); + } if (!curMergedPos.posTmp.empty()) { present[vid] = true; double normDist = bound(1.0 / minDist, holder_.cfg_->distanceWeight, holder_.cfg_->distanceBoost); @@ -915,14 +976,21 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra } } else { if (vidStatus) { - if constexpr (isGroupMergeWithAreas
()) { - auto& pos = merged_rd[idoffsets[vid]].posTmp; + if constexpr (isGroupMergeWithAreas()) { + auto& pos = mergedPos[idoffsets[vid]].posTmp; pos.reserve(pos.size() + relid.Size()); for (const auto& p : relid.Pos()) { - pos.emplace_back(p, -1); + if constexpr (std::is_same_v) { + pos.emplace_back(p, -1); + } else if constexpr (std::is_same_v) { + utf16_to_utf8(rawRes.term.pattern, termInf.ftDslTerm); + pos.emplace_back(PosTypeDebug(p, termInf.ToString()), -1); + } else { + static_assert(!sizeof(MergedIdRelGroupType), "incorrect MergedIdRelGroupType::TypeTParam type"); + } } } else { - auto& pos = merged_rd[idoffsets[vid]].posTmp; + auto& pos = mergedPos[idoffsets[vid]].posTmp; pos.reserve(pos.Size() + relid.Size()); for (const auto& p : relid.Pos()) { @@ -938,16 +1006,24 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra mergeStatuses[vid] = rawResIndex + 1; present[vid] = true; idoffsets[vid] = merged.size() - 1; - if constexpr (isGroupMergeWithAreas
()) { - RVector, 4> posTmp; + if constexpr (isGroupMergeWithAreas()) { + RVector, 4> posTmp; posTmp.reserve(relid.Size()); for (const auto& p : relid.Pos()) { - posTmp.emplace_back(p, -1); + if constexpr (std::is_same_v>) { + posTmp.emplace_back(p, -1); + } else if constexpr (std::is_same_v>) { + utf16_to_utf8(rawRes.term.pattern, termInf.ftDslTerm); + PosTypeDebug pd{p, termInf.ToString()}; + posTmp.emplace_back(pd, -1); + } else { + static_assert(!sizeof(MergedIdRelGroupType), "incorrect MergedIdRelGroupType type"); + } } - merged_rd.emplace_back(IdRelType(std::move(relid)), int(termRank), rawRes.term.opts.qpos, std::move(posTmp)); + mergedPos.emplace_back(IdRelType(std::move(relid)), int(termRank), rawRes.term.opts.qpos, std::move(posTmp)); } else { - merged_rd.emplace_back(IdRelType(std::move(relid)), int(termRank), rawRes.term.opts.qpos); + mergedPos.emplace_back(IdRelType(std::move(relid)), int(termRank), rawRes.term.opts.qpos); } } } @@ -955,7 +1031,7 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra } for (size_t mergedIndex = 0; mergedIndex < merged.size(); mergedIndex++) { auto& mergedInfo = merged[mergedIndex]; - auto& mergedPosInfo = merged_rd[mergedIndex]; + auto& mergedPosInfo = mergedPos[mergedIndex]; if (mergedPosInfo.posTmp.empty()) { mergedInfo.proc = 0; mergeStatuses[mergedInfo.id] = 0; @@ -964,7 +1040,7 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra mergedPosInfo.rank = 0; continue; } - if constexpr (isGroupMerge
()) { + if constexpr (isGroupMerge()) { mergedPosInfo.posTmp.SortAndUnique(); mergedPosInfo.cur = std::move(mergedPosInfo.posTmp); mergedPosInfo.next.Clear(); @@ -972,9 +1048,10 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra mergedPosInfo.rank = 0; } else { auto& posTmp = mergedPosInfo.posTmp; - boost::sort::pdqsort_branchless(posTmp.begin(), posTmp.end(), - [](const std::pair& l, - const std::pair& r) noexcept { return l.first < r.first; }); + boost::sort::pdqsort_branchless( + posTmp.begin(), posTmp.end(), + [](const std::pair& l, + const std::pair& r) noexcept { return l.first < r.first; }); auto last = std::unique(posTmp.begin(), posTmp.end()); posTmp.resize(last - posTmp.begin()); @@ -992,9 +1069,9 @@ void Selecter::mergeIterationGroup(TextSearchResults& rawRes, index_t ra } template -template -void Selecter::mergeResultsPart(std::vector& rawResults, size_t from, size_t to, MergeData& merged, - std::vector& mergedPos, const bool inTransaction, const RdxContext& rdxCtx) { +template +void Selecter::mergeResultsPart(std::vector& rawResults, size_t from, size_t to, MergeType& merged, + std::vector& mergedPos, const bool inTransaction, const RdxContext& rdxCtx) { // Current implementation supports OpAnd only assertrx_throw(to <= rawResults.size()); FtMergeStatuses::Statuses mergeStatuses; @@ -1013,8 +1090,8 @@ void Selecter::mergeResultsPart(std::vector& rawResul std::vector exists; bool firstTerm = true; for (size_t i = from; i < to; ++i) { - mergeIterationGroup(rawResults[i], i, mergeStatuses, merged, mergedPos, idoffsets, exists, firstTerm, - inTransaction, rdxCtx); + mergeIterationGroup(rawResults[i], i, mergeStatuses, merged, mergedPos, + idoffsets, exists, firstTerm, inTransaction, rdxCtx); firstTerm = false; // set proc=0 (exclude) for document not containing term for (auto& info : merged) { @@ -1282,13 +1359,13 @@ bool Selecter::TyposHandler::isWordFitMaxLettPerm(const std::string_view } template -template -MergeData Selecter::mergeResults(std::vector&& rawResults, size_t maxMergedSize, - const std::vector& synonymsBounds, bool inTransaction, FtSortType ftSortType, - FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext& rdxCtx) { +template +MergedType Selecter::mergeResults(std::vector&& rawResults, size_t maxMergedSize, + const std::vector& synonymsBounds, bool inTransaction, FtSortType ftSortType, + FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext& rdxCtx) { const auto& vdocs = holder_.vdocs_; - MergeData merged; + MergedType merged; if (!rawResults.size() || !vdocs.size()) { return merged; } @@ -1322,12 +1399,15 @@ MergeData Selecter::mergeResults(std::vector&& rawRes rawResults[k].term.opts.op = OpAnd; k++; } - if (needArea_) { - mergeGroupResult(rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, idoffsets, - inTransaction, rdxCtx); + if constexpr (std::is_same_v>) { + mergeGroupResult, Bm25T, MergedOffsetT, MergedType>( + rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, idoffsets, inTransaction, rdxCtx); + } else if constexpr (std::is_same_v>) { + mergeGroupResult, Bm25T, MergedOffsetT, MergedType>( + rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, idoffsets, inTransaction, rdxCtx); } else { - mergeGroupResult(rawResults, i, k, mergeStatuses, merged, merged_rd, op, hasBeenAnd, idoffsets, - inTransaction, rdxCtx); + mergeGroupResult(rawResults, i, k, mergeStatuses, merged, merged_rd, op, + hasBeenAnd, idoffsets, inTransaction, rdxCtx); } if (op == OpAnd) { hasBeenAnd = true; 
@@ -1441,14 +1521,37 @@ void Selecter::printVariants(const FtSelectContext& ctx, const TextSearc } template class Selecter; -template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, +template MergeDataBase Selecter::Process(FtDSLQuery&&, bool, FtSortType, + FtMergeStatuses::Statuses&&, + const RdxContext&); +template MergeData Selecter::Process>(FtDSLQuery&&, bool, FtSortType, + FtMergeStatuses::Statuses&&, + const RdxContext&); +template MergeData Selecter::Process>(FtDSLQuery&&, bool, + FtSortType, + FtMergeStatuses::Statuses&&, + const RdxContext&); + +template MergeDataBase Selecter::Process(FtDSLQuery&&, bool, FtSortType, + FtMergeStatuses::Statuses&&, const RdxContext&); +template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, + FtMergeStatuses::Statuses&&, const RdxContext&); +template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, + FtMergeStatuses::Statuses&&, const RdxContext&); + +template class Selecter; +template MergeDataBase Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, + const RdxContext&); +template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, const RdxContext&); -template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, +template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, + FtMergeStatuses::Statuses&&, const RdxContext&); + +template MergeDataBase Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, + const RdxContext&); +template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, const RdxContext&); -template class Selecter; -template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, - const RdxContext&); -template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, FtMergeStatuses::Statuses&&, - const RdxContext&); +template MergeData Selecter::Process(FtDSLQuery&&, bool, FtSortType, + FtMergeStatuses::Statuses&&, const RdxContext&); } // namespace reindexer diff --git a/cpp_src/core/ft/ft_fast/selecter.h b/cpp_src/core/ft/ft_fast/selecter.h index 0e159f484..7dc769f83 100644 --- a/cpp_src/core/ft/ft_fast/selecter.h +++ b/cpp_src/core/ft/ft_fast/selecter.h @@ -6,6 +6,7 @@ namespace reindexer { +// Final information about found document struct MergeInfo { IdType id; // Virtual id of merged document (index in vdocs) int32_t proc; // Rank of document @@ -13,9 +14,15 @@ struct MergeInfo { int8_t field; // Field index, where was match }; -struct MergeData : public std::vector { +struct MergeDataBase : public std::vector { + virtual ~MergeDataBase() {} int maxRank = 0; - std::vector vectorAreas; +}; + +template +struct MergeData : public MergeDataBase { + using AT = AreaType; + std::vector> vectorAreas; }; template @@ -23,10 +30,10 @@ class Selecter { typedef fast_hash_map, WordIdTypeHash, WordIdTypeEqual, WordIdTypeLess> FoundWordsType; public: - Selecter(DataHolder& holder, size_t fieldSize, bool needArea, int maxAreasInDoc) - : holder_(holder), fieldSize_(fieldSize), needArea_(needArea), maxAreasInDoc_(maxAreasInDoc) {} + Selecter(DataHolder& holder, size_t fieldSize, int maxAreasInDoc) + : holder_(holder), fieldSize_(fieldSize), maxAreasInDoc_(maxAreasInDoc) {} - // Intermediate information about found document in current merge step. Used only for queries with 2 or more terms + // Intermediate information about document found at current merge step. 
Used only for queries with 2 or more terms struct MergedIdRel { explicit MergedIdRel(IdRelType&& c, int r, int q) : next(std::move(c)), rank(r), qpos(q) {} explicit MergedIdRel(int r, int q) : rank(r), qpos(q) {} @@ -37,24 +44,26 @@ class Selecter { int32_t qpos; // Position in query }; - struct MergedIdRelEx : public MergedIdRel { - explicit MergedIdRelEx(IdRelType&& c, int r, int q) : MergedIdRel(r, q), posTmp(std::move(c)) {} - MergedIdRelEx(MergedIdRelEx&&) = default; - IdRelType posTmp; // For group only. Collect all positions for subpatterns and the index in the vector with which we merged + struct MergedIdRelGroup : public MergedIdRel { + explicit MergedIdRelGroup(IdRelType&& c, int r, int q) : MergedIdRel(r, q), posTmp(std::move(c)) {} + MergedIdRelGroup(MergedIdRelGroup&&) = default; + IdRelType posTmp; // Group only. Collect all positions for subpatterns and index into vector we merged with }; - struct MergedIdRelExArea : public MergedIdRel { - MergedIdRelExArea(IdRelType&& c, int r, int q, RVector, 4>&& p) + template + struct MergedIdRelGroupArea : public MergedIdRel { + using TypeTParam = PosT; + MergedIdRelGroupArea(IdRelType&& c, int r, int q, RVector, 4>&& p) : MergedIdRel(std::move(c), r, q), posTmp(std::move(p)) {} - MergedIdRelExArea(MergedIdRelExArea&&) = default; + MergedIdRelGroupArea(MergedIdRelGroupArea&&) = default; - RVector, 4> + RVector, 4> posTmp; // For group only. Collect all positions for subpatterns and the index in the vector with which we merged - h_vector, 4>, 2> wordPosForChain; + RVector, 4>, 2> wordPosForChain; }; - template - MergeData Process(FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses::Statuses&& mergeStatuses, + template + MergeType Process(FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext&); private: @@ -65,6 +74,25 @@ class Selecter { int16_t wordLen; }; + struct TermRankInfo { + int32_t termRank = 0; + double bm25Norm = 0.0; + double termLenBoost = 0.0; + double positionRank = 0.0; + double normDist = 0.0; + double proc = 0.0; + double fullMatchBoost = 0.0; + std::string_view pattern; + std::string ftDslTerm; + + std::string ToString() const { + return fmt::format( + "{{termRank={} term='{}' pattern='{}' bm25_norm={} termLenBoost={} positionRank={} normDist={} proc={} " + "fullMatchBoost={}}} ", + termRank, ftDslTerm, pattern, bm25Norm, termLenBoost, positionRank, normDist, proc, fullMatchBoost); + } + }; + struct FtVariantEntry { FtVariantEntry() = default; FtVariantEntry(std::string p, FtDslOpts o, int pr, int c) : pattern{std::move(p)}, opts{std::move(o)}, proc{pr}, charsCount{c} {} @@ -178,52 +206,69 @@ class Selecter { std::wstring foundWordUTF16_; }; - template - MergeData mergeResults(std::vector&& rawResults, size_t maxMergedSize, const std::vector& synonymsBounds, + template + MergeType mergeResults(std::vector&& rawResults, size_t totalORVids, const std::vector& synonymsBounds, bool inTransaction, FtSortType ftSortType, FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext&); - template - void mergeIteration(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, MergeData& merged, + template + void mergeIteration(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, MergeType& merged, std::vector& merged_rd, std::vector& idoffsets, std::vector& curExists, const bool hasBeenAnd, const bool inTransaction, const RdxContext&); - template - void 
mergeIterationGroup(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, MergeData& merged, - std::vector
& merged_rd, std::vector& idoffsets, std::vector& present, + template + void mergeIterationGroup(TextSearchResults& rawRes, index_t rawResIndex, FtMergeStatuses::Statuses& mergeStatuses, MergeType& merged, + std::vector& mergedPos, std::vector& idoffsets, std::vector& present, const bool firstTerm, const bool inTransaction, const RdxContext& rdxCtx); - template + template void mergeGroupResult(std::vector& rawResults, size_t from, size_t to, FtMergeStatuses::Statuses& mergeStatuses, - MergeData& merged, std::vector& merged_rd, OpType op, const bool hasBeenAnd, + MergeType& merged, std::vector& merged_rd, OpType op, const bool hasBeenAnd, std::vector& idoffsets, const bool inTransaction, const RdxContext& rdxCtx); - template - void mergeResultsPart(std::vector& rawResults, size_t from, size_t to, MergeData& merged, + template + void mergeResultsPart(std::vector& rawResults, size_t from, size_t to, MergeType& merged, std::vector& mergedPos, const bool inTransaction, const RdxContext& rdxCtx); - AreaHolder createAreaFromSubMerge(const MergedIdRelExArea& posInfo); - void copyAreas(AreaHolder& subMerged, AreaHolder& merged, int32_t rank); - template - void subMergeLoop(std::vector& subMerged, std::vector& subMergedPos, MergeData& merged, - std::vector& merged_rd, FtMergeStatuses::Statuses& mergeStatuses, std::vector& idoffsets, - std::vector* checkAndOpMerge, const bool hasBeenAnd); + template + AreasInDocument createAreaFromSubMerge(const MergedIdRelGroupArea& posInfo); + + template + void insertSubMergeArea(const MergedIdRelGroupArea& posInfo, PosT cur, int prevIndex, AreasInDocument& area); + + template + void insertSubMergeArea(const MergedIdRelGroupArea& posInfo, PosT cur, int prevIndex, AreasInDocument& area); + + template + void copyAreas(AreasInDocument& subMerged, AreasInDocument& merged, int32_t rank); + + template + void subMergeLoop(MergeType& subMerged, std::vector& subMergedPos, MergeType& merged, std::vector& merged_rd, + FtMergeStatuses::Statuses& mergeStatuses, std::vector& idoffsets, std::vector* checkAndOpMerge, + const bool hasBeenAnd); template - void calcFieldBoost(const Calculator& bm25Calc, unsigned long long f, const IdRelType& relid, const FtDslOpts& opts, int termProc, - double& termRank, double& normBm25, bool& dontSkipCurTermRank, h_vector& ranksInFields, int& field); + void calcFieldBoost(const Calculator& bm25Calc, unsigned long long f, const IdRelType& relid, const FtDslOpts& opts, + TermRankInfo& termInf, bool& dontSkipCurTermRank, h_vector& ranksInFields, int& field); + template - std::pair calcTermRank(const TextSearchResults& rawRes, Calculator c, const IdRelType& relid, int proc); + std::pair calcTermRank(const TextSearchResults& rawRes, Calculator c, const IdRelType& relid, TermRankInfo& termInf); - template - void addNewTerm(FtMergeStatuses::Statuses& mergeStatuses, MergeData& merged, std::vector& idoffsets, + template + void addNewTerm(FtMergeStatuses::Statuses& mergeStatuses, MergeType& merged, std::vector& idoffsets, std::vector& curExists, const IdRelType& relid, index_t rawResIndex, int32_t termRank, int field); - void addAreas(MergeData& merged, int32_t areaIndex, const IdRelType& relid, int32_t termRank); + void addAreas(AreasInDocument& area, const IdRelType& relid, int32_t termRank, const TermRankInfo& termInf, + const std::wstring& pattern); + void addAreas(AreasInDocument& area, const IdRelType& relid, int32_t termRank, const TermRankInfo& termInf, + const std::wstring& pattern); + + template + constexpr static bool IsOneOf = (... 
|| std::is_same_v); template static constexpr bool isSingleTermMerge() noexcept { static_assert( - std::is_same_v || std::is_same_v || std::is_same_v, + IsOneOf, MergedIdRelGroupArea, MergedIdRel>, "unsupported type for mergeIteration"); return std::is_same_v; } @@ -231,21 +276,22 @@ class Selecter { template static constexpr bool isGroupMerge() noexcept { static_assert( - std::is_same_v || std::is_same_v || std::is_same_v, + IsOneOf, MergedIdRelGroupArea, MergedIdRel>, "unsupported type for mergeIteration"); - return std::is_same_v; + return std::is_same_v; } template static constexpr bool isGroupMergeWithAreas() noexcept { static_assert( - std::is_same_v || std::is_same_v || std::is_same_v, + IsOneOf, MergedIdRelGroupArea, MergedIdRel>, "unsupported type for mergeIteration"); - return std::is_same_v; + return std::is_same_v> || + std::is_same_v>; } - template - MergeData mergeResultsBmType(std::vector&& results, size_t totalORVids, const std::vector& synonymsBounds, + template + MergeType mergeResultsBmType(std::vector&& results, size_t totalORVids, const std::vector& synonymsBounds, bool inTransaction, FtSortType ftSortType, FtMergeStatuses::Statuses&& mergeStatuses, const RdxContext& rdxCtx); @@ -263,7 +309,6 @@ class Selecter { DataHolder& holder_; size_t fieldSize_; - const bool needArea_; int maxAreasInDoc_; }; diff --git a/cpp_src/core/ft/ftsetcashe.h b/cpp_src/core/ft/ftsetcashe.h index 6d3802baa..ed1ea8f74 100644 --- a/cpp_src/core/ft/ftsetcashe.h +++ b/cpp_src/core/ft/ftsetcashe.h @@ -8,12 +8,11 @@ namespace reindexer { struct FtIdSetCacheVal { FtIdSetCacheVal() = default; FtIdSetCacheVal(IdSet::Ptr&& i) noexcept : ids(std::move(i)) {} - FtIdSetCacheVal(IdSet::Ptr&& i, FtCtx::Data::Ptr&& c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} - + FtIdSetCacheVal(IdSet::Ptr&& i, FtCtxData::Ptr&& c) noexcept : ids(std::move(i)), ctx(std::move(c)) {} size_t Size() const noexcept { return ids ? 
(sizeof(*ids.get()) + ids->heap_size()) : 0; } IdSet::Ptr ids; - FtCtx::Data::Ptr ctx; + FtCtxData::Ptr ctx; }; using FtIdSetCache = LRUCache; diff --git a/cpp_src/core/ft/idrelset.cc b/cpp_src/core/ft/idrelset.cc index c1aeaf9e8..74cc28de9 100644 --- a/cpp_src/core/ft/idrelset.cc +++ b/cpp_src/core/ft/idrelset.cc @@ -76,4 +76,58 @@ int IdRelType::MinPositionInField(int field) const noexcept { return res; } +// RVector, 4> +// IdRelType +// RVector, 4> +template +int IdRelType::MergeWithDist(const IdRelType& newWordPos, unsigned int dist, PosTypeT& res, [[maybe_unused]] const std::string& inf) const { + unsigned int minDist = std::numeric_limits::max(); + auto rightIt = newWordPos.pos_.begin(); + const auto leftEnd = pos_.end(); + const auto rightEnd = newWordPos.pos_.end(); + for (auto leftIt = pos_.begin(); leftIt != leftEnd; ++leftIt) { + while (rightIt != rightEnd && rightIt->fpos < leftIt->fpos) { + ++rightIt; + } + // here right pos > left pos + if (rightIt == rightEnd) { + break; + } + if (rightIt->field() != leftIt->field()) { + continue; + } + + auto leftItNext = leftIt + 1; + uint32_t leftNextPos = std::numeric_limits::max(); + if (leftItNext != leftEnd) { + leftNextPos = leftItNext->pos(); + } + + while (rightIt != rightEnd && rightIt->field() == leftIt->field() && uint32_t(rightIt->pos()) < leftNextPos && + rightIt->fpos - leftIt->fpos <= dist) { + minDist = std::min(rightIt->fpos - leftIt->fpos, minDist); + if constexpr (std::is_same_v) { + res.Add(*rightIt); + } else if constexpr (std::is_same_v, 4>>) { + res.emplace_back(*rightIt, leftIt - pos_.begin()); + } else if constexpr (std::is_same_v, 4>>) { + res.emplace_back(PosTypeDebug{*rightIt, inf}, leftIt - pos_.begin()); + } else { + static_assert(!sizeof(PosTypeT), "incorrect PosType type "); + } + ++rightIt; + } + } + return minDist; +} + +template int IdRelType::MergeWithDist(const IdRelType& newWordPos, unsigned int dist, IdRelType& res, + const std::string& inf) const; +template int IdRelType::MergeWithDist, 4>>(const IdRelType& newWordPos, unsigned int dist, + RVector, 4>& res, + const std::string& inf) const; +template int IdRelType::MergeWithDist, 4>>(const IdRelType& newWordPos, unsigned int dist, + RVector, 4>& res, + const std::string&) const; + } // namespace reindexer diff --git a/cpp_src/core/ft/idrelset.h b/cpp_src/core/ft/idrelset.h index 05ed10950..fc967e460 100644 --- a/cpp_src/core/ft/idrelset.h +++ b/cpp_src/core/ft/idrelset.h @@ -48,46 +48,8 @@ class IdRelType { uint32_t fpos; }; - template - int MergeWithDist(const IdRelType& newWordPos, unsigned int dist, PosType& res) const { - int minDist = INT_MAX; - auto rightIt = newWordPos.pos_.begin(); - const auto leftEnd = pos_.end(); - const auto rightEnd = newWordPos.pos_.end(); - for (auto leftIt = pos_.begin(); leftIt != leftEnd; ++leftIt) { - while (rightIt != rightEnd && rightIt->fpos < leftIt->fpos) { - ++rightIt; - } - // here right pos > left pos - if (rightIt == rightEnd) { - break; - } - if (rightIt->field() != leftIt->field()) { - continue; - } - - auto leftItNext = leftIt + 1; - uint32_t leftNextPos = std::numeric_limits::max(); - if (leftItNext != leftEnd) { - leftNextPos = leftItNext->pos(); - } - - while (rightIt != rightEnd && rightIt->field() == leftIt->field() && uint32_t(rightIt->pos()) < leftNextPos && - rightIt->fpos - leftIt->fpos <= dist) { - int d = rightIt->fpos - leftIt->fpos; - if (d < minDist) { - minDist = d; - } - if constexpr (std::is_same_v) { - res.Add(*rightIt); - } else { - res.emplace_back(*rightIt, leftIt - pos_.begin()); - 
} - ++rightIt; - } - } - return minDist; - } + template + int MergeWithDist(const IdRelType& newWordPos, unsigned int dist, PosTypeT& res, const std::string& inf) const; void Add(int pos, int field) { assertrx_throw(0 <= field && field <= kMaxFtCompositeFields); @@ -141,6 +103,13 @@ class IdRelType { VDocIdType id_ = 0; // index of the document in which the word occurs }; +struct PosTypeDebug : public IdRelType::PosType { + PosTypeDebug() = default; + explicit PosTypeDebug(const IdRelType::PosType& pos, const std::string& inf) : IdRelType::PosType(pos), info(inf) {} + explicit PosTypeDebug(const IdRelType::PosType& pos, std::string&& inf) noexcept : IdRelType::PosType(pos), info(std::move(inf)) {} + std::string info; +}; + class IdRelSet : public std::vector { public: int Add(VDocIdType id, int pos, int field) { diff --git a/cpp_src/core/ft/usingcontainer.h b/cpp_src/core/ft/usingcontainer.h index 143376670..4edfe1549 100644 --- a/cpp_src/core/ft/usingcontainer.h +++ b/cpp_src/core/ft/usingcontainer.h @@ -11,11 +11,24 @@ namespace reindexer { template class RVector : public std::vector { public: + typedef unsigned size_type; + using std::vector::vector; template void clear() noexcept { std::vector::clear(); } + + size_t heap_size() { return std::vector::capacity() * sizeof(T); } + + static constexpr size_type max_size() noexcept { return std::numeric_limits::max() >> 1; } + + void grow(size_type sz) { + size_type cap = std::vector::capacity(); + if (sz > cap) { + std::vector::reserve(std::max(sz, std::min(max_size(), cap * 2))); + } + } }; #else template diff --git a/cpp_src/core/idset.h b/cpp_src/core/idset.h index d6235ac90..167370cc6 100644 --- a/cpp_src/core/idset.h +++ b/cpp_src/core/idset.h @@ -4,6 +4,7 @@ #include #include #include +#include "core/ft/usingcontainer.h" #include "cpp-btree/btree_set.h" #include "estl/h_vector.h" #include "estl/intrusive_ptr.h" @@ -12,7 +13,7 @@ namespace reindexer { -using base_idset = h_vector; +using base_idset = RVector; using base_idsetset = btree::btree_set; class IdSetPlain : protected base_idset { diff --git a/cpp_src/core/index/indextext/fastindextext.cc b/cpp_src/core/index/indextext/fastindextext.cc index fdded3b1f..4ff8b84e2 100644 --- a/cpp_src/core/index/indextext/fastindextext.cc +++ b/cpp_src/core/index/indextext/fastindextext.cc @@ -125,8 +125,9 @@ IndexMemStat FastIndexText::GetMemStat(const RdxContext& ctx) { return ret; } template -MergeData::iterator FastIndexText::unstableRemoveIf(MergeData& md, int minRelevancy, double scalingFactor, size_t& releventDocs, - int& cnt) { +template +typename MergeType::iterator FastIndexText::unstableRemoveIf(MergeType& md, int minRelevancy, double scalingFactor, size_t& releventDocs, + int& cnt) { if (md.empty()) { return md.begin(); } @@ -138,14 +139,11 @@ MergeData::iterator FastIndexText::unstableRemoveIf(MergeData& md, int minRel if (first == last) { return first; } - auto& vdoc = holder.vdocs_[first->id]; - if (!vdoc.keyEntry) { - break; - } first->proc *= scalingFactor; if (first->proc < minRelevancy) { break; } + auto& vdoc = holder.vdocs_[first->id]; assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); ++releventDocs; @@ -157,9 +155,6 @@ MergeData::iterator FastIndexText::unstableRemoveIf(MergeData& md, int minRel if (first == last) { return first; } - if (!holder.vdocs_[last->id].keyEntry) { - continue; - } last->proc *= scalingFactor; if (last->proc >= minRelevancy) { break; @@ -176,53 +171,20 @@ MergeData::iterator 
FastIndexText::unstableRemoveIf(MergeData& md, int minRel } template -IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, - FtMergeStatuses&& statuses, FtUseExternStatuses useExternSt, const RdxContext& rdxCtx) { - fctx->GetData()->extraWordSymbols_ = this->getConfig()->extraWordSymbols; - fctx->GetData()->isWordPositions_ = true; - - MergeData mergeData; - switch (holder_->cfg_->optimization) { - case FtFastConfig::Optimization::Memory: { - DataHolder* d = dynamic_cast*>(holder_.get()); - assertrx_throw(d); - Selecter selecter{*d, this->Fields().size(), fctx->NeedArea(), holder_->cfg_->maxAreasInDoc}; - if (useExternSt == FtUseExternStatuses::No) { - mergeData = selecter.Process(std::move(dsl), inTransaction, ftSortType, - std::move(statuses.statuses), rdxCtx); - } else { - mergeData = selecter.Process(std::move(dsl), inTransaction, ftSortType, - std::move(statuses.statuses), rdxCtx); - } - break; - } - case FtFastConfig::Optimization::CPU: { - DataHolder* d = dynamic_cast*>(holder_.get()); - assertrx_throw(d); - Selecter selecter{*d, this->Fields().size(), fctx->NeedArea(), holder_->cfg_->maxAreasInDoc}; - if (useExternSt == FtUseExternStatuses::No) { - mergeData = selecter.Process(std::move(dsl), inTransaction, ftSortType, - std::move(statuses.statuses), rdxCtx); - } else { - mergeData = selecter.Process(std::move(dsl), inTransaction, ftSortType, - std::move(statuses.statuses), rdxCtx); - } - break; - } - default: - assertrx_throw(0); - } - +template +IdSet::Ptr FastIndexText::afterSelect(FtCtx& fctx, MergeType&& mergeData, FtSortType ftSortType, FtMergeStatuses&& statuses, + FtUseExternStatuses useExternSt) { // convert vids(uniq documents id) to ids (real ids) IdSet::Ptr mergedIds = make_intrusive>(); - auto& holder = *this->holder_; + auto& holder = *this->holder_; if (mergeData.empty()) { return mergedIds; } int cnt = 0; const double scalingFactor = mergeData.maxRank > 255 ? 
255.0 / mergeData.maxRank : 1.0; const int minRelevancy = getConfig()->minRelevancy * 100 * scalingFactor; + size_t releventDocs = 0; switch (ftSortType) { case FtSortType::RankAndID: { @@ -233,14 +195,10 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTr case FtSortType::RankOnly: { for (auto& vid : mergeData) { auto& vdoc = holder.vdocs_[vid.id]; - if (!vdoc.keyEntry) { - continue; - } vid.proc *= scalingFactor; if (vid.proc <= minRelevancy) { break; } - assertrx_throw(!vdoc.keyEntry->Unsorted().empty()); cnt += vdoc.keyEntry->Sorted(0).size(); ++releventDocs; @@ -253,47 +211,47 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTr } mergedIds->reserve(cnt); - fctx->Reserve(cnt); - if (!fctx->NeedArea()) { + if constexpr (std::is_same_v) { if (useExternSt == FtUseExternStatuses::No) { appendMergedIds(mergeData, releventDocs, [&fctx, &mergedIds](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { - fctx->Add(ebegin, eend, vid.proc); + fctx.Add(ebegin, eend, vid.proc); mergedIds->Append(ebegin, eend, IdSet::Unordered); }); } else { appendMergedIds(mergeData, releventDocs, [&fctx, &mergedIds, &statuses](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { - fctx->Add(ebegin, eend, vid.proc, statuses.rowIds); + fctx.Add(ebegin, eend, vid.proc, statuses.rowIds); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); }); } - } else { + } else if constexpr (std::is_same_v, MergeType> || std::is_same_v, MergeType>) { if (useExternSt == FtUseExternStatuses::No) { appendMergedIds(mergeData, releventDocs, [&fctx, &mergedIds, &mergeData](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { - assertrx_throw(vid.areaIndex != std::numeric_limits::max()); - fctx->Add(ebegin, eend, vid.proc, std::move(mergeData.vectorAreas[vid.areaIndex])); + fctx.Add(ebegin, eend, vid.proc, std::move(mergeData.vectorAreas[vid.areaIndex])); mergedIds->Append(ebegin, eend, IdSet::Unordered); }); + } else { appendMergedIds( mergeData, releventDocs, - [&fctx, &mergedIds, &mergeData, &statuses](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { - assertrx_throw(vid.areaIndex != std::numeric_limits::max()); - fctx->Add(ebegin, eend, vid.proc, statuses.rowIds, std::move(mergeData.vectorAreas[vid.areaIndex])); + [&fctx, &mergedIds, &statuses, &mergeData](IdSetCRef::iterator ebegin, IdSetCRef::iterator eend, const MergeInfo& vid) { + fctx.Add(ebegin, eend, vid.proc, statuses.rowIds, std::move(mergeData.vectorAreas[vid.areaIndex])); mergedIds->Append(ebegin, eend, statuses.rowIds, IdSet::Unordered); }); } + } else { + static_assert(!sizeof(MergeType), "incorrect MergeType"); } + if rx_unlikely (getConfig()->logLevel >= LogInfo) { logPrintf(LogInfo, "Total merge out: %d ids", mergedIds->size()); - std::string str; - for (size_t i = 0; i < fctx->Size();) { + for (size_t i = 0; i < fctx.Size();) { size_t j = i; - for (; j < fctx->Size() && fctx->Proc(i) == fctx->Proc(j); j++); - str += std::to_string(fctx->Proc(i)) + "%"; + for (; j < fctx.Size() && fctx.Proc(i) == fctx.Proc(j); j++); + str += std::to_string(fctx.Proc(i)) + "%"; if (j - i > 1) { str += "("; str += std::to_string(j - i); @@ -302,9 +260,9 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTr str += " "; i = j; } - logPrintf(LogInfo, "Relevancy(%d): %s", fctx->Size(), str); + logPrintf(LogInfo, "Relevancy(%d): %s", fctx.Size(), str); } - assertrx_throw(mergedIds->size() == 
fctx->Size()); + assertrx_throw(mergedIds->size() == fctx.Size()); if (ftSortType == FtSortType::RankAndID) { std::vector sortIds; size_t nItems = mergedIds->size(); @@ -312,7 +270,7 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTr for (size_t i = 0; i < nItems; i++) { sortIds.emplace_back(i); } - std::vector& proc = fctx->GetData()->proc_; + std::vector& proc = fctx.GetData()->proc; boost::sort::pdqsort(sortIds.begin(), sortIds.end(), [&proc, mergedIds](size_t i1, size_t i2) { int p1 = proc[i1]; int p2 = proc[i2]; @@ -345,6 +303,74 @@ IdSet::Ptr FastIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTr } return mergedIds; } + +template +template +IdSet::Ptr FastIndexText::applyCtxTypeAndSelect(DataHolder* d, const BaseFunctionCtx::Ptr& bctx, FtDSLQuery&& dsl, + bool inTransaction, FtSortType ftSortType, FtMergeStatuses&& statuses, + FtUseExternStatuses useExternSt, const RdxContext& rdxCtx) { + Selecter selecter{*d, this->Fields().size(), holder_->cfg_->maxAreasInDoc}; + intrusive_ptr fctx = static_ctx_pointer_cast(bctx); + assertrx_throw(fctx); + fctx->SetExtraWordSymbols(this->getConfig()->extraWordSymbols); + fctx->SetWordPosition(true); + + switch (bctx->type) { + case BaseFunctionCtx::CtxType::kFtCtx: { + MergeDataBase mergeData = selecter.template Process( + std::move(dsl), inTransaction, ftSortType, std::move(statuses.statuses), rdxCtx); + return afterSelect(*fctx.get(), std::move(mergeData), ftSortType, std::move(statuses), useExternSt); + } + case BaseFunctionCtx::CtxType::kFtArea: { + MergeData mergeData = selecter.template Process>( + std::move(dsl), inTransaction, ftSortType, std::move(statuses.statuses), rdxCtx); + return afterSelect(*fctx.get(), std::move(mergeData), ftSortType, std::move(statuses), useExternSt); + } + case BaseFunctionCtx::CtxType::kFtAreaDebug: { + MergeData mergeData = selecter.template Process>( + std::move(dsl), inTransaction, ftSortType, std::move(statuses.statuses), rdxCtx); + return afterSelect(*fctx.get(), std::move(mergeData), ftSortType, std::move(statuses), useExternSt); + } + default: + throw_assert(false); + } +} + +template +template +IdSet::Ptr FastIndexText::applyOptimizationAndSelect(DataHolder* d, BaseFunctionCtx::Ptr bctx, FtDSLQuery&& dsl, + bool inTransaction, FtSortType ftSortType, FtMergeStatuses&& statuses, + FtUseExternStatuses useExternSt, const RdxContext& rdxCtx) { + if (useExternSt == FtUseExternStatuses::Yes) { + return applyCtxTypeAndSelect(d, std::move(bctx), std::move(dsl), inTransaction, ftSortType, + std::move(statuses), useExternSt, rdxCtx); + } else { + return applyCtxTypeAndSelect(d, std::move(bctx), std::move(dsl), inTransaction, ftSortType, + std::move(statuses), useExternSt, rdxCtx); + } +} + +template +IdSet::Ptr FastIndexText::Select(FtCtx::Ptr bctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, + FtMergeStatuses&& statuses, FtUseExternStatuses useExternSt, const RdxContext& rdxCtx) { + switch (holder_->cfg_->optimization) { + case FtFastConfig::Optimization::Memory: { + DataHolder* d = dynamic_cast*>(holder_.get()); + assertrx_throw(d); + return applyOptimizationAndSelect(d, bctx, std::move(dsl), inTransaction, ftSortType, std::move(statuses), + useExternSt, rdxCtx); + } + case FtFastConfig::Optimization::CPU: { + DataHolder* d = dynamic_cast*>(holder_.get()); + assertrx_throw(d); + return applyOptimizationAndSelect(d, bctx, std::move(dsl), inTransaction, ftSortType, std::move(statuses), + useExternSt, rdxCtx); + } + default: + assertrx_throw(false); + } +} + 
template void FastIndexText::commitFulltextImpl() { try { @@ -453,16 +479,13 @@ void FastIndexText::buildVdocs(Container& data) { } template -template -RX_ALWAYS_INLINE void FastIndexText::appendMergedIds(MergeData& mergeData, size_t releventDocs, F&& appender) { +template +RX_ALWAYS_INLINE void FastIndexText::appendMergedIds(MergeType& mergeData, size_t releventDocs, F&& appender) { auto& holder = *this->holder_; - for (size_t i = 0; i < releventDocs;) { + for (size_t i = 0; i < releventDocs; i++) { auto& vid = mergeData[i]; auto& vdoc = holder.vdocs_[vid.id]; - if (vdoc.keyEntry) { - appender(vdoc.keyEntry->Sorted(0).begin(), vdoc.keyEntry->Sorted(0).end(), vid); - i++; - } + appender(vdoc.keyEntry->Sorted(0).begin(), vdoc.keyEntry->Sorted(0).end(), vid); } } diff --git a/cpp_src/core/index/indextext/fastindextext.h b/cpp_src/core/index/indextext/fastindextext.h index 8e902d945..68c3afe29 100644 --- a/cpp_src/core/index/indextext/fastindextext.h +++ b/cpp_src/core/index/indextext/fastindextext.h @@ -7,8 +7,6 @@ namespace reindexer { -struct MergeData; - template class FastIndexText : public IndexText { using Base = IndexText; @@ -32,8 +30,9 @@ class FastIndexText : public IndexText { // Creates uncommited copy return std::make_unique>(*this); } - IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransactionbool, FtSortType ftSortType, FtMergeStatuses&&, - FtUseExternStatuses, const RdxContext&) override final; + IdSet::Ptr Select(FtCtx::Ptr ctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses&&, FtUseExternStatuses, + const RdxContext&) override final; + IndexMemStat GetMemStat(const RdxContext&) override final; Variant Upsert(const Variant& key, IdType id, bool& clearCache) override final; void Delete(const Variant& key, IdType id, StringsHolder&, bool& clearCache) override final; @@ -47,16 +46,30 @@ class FastIndexText : public IndexText { bool EnablePreselectBeforeFt() const override final { return getConfig()->enablePreselectBeforeFt; } private: + template + IdSet::Ptr afterSelect(FtCtx& fctx, MergeType&& mergeData, FtSortType ftSortType, FtMergeStatuses&& statuses, FtUseExternStatuses); + + template + IdSet::Ptr applyOptimizationAndSelect(DataHolder* d, BaseFunctionCtx::Ptr bctx, FtDSLQuery&& dsl, bool inTransaction, + FtSortType ftSortType, FtMergeStatuses&& statuses, FtUseExternStatuses, const RdxContext& rdxCtx); + + template + IdSet::Ptr applyCtxTypeAndSelect(DataHolder* d, const BaseFunctionCtx::Ptr& bctx, FtDSLQuery&& dsl, bool inTransaction, + FtSortType ftSortType, FtMergeStatuses&& statuses, FtUseExternStatuses useExternSt, + const RdxContext& rdxCtx); + +protected: void commitFulltextImpl() override final; FtFastConfig* getConfig() const noexcept { return dynamic_cast(this->cfg_.get()); } void initConfig(const FtFastConfig* = nullptr); void initHolder(FtFastConfig&); template void buildVdocs(Data& data); - template - void appendMergedIds(MergeData& merged, size_t releventDocs, F&& appender); - - MergeData::iterator unstableRemoveIf(MergeData& md, const int minRelevancy, double scalingFactor, size_t& releventDocs, int& cnt); + template + void appendMergedIds(MergeType& merged, size_t releventDocs, F&& appender); + template + typename MergeType::iterator unstableRemoveIf(MergeType& md, const int minRelevancy, double scalingFactor, size_t& releventDocs, + int& cnt); std::unique_ptr holder_; }; diff --git a/cpp_src/core/index/indextext/fuzzyindextext.cc b/cpp_src/core/index/indextext/fuzzyindextext.cc index 148db2e0c..908bec1e3 100644 --- 
a/cpp_src/core/index/indextext/fuzzyindextext.cc +++ b/cpp_src/core/index/indextext/fuzzyindextext.cc @@ -5,7 +5,8 @@ namespace reindexer { template -IdSet::Ptr FuzzyIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses&&, + +IdSet::Ptr FuzzyIndexText::Select(FtCtx::Ptr bctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses&&, FtUseExternStatuses withExternSt, const RdxContext& rdxCtx) { assertrx_throw(withExternSt == FtUseExternStatuses::No); (void)ftSortType; @@ -15,6 +16,7 @@ IdSet::Ptr FuzzyIndexText::Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inT auto mergedIds = make_intrusive>(); mergedIds->reserve(result.data_->size() * 2); + intrusive_ptr fctx = static_ctx_pointer_cast(bctx); fctx->Reserve(result.data_->size() * 2); double coof = 1; if (result.max_proc_ > 100) { diff --git a/cpp_src/core/index/indextext/fuzzyindextext.h b/cpp_src/core/index/indextext/fuzzyindextext.h index d92a0a210..6459025a6 100644 --- a/cpp_src/core/index/indextext/fuzzyindextext.h +++ b/cpp_src/core/index/indextext/fuzzyindextext.h @@ -22,9 +22,10 @@ class FuzzyIndexText : public IndexText { assertrx(0); abort(); } + std::unique_ptr Clone() const override final { return std::make_unique>(*this); } - IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses&&, FtUseExternStatuses, - const RdxContext&) override final; + IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses&&, + FtUseExternStatuses withExternSt, const RdxContext&) override final; Variant Upsert(const Variant& key, IdType id, bool& clearCache) override final { this->isBuilt_ = false; return Base::Upsert(key, id, clearCache); diff --git a/cpp_src/core/index/indextext/indextext.cc b/cpp_src/core/index/indextext/indextext.cc index 695456a7e..e1e5db9c0 100644 --- a/cpp_src/core/index/indextext/indextext.cc +++ b/cpp_src/core/index/indextext/indextext.cc @@ -71,16 +71,6 @@ void IndexText::ReconfigureCache(const NamespaceCacheConfigData& cacheCfg) { Base::ReconfigureCache(cacheCfg); } -template -FtCtx::Ptr IndexText::prepareFtCtx(const BaseFunctionCtx::Ptr& ctx) { - FtCtx::Ptr ftctx = reindexer::static_ctx_pointer_cast(ctx); - if rx_unlikely (!ftctx) { - throw Error(errParams, "Full text index (%s) may not be used without context", Index::Name()); - } - ftctx->PrepareAreas(ftFields_, this->name_); - return ftctx; -} - template void IndexText::build(const RdxContext& rdxCtx) { smart_lock lck(mtx_, rdxCtx); @@ -103,7 +93,6 @@ SelectKeyResults IndexText::SelectKey(const VariantArray& keys, CondType cond throw Error(errParams, "Full text index (%s) support only EQ or SET condition with 1 or 2 parameter", Index::Name()); } - FtCtx::Ptr ftctx = prepareFtCtx(ctx); auto mergeStatuses = this->GetFtMergeStatuses(rdxCtx); bool needPutCache = false; IdSetCacheKey ckey{keys, condition, 0}; @@ -111,23 +100,26 @@ SelectKeyResults IndexText::SelectKey(const VariantArray& keys, CondType cond if (cache_ft.valid) { if (!cache_ft.val.ids) { needPutCache = true; - } else if (ftctx->NeedArea() && (!cache_ft.val.ctx || !cache_ft.val.ctx->NeedArea())) { + } else if (ctx->type == BaseFunctionCtx::CtxType::kFtArea && + (!cache_ft.val.ctx || !(cache_ft.val.ctx->type == BaseFunctionCtx::CtxType::kFtArea))) { needPutCache = true; } else { - return resultFromCache(keys, std::move(cache_ft), ftctx); + return resultFromCache(keys, std::move(cache_ft), ctx); } } + return 
doSelectKey(keys, needPutCache ? std::optional{std::move(ckey)} : std::nullopt, std::move(mergeStatuses), - FtUseExternStatuses::No, opts.inTransaction, FtSortType(opts.ftSortType), std::move(ftctx), rdxCtx); + FtUseExternStatuses::No, opts.inTransaction, FtSortType(opts.ftSortType), ctx, rdxCtx); } template -SelectKeyResults IndexText::resultFromCache(const VariantArray& keys, FtIdSetCache::Iterator&& it, FtCtx::Ptr& ftctx) { +SelectKeyResults IndexText::resultFromCache(const VariantArray& keys, FtIdSetCache::Iterator&& it, const BaseFunctionCtx::Ptr& ctx) { if rx_unlikely (cfg_->logLevel >= LogInfo) { logPrintf(LogInfo, "Get search results for '%s' in '%s' from cache", keys[0].As(), this->payloadType_ ? this->payloadType_->Name() : ""); } assertrx(it.val.ctx); + FtCtx::Ptr ftctx = static_ctx_pointer_cast(ctx); ftctx->SetData(std::move(it.val.ctx)); return SelectKeyResult{{SingleSelectKeyResult{std::move(it.val.ids)}}}; } @@ -135,7 +127,7 @@ SelectKeyResults IndexText::resultFromCache(const VariantArray& keys, FtIdSet template SelectKeyResults IndexText::doSelectKey(const VariantArray& keys, const std::optional& ckey, FtMergeStatuses&& mergeStatuses, FtUseExternStatuses useExternSt, bool inTransaction, - FtSortType ftSortType, FtCtx::Ptr ftctx, const RdxContext& rdxCtx) { + FtSortType ftSortType, const BaseFunctionCtx::Ptr& bctx, const RdxContext& rdxCtx) { if rx_unlikely (cfg_->logLevel >= LogInfo) { logPrintf(LogInfo, "Searching for '%s' in '%s' %s", keys[0].As(), this->payloadType_ ? this->payloadType_->Name() : "", ckey ? "(will cache)" : ""); @@ -145,36 +137,40 @@ SelectKeyResults IndexText::doSelectKey(const VariantArray& keys, const std:: FtDSLQuery dsl(this->ftFields_, this->cfg_->stopWords, this->cfg_->extraWordSymbols); dsl.parse(keys[0].As()); - IdSet::Ptr mergedIds = Select(ftctx, std::move(dsl), inTransaction, ftSortType, std::move(mergeStatuses), useExternSt, rdxCtx); + auto ftCtx = static_ctx_pointer_cast(bctx); + IdSet::Ptr mergedIds = Select(ftCtx, std::move(dsl), inTransaction, ftSortType, std::move(mergeStatuses), useExternSt, rdxCtx); SelectKeyResult res; if (mergedIds) { + auto ftCtxDataBase = ftCtx->GetData(); bool need_put = (useExternSt == FtUseExternStatuses::No) && ckey.has_value(); - if (ftctx->NeedArea() && need_put && mergedIds->size()) { + // count the number of Areas and determine whether the request should be cached + if (bctx->type == BaseFunctionCtx::CtxType::kFtArea && need_put && mergedIds->size()) { auto config = dynamic_cast(cfg_.get()); + auto ftCtxDataArea = static_ctx_pointer_cast>(ftCtxDataBase); + if (config && config->maxTotalAreasToCache >= 0) { - auto& d = *ftctx->GetData(); size_t totalAreas = 0; - assertrx_throw(d.holders_.has_value()); - for (auto& area : d.holders_.value()) { - totalAreas += d.area_[area.second].GetAreasCount(); + assertrx_throw(ftCtxDataArea->holders.has_value()); + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + for (auto& area : ftCtxDataArea->holders.value()) { + totalAreas += ftCtxDataArea->area[area.second].GetAreasCount(); } + if (totalAreas > unsigned(config->maxTotalAreasToCache)) { need_put = false; } } - } - if (need_put && mergedIds->size()) { - // This areas will be shared via cache, so lazy commit may race - auto dPtr = ftctx->GetData(); - auto& d = *dPtr; - if (d.holders_.has_value()) { - for (auto& area : d.holders_.value()) { - if (auto& aData = d.area_[area.second]; !aData.IsCommited()) { + if (need_put && ftCtxDataArea->holders.has_value()) { + // 
NOLINTNEXTLINE(bugprone-unchecked-optional-access) + for (auto& area : ftCtxDataArea->holders.value()) { + if (auto& aData = ftCtxDataArea->area[area.second]; !aData.IsCommited()) { aData.Commit(); } } } - cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(dPtr)}); + } + if (need_put && mergedIds->size()) { + cache_ft_->Put(*ckey, FtIdSetCacheVal{IdSet::Ptr(mergedIds), std::move(ftCtxDataBase)}); } res.emplace_back(std::move(mergedIds)); @@ -190,7 +186,7 @@ SelectKeyResults IndexText::SelectKey(const VariantArray& keys, CondType cond throw Error(errParams, "Full text index (%s) support only EQ or SET condition with 1 or 2 parameter", Index::Name()); } return doSelectKey(keys, std::nullopt, std::move(preselect), FtUseExternStatuses::Yes, opts.inTransaction, FtSortType(opts.ftSortType), - prepareFtCtx(ctx), rdxCtx); + ctx, rdxCtx); } template diff --git a/cpp_src/core/index/indextext/indextext.h b/cpp_src/core/index/indextext/indextext.h index 52b094da8..3a0af5a12 100644 --- a/cpp_src/core/index/indextext/indextext.h +++ b/cpp_src/core/index/indextext/indextext.h @@ -33,8 +33,9 @@ class IndexText : public IndexUnordered { SelectKeyResults SelectKey(const VariantArray& keys, CondType, Index::SelectOpts, const BaseFunctionCtx::Ptr&, FtPreselectT&&, const RdxContext&) override; void UpdateSortedIds(const UpdateSortedContext&) override {} - virtual IdSet::Ptr Select(FtCtx::Ptr fctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses&&, + virtual IdSet::Ptr Select(FtCtx::Ptr ctx, FtDSLQuery&& dsl, bool inTransaction, FtSortType ftSortType, FtMergeStatuses&&, FtUseExternStatuses, const RdxContext&) = 0; + void SetOpts(const IndexOpts& opts) override; void Commit() override final { // Do nothing @@ -65,10 +66,11 @@ class IndexText : public IndexUnordered { using Mutex = MarkedMutex; virtual void commitFulltextImpl() = 0; - FtCtx::Ptr prepareFtCtx(const BaseFunctionCtx::Ptr&); SelectKeyResults doSelectKey(const VariantArray& keys, const std::optional&, FtMergeStatuses&&, - FtUseExternStatuses useExternSt, bool inTransaction, FtSortType ftSortType, FtCtx::Ptr, const RdxContext&); - SelectKeyResults resultFromCache(const VariantArray& keys, FtIdSetCache::Iterator&&, FtCtx::Ptr&); + FtUseExternStatuses useExternSt, bool inTransaction, FtSortType ftSortType, + const BaseFunctionCtx::Ptr& ctx, const RdxContext&); + + SelectKeyResults resultFromCache(const VariantArray& keys, FtIdSetCache::Iterator&&, const BaseFunctionCtx::Ptr&); void build(const RdxContext& rdxCtx); void initSearchers(); diff --git a/cpp_src/core/keyvalue/variant.cc b/cpp_src/core/keyvalue/variant.cc index 5005ef5dd..9e645d481 100644 --- a/cpp_src/core/keyvalue/variant.cc +++ b/cpp_src/core/keyvalue/variant.cc @@ -520,31 +520,31 @@ class Comparator { return compare(v1_.As(), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Bool, KeyValueType::Int) const noexcept { - return compare(v1_.As(), v2_.As()); + return compare(int(v1_.As()), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Bool, KeyValueType::Int64) const noexcept { - return compare(v1_.As(), v2_.As()); + return compare(int64_t(v1_.As()), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Bool, KeyValueType::Double) const noexcept { - return compare(v1_.As(), v2_.As()); + return compare(double(v1_.As()), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Int, KeyValueType::Bool) const noexcept { - return compare(v1_.As(), v2_.As()); + return 
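// Editorial aside — an illustrative sketch (assumed names, not part of the patch) of the idea behind
// the surrounding Comparator hunks: both operands of the shared compare() helper are brought to the
// same arithmetic type explicitly at the call site, instead of going through an implicit bool
// conversion inside the template.
#include <cstdint>
enum class SketchCompareResult { Lt, Eq, Gt };
template <typename T>
SketchCompareResult compareSketch(T lhs, T rhs) noexcept {
	if (lhs == rhs) {
		return SketchCompareResult::Eq;
	}
	return (lhs < rhs) ? SketchCompareResult::Lt : SketchCompareResult::Gt;
}
// Mirroring the patch: (Bool, Int)    -> compareSketch<int>(int(b), i)
//                      (Int64, Bool)  -> compareSketch<int64_t>(i64, int64_t(b))
//                      (Double, Bool) -> compareSketch<double>(d, double(b))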
compare(v1_.As(), int(v2_.As())); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Int, KeyValueType::Int) const noexcept { return compare(v1_.As(), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Int, KeyValueType::Int64) const noexcept { - return compare(v1_.As(), v2_.As()); + return compare(int64_t(v1_.As()), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Int, KeyValueType::Double) const noexcept { return compare(v1_.As(), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Int64, KeyValueType::Bool) const noexcept { - return compare(v1_.As(), v2_.As()); + return compare(v1_.As(), int64_t(v2_.As())); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Int64, KeyValueType::Int) const noexcept { - return compare(v1_.As(), v2_.As()); + return compare(v1_.As(), int64_t(v2_.As())); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Int64, KeyValueType::Int64) const noexcept { return compare(v1_.As(), v2_.As()); @@ -553,7 +553,7 @@ class Comparator { return compare(v1_.As(), v2_.As()); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Double, KeyValueType::Bool) const noexcept { - return compare(v1_.As(), v2_.As()); + return compare(v1_.As(), double(v2_.As())); } RX_ALWAYS_INLINE ComparationResult operator()(KeyValueType::Double, KeyValueType::Int) const noexcept { return compare(v1_.As(), v2_.As()); diff --git a/cpp_src/core/namespace/itemsloader.cc b/cpp_src/core/namespace/itemsloader.cc index 1a7f59b44..89795271b 100644 --- a/cpp_src/core/namespace/itemsloader.cc +++ b/cpp_src/core/namespace/itemsloader.cc @@ -57,7 +57,7 @@ void ItemsLoader::reading() { throw Error(errLogic, "Can't load data storage of '%s' - there are no PK fields in ns", ns_.name_); } if (dataSlice.size() < sizeof(int64_t)) { - lastErr = Error(errParseBin, "Not enougth data in data slice"); + lastErr = Error(errParseBin, "Not enough data in data slice"); logPrintf(LogTrace, "Error load item to '%s' from storage: '%s'", ns_.name_, lastErr.what()); ++errCount; continue; @@ -66,7 +66,7 @@ void ItemsLoader::reading() { // Read LSN int64_t lsn = *reinterpret_cast(dataSlice.data()); if (lsn < 0) { - lastErr = Error(errParseBin, "Ivalid LSN value: %d", lsn); + lastErr = Error(errParseBin, "Invalid LSN value: %d", lsn); logPrintf(LogTrace, "Error load item to '%s' from storage: '%s'", ns_.name_, lastErr.what()); ++errCount; continue; @@ -114,7 +114,7 @@ void ItemsLoader::reading() { continue; } item.impl.Value().SetLSN(int64_t(l)); - // Prealloc payload here, because reading|parsing thread is faster then index insertion thread + // Preallocate payload here, because reading|parsing thread is faster than index insertion thread item.preallocPl = PayloadValue(item.impl.GetConstPayload().RealSize()); lck.lock(); diff --git a/cpp_src/core/namespace/namespaceimpl.cc b/cpp_src/core/namespace/namespaceimpl.cc index f32901545..b6236109a 100644 --- a/cpp_src/core/namespace/namespaceimpl.cc +++ b/cpp_src/core/namespace/namespaceimpl.cc @@ -3,7 +3,6 @@ #include #include #include "core/cjson/cjsondecoder.h" -// #include "core/cjson/defaultvaluecoder.h" #include "core/cjson/jsonbuilder.h" #include "core/cjson/uuid_recoders.h" #include "core/formatters/lsn_fmt.h" @@ -99,7 +98,7 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& optimizationState_{NotOptimized}, strHolder_{makeStringsHolder()}, nsUpdateSortedContextMemory_{0}, - dbDestroyed_(false) { + dbDestroyed_{false} { for (auto& idxIt : 
src.indexes_) { indexes_.push_back(idxIt->Clone()); } @@ -111,23 +110,23 @@ NamespaceImpl::NamespaceImpl(const NamespaceImpl& src, AsyncStorage::FullLockT& NamespaceImpl::NamespaceImpl(const std::string& name, UpdatesObservers& observers) : intrusive_atomic_rc_base(), - indexes_(*this), - name_(name), - payloadType_(name), - tagsMatcher_(payloadType_), - enablePerfCounters_(false), - queryCountCache_( - std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache)), - joinCache_(std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache)), + indexes_{*this}, + name_{name}, + payloadType_{name}, + tagsMatcher_{payloadType_}, + enablePerfCounters_{false}, + queryCountCache_{ + std::make_unique(config_.cacheConfig.queryCountCacheSize, config_.cacheConfig.queryCountHitsToCache)}, + joinCache_{std::make_unique(config_.cacheConfig.joinCacheSize, config_.cacheConfig.joinHitsToCache)}, wal_(getWalSize(config_)), - observers_(&observers), + observers_{&observers}, lastSelectTime_{0}, cancelCommitCnt_{0}, lastUpdateTime_{0}, - nsIsLoading_(false), - serverIdChanged_(false), + nsIsLoading_{false}, + serverIdChanged_{false}, strHolder_{makeStringsHolder()}, - dbDestroyed_(false) { + dbDestroyed_{false} { logPrintf(LogTrace, "NamespaceImpl::NamespaceImpl (%s)", name_); FlagGuardT nsLoadingGuard(nsIsLoading_); items_.reserve(10000); @@ -392,7 +391,7 @@ class NamespaceImpl::RollBack_recreateCompositeIndexes final : private RollBackB private: NamespaceImpl& ns_; std::vector> indexes_; - size_t startIdx_; + size_t startIdx_{0}; }; template <> @@ -476,32 +475,11 @@ class NamespaceImpl::RollBack_updateItems final : private RollBackBase { NamespaceImpl& ns_; RollBack_recreateCompositeIndexes rollbacker_recreateCompositeIndexes_; std::vector> items_; - uint64_t dataHash_; - size_t itemsDataSize_; + uint64_t dataHash_{0}; + size_t itemsDataSize_{0}; std::unique_ptr tuple_; }; -std::vector NamespaceImpl::pickJsonPath(const PayloadFieldType& fld) { - const auto& paths = fld.JsonPaths(); - if (fld.IsArray()) { - std::vector result; - result.reserve(paths.size()); - for (const auto& path : paths) { - auto tags = tagsMatcher_.path2tag(path, false); - result.push_back(std::move(tags)); - // first without nested path - always (any, now last one found) - if ((result.size() > 1) && (result.back().size() == 1)) { - std::swap(result.front(), result.back()); - } - } - return result; - } - - assertrx_throw(paths.size() == 1); - auto tags = tagsMatcher_.path2tag(paths.front(), false); - return {std::move(tags)}; -} - template <> class NamespaceImpl::RollBack_updateItems { public: @@ -565,16 +543,6 @@ NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(con recoder = std::make_unique(changedField); } } - // TODO: This logic must be reenabled after #1353. 
Now it's potentially unsafe - // else { - // const auto& indexToUpdate = indexes_[changedField]; - // if (!IsComposite(indexToUpdate->Type()) && !indexToUpdate->Opts().IsSparse()) { - // auto tagsNames = pickJsonPath(fld); - // if (!tagsNames.empty()) { - // recoder = std::make_unique(name_, fld, std::move(tagsNames), changedField); - // } - // } - // } } rollbacker.SaveTuple(); @@ -598,7 +566,6 @@ NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(con ItemImpl oldItem(oldPlType, plCurr, tagsMatcher_); oldItem.Unsafe(true); newItem.FromCJSON(oldItem, recoder.get()); - const bool itemTupleUpdated = recoder && recoder->Reset(); PayloadValue plNew = oldValue.CopyTo(payloadType_, fieldChangeType == FieldChangeType::Add); plNew.SetLSN(plCurr.GetLSN()); @@ -650,17 +617,6 @@ NamespaceImpl::RollBack_updateItems NamespaceImpl::updateItems(con plCurr = std::move(plNew); repl_.dataHash ^= Payload(payloadType_, plCurr).GetHash(); itemsDataSize_ += plCurr.GetCapacity() + sizeof(PayloadValue::dataHeader); - - // update data in storage - if (itemTupleUpdated && storage_.IsValid()) { - pk.Reset(); - data.Reset(); - pk << kRxStorageItemPrefix; - Payload(payloadType_, plCurr).SerializeFields(pk, pkFields()); - data.PutUInt64(plCurr.GetLSN()); - newItem.GetCJSON(data); - storage_.Write(pk.Slice(), data.Slice()); - } } markUpdated(IndexOptimization::Partial); @@ -1351,7 +1307,7 @@ int NamespaceImpl::getIndexByName(std::string_view index) const { } int NamespaceImpl::getIndexByNameOrJsonPath(std::string_view index) const { - int idx; + int idx = 0; if (tryGetIndexByName(index, idx)) { return idx; } @@ -1364,7 +1320,7 @@ int NamespaceImpl::getIndexByNameOrJsonPath(std::string_view index) const { } int NamespaceImpl::getScalarIndexByName(std::string_view index) const { - int idx; + int idx = 0; if (tryGetIndexByName(index, idx)) { if (idx < indexes_.firstCompositePos()) { return idx; @@ -1395,7 +1351,7 @@ bool NamespaceImpl::getIndexByNameOrJsonPath(std::string_view name, int& index) } bool NamespaceImpl::getScalarIndexByName(std::string_view name, int& index) const { - int idx; + int idx = 0; if (tryGetIndexByName(name, idx)) { if (idx < indexes_.firstCompositePos()) { index = idx; @@ -1609,7 +1565,7 @@ void NamespaceImpl::doDelete(IdType id) { storage_.Remove(pk.Slice()); // erase last item - int field; + int field = 0; // erase from composite indexes auto indexesCacheCleaner{GetIndexesCacheCleaner()}; @@ -2984,6 +2940,9 @@ void NamespaceImpl::removeExpiredItems(RdxActivityContext* ctx) { qr.AddNamespace(this, true); auto q = Query(name_).Where(index->Name(), CondLt, expirationThreshold); doDelete(q, qr, rdxCtx); + if (qr.Count()) { + logFmt(LogInfo, "{}: {} items were removed: TTL({}) has expired", name_, qr.Count(), index->Name()); + } } tryForceFlush(std::move(wlck)); } @@ -3228,8 +3187,7 @@ void NamespaceImpl::checkApplySlaveUpdate(bool fromReplication) { if (repl_.slaveMode && !repl_.replicatorEnabled) // readOnly { throw Error(errLogic, "Can't modify read only ns '%s'", name_); - } else if (repl_.slaveMode && repl_.replicatorEnabled) // slave - { + } else if (repl_.slaveMode && repl_.replicatorEnabled) { // slave if (!fromReplication) { logPrintf(LogTrace, "[repl:%s]:%d Can't modify slave ns '%s' repl_.slaveMode=%d repl_.replicatorenabled=%d fromReplication=%d", name_, serverId_, name_, repl_.slaveMode, repl_.replicatorEnabled, fromReplication); @@ -3237,8 +3195,7 @@ void NamespaceImpl::checkApplySlaveUpdate(bool fromReplication) { } else if (repl_.status == ReplicationState::Status::Fatal) { 
throw Error(errLogic, "Can't modify slave ns '%s', ns has fatal replication error: %s", name_, repl_.replError.what()); } - } else if (!repl_.slaveMode && !repl_.replicatorEnabled) // master - { + } else if (!repl_.slaveMode && !repl_.replicatorEnabled) { // master if (fromReplication) { throw Error(errLogic, "Can't modify master ns '%s' from replicator", name_); } else if (repl_.status == ReplicationState::Status::Fatal) { diff --git a/cpp_src/core/namespace/namespaceimpl.h b/cpp_src/core/namespace/namespaceimpl.h index 3f0a75824..2bd31dd74 100644 --- a/cpp_src/core/namespace/namespaceimpl.h +++ b/cpp_src/core/namespace/namespaceimpl.h @@ -488,7 +488,6 @@ class NamespaceImpl final : public intrusive_atomic_rc_base { // NOLINT(*perfor } size_t getWalSize(const NamespaceConfigData& cfg) const noexcept { return isSystem() ? int64_t(1) : std::max(cfg.walSize, int64_t(1)); } void clearNamespaceCaches(); - std::vector pickJsonPath(const PayloadFieldType& fld); PerfStatCounterMT updatePerfCounter_, selectPerfCounter_; std::atomic_bool enablePerfCounters_{false}; diff --git a/cpp_src/core/nsselecter/joinedselector.cc b/cpp_src/core/nsselecter/joinedselector.cc index 8ff29e42e..d726ae1f9 100644 --- a/cpp_src/core/nsselecter/joinedselector.cc +++ b/cpp_src/core/nsselecter/joinedselector.cc @@ -229,7 +229,7 @@ void JoinedSelector::AppendSelectIteratorOfJoinIndexData(SelectIteratorContainer [](const SelectIteratorContainer&) -> VariantArray { throw_as_assert; }}, preresult.payload); auto ctx = selectFnc ? selectFnc->CreateCtx(joinEntry.LeftIdxNo()) : BaseFunctionCtx::Ptr{}; - assertrx_throw(!ctx || ctx->type != BaseFunctionCtx::kFtCtx); + assertrx_throw(!ctx || ctx->type != BaseFunctionCtx::CtxType::kFtCtx); if (leftIndex->Opts().GetCollateMode() == CollateUTF8) { for (auto& key : values) { diff --git a/cpp_src/core/nsselecter/selectiteratorcontainer.cc b/cpp_src/core/nsselecter/selectiteratorcontainer.cc index dcdfe7428..a0194ecd7 100644 --- a/cpp_src/core/nsselecter/selectiteratorcontainer.cc +++ b/cpp_src/core/nsselecter/selectiteratorcontainer.cc @@ -309,7 +309,7 @@ SelectKeyResults SelectIteratorContainer::processQueryEntry(const QueryEntry& qe opts.inTransaction = ctx_->inTransaction; auto ctx = selectFnc ? 
selectFnc->CreateCtx(qe.IndexNo()) : BaseFunctionCtx::Ptr{}; - if (ctx && ctx->type == BaseFunctionCtx::kFtCtx) { + if (ctx) { ftCtx = reindexer::static_ctx_pointer_cast(ctx); } diff --git a/cpp_src/core/nsselecter/sortexpression.cc b/cpp_src/core/nsselecter/sortexpression.cc index 7acaa9d3e..d8523a680 100644 --- a/cpp_src/core/nsselecter/sortexpression.cc +++ b/cpp_src/core/nsselecter/sortexpression.cc @@ -64,11 +64,11 @@ VariantArray SortExpression::GetJoinedFieldValues(IdType rowId, const joins::Nam const std::vector& joinedSelectors, size_t nsIdx, std::string_view column, int index) { const auto& js = joinedSelectors[nsIdx]; - std::reference_wrapper pt = - std::visit(overloaded{[](const JoinPreResult::Values& values) noexcept { return std::cref(values.payloadType); }, - Restricted{}( - [&js](const auto&) noexcept { return std::cref(js.rightNs_->payloadType_); })}, - js.PreResult().payload); + std::reference_wrapper pt = std::visit( + overloaded{ + [](const JoinPreResult::Values& values) noexcept { return std::cref(values.payloadType); }, + Restricted{}([&js](const auto&) noexcept { return std::cref(js.rightNs_->payloadType_); })}, + js.PreResult().payload); const ConstPayload pv{pt, getJoinedValue(rowId, joinResults, joinedSelectors, nsIdx)}; VariantArray values; if (index == IndexValueType::SetByJsonPath) { @@ -156,11 +156,11 @@ double DistanceBetweenJoinedIndexes::GetValue(IdType rowId, const joins::Namespa double DistanceBetweenJoinedIndexesSameNs::GetValue(IdType rowId, const joins::NamespaceResults& joinResults, const std::vector& joinedSelectors) const { const auto& js = joinedSelectors[nsIdx]; - std::reference_wrapper pt = - std::visit(overloaded{[](const JoinPreResult::Values& values) noexcept { return std::cref(values.payloadType); }, - Restricted{}( - [&js](const auto&) noexcept { return std::cref(js.rightNs_->payloadType_); })}, - js.PreResult().payload); + std::reference_wrapper pt = std::visit( + overloaded{ + [](const JoinPreResult::Values& values) noexcept { return std::cref(values.payloadType); }, + Restricted{}([&js](const auto&) noexcept { return std::cref(js.rightNs_->payloadType_); })}, + js.PreResult().payload); const ConstPayload pv{pt, SortExpression::getJoinedValue(rowId, joinResults, joinedSelectors, nsIdx)}; TagsMatcher tm = std::visit(overloaded{[](const JoinPreResult::Values& values) noexcept { return std::cref(values.tagsMatcher); }, Restricted{}( diff --git a/cpp_src/core/payload/payloadiface.h b/cpp_src/core/payload/payloadiface.h index 672f3be04..b5629525e 100644 --- a/cpp_src/core/payload/payloadiface.h +++ b/cpp_src/core/payload/payloadiface.h @@ -84,7 +84,7 @@ class PayloadIface { template ::value>::type* = nullptr> void SetSingleElement(int field, const Variant& key); - // Set element or array by field index + // Set element or array by index path template ::value>::type* = nullptr> void Set(std::string_view field, const VariantArray& keys, bool append = false) { return Set(t_.FieldByName(field), keys, append); diff --git a/cpp_src/core/query/queryentry.h b/cpp_src/core/query/queryentry.h index 202b69f3d..44d454833 100644 --- a/cpp_src/core/query/queryentry.h +++ b/cpp_src/core/query/queryentry.h @@ -377,9 +377,9 @@ class QueryJoinEntry { QueryField rightField_; const OpType op_{OpOr}; const CondType condition_{CondAny}; - const bool reverseNamespacesOrder_{false}; ///< controls SQL encoding order - ///< false: mainNs.index Condition joinNs.joinIndex - ///< true: joinNs.joinIndex Invert(Condition) mainNs.index + const bool 
reverseNamespacesOrder_{false}; ///< controls SQL encoding order + ///< false: mainNs.index Condition joinNs.joinIndex + ///< true: joinNs.joinIndex Invert(Condition) mainNs.index }; enum class InjectionDirection : bool { IntoMain, FromMain }; diff --git a/cpp_src/core/query/sql/sqlparser.cc b/cpp_src/core/query/sql/sqlparser.cc index 6ba7e678c..40e4200ea 100644 --- a/cpp_src/core/query/sql/sqlparser.cc +++ b/cpp_src/core/query/sql/sqlparser.cc @@ -338,7 +338,7 @@ Variant token2kv(const token& tok, tokenizer& parser, CompositeAllowed allowComp compositeValues.push_back(token2kv(nextTok, parser, CompositeAllowed::No, FieldAllowed::No)); nextTok = parser.next_token(); if (nextTok.text() == "}"sv) { - return Variant(compositeValues); // end process + return Variant(compositeValues); // end process } if (nextTok.text() != ","sv) { throw Error(errParseSQL, "Expected ',', but found '%s' in query, %s", nextTok.text(), parser.where()); @@ -575,7 +575,7 @@ void SQLParser::parseCommand(tokenizer& parser) const { tok = parser.next_token(); // try parse as scalar value if ((tok.type == TokenNumber) || (tok.type == TokenString) || (tok.type == TokenName)) { - token2kv(tok, parser, CompositeAllowed::No, FieldAllowed::Yes); // ignore result + token2kv(tok, parser, CompositeAllowed::No, FieldAllowed::Yes); // ignore result } else { parseArray(parser, tok.text(), nullptr); } diff --git a/cpp_src/core/queryresults/queryresults.cc b/cpp_src/core/queryresults/queryresults.cc index 0a1ee037d..a56fbf265 100644 --- a/cpp_src/core/queryresults/queryresults.cc +++ b/cpp_src/core/queryresults/queryresults.cc @@ -8,6 +8,7 @@ #include "core/namespace/namespace.h" #include "core/namespace/namespaceimpl.h" #include "joinresults.h" +#include "server/outputparameters.h" #include "tools/catch_and_return.h" namespace reindexer { @@ -316,6 +317,9 @@ Error QueryResults::Iterator::GetProtobuf(WrSerializer& wrser, bool withHdrLen) auto& itemRef = qr_->items_[idx_]; assertrx(qr_->ctxs.size() > itemRef.Nsid()); auto& ctx = qr_->ctxs[itemRef.Nsid()]; + if (!ctx.schema_) { + return Error(errParams, "The schema was not found for Protobuf builder"); + } if (itemRef.Value().IsFree()) { return Error(errNotFound, "Item not found"); @@ -324,6 +328,10 @@ Error QueryResults::Iterator::GetProtobuf(WrSerializer& wrser, bool withHdrLen) ConstPayload pl(ctx.type_, itemRef.Value()); ProtobufEncoder encoder(&ctx.tagsMatcher_); ProtobufBuilder builder(&wrser, ObjType::TypePlain, ctx.schema_.get(), const_cast(&ctx.tagsMatcher_)); + + auto item = builder.Object(kProtoQueryResultsFields.at(kParamItems)); + auto ItemImpl = item.Object(ctx.schema_->GetProtobufNsNumber() + 1); + if (withHdrLen) { auto slicePosSaver = wrser.StartSlice(); encoder.Encode(pl, builder); diff --git a/cpp_src/core/selectfunc/ctx/basefunctionctx.h b/cpp_src/core/selectfunc/ctx/basefunctionctx.h index cebba4e26..261ff9960 100644 --- a/cpp_src/core/selectfunc/ctx/basefunctionctx.h +++ b/cpp_src/core/selectfunc/ctx/basefunctionctx.h @@ -1,5 +1,6 @@ #pragma once +#include "core/selectfunc/functions/debugrank.h" #include "core/selectfunc/functions/highlight.h" #include "core/selectfunc/functions/snippet.h" @@ -26,21 +27,22 @@ constexpr std::size_t variant_index() { } } -using SelectFuncVariant = std::variant; +using SelectFuncVariant = std::variant; enum class SelectFuncType { None = variant_index(), Snippet = variant_index(), Highlight = variant_index(), SnippetN = variant_index(), - + DebugRank = variant_index(), Max // Max possible value }; class BaseFunctionCtx : public 
intrusive_atomic_rc_base { public: typedef intrusive_ptr Ptr; - enum CtxType { kFtCtx = 0 }; - virtual ~BaseFunctionCtx() {} + enum class CtxType { kFtCtx = 1, kFtArea = 2, kFtAreaDebug = 3 }; + BaseFunctionCtx(CtxType t) noexcept : type(t) {} + virtual ~BaseFunctionCtx() = default; void AddFunction(const std::string& name, SelectFuncType functionIndx) { auto it = std::find_if(functions_.begin(), functions_.end(), [&name](const FuncData& data) { return data.name == name; }); @@ -67,7 +69,8 @@ class BaseFunctionCtx : public intrusive_atomic_rc_base { FuncData(std::string&& _name) noexcept : name(std::move(_name)) {} std::string name; - TypesArrayT types{}; + + TypesArrayT types = {}; }; h_vector functions_; }; diff --git a/cpp_src/core/selectfunc/ctx/ftctx.cc b/cpp_src/core/selectfunc/ctx/ftctx.cc index 5964c81a0..ff8b68922 100644 --- a/cpp_src/core/selectfunc/ctx/ftctx.cc +++ b/cpp_src/core/selectfunc/ctx/ftctx.cc @@ -2,38 +2,19 @@ namespace reindexer { -bool FtCtx::PrepareAreas(const RHashMap& fields, const std::string& name) { - assertrx_dbg(!NeedArea()); - auto& data = *data_; - if (!fields.empty()) { - data.isComposite_ = true; - } - - bool needArea = false; - if (data.isComposite_) { - for (auto& field : fields) { - needArea = CheckFunction(field.first, {SelectFuncType::Snippet, SelectFuncType::SnippetN, SelectFuncType::Highlight}); - if (needArea) { - break; - } - } - } - needArea = needArea || CheckFunction(name, {SelectFuncType::Snippet, SelectFuncType::SnippetN, SelectFuncType::Highlight}); - if (needArea) { - data.InitHolders(); - } - return needArea; -} - -template -void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, AreaHolder&& holder) { - auto& data = *data_; - data.area_.emplace_back(std::move(holder)); - for (; begin != end; ++begin) { - data.proc_.emplace_back(proc); - if (data.holders_.has_value()) { - data.holders_->emplace(*begin, data_->area_.size() - 1); - } +FtCtx::FtCtx(BaseFunctionCtx::CtxType t) : BaseFunctionCtx(t) { + switch (t) { + case BaseFunctionCtx::CtxType::kFtCtx: + data_ = make_intrusive(t); + break; + case BaseFunctionCtx::CtxType::kFtArea: + data_ = make_intrusive>(t); + data_->holders.emplace(); + break; + case BaseFunctionCtx::CtxType::kFtAreaDebug: + data_ = make_intrusive>(t); + data_->holders.emplace(); + break; } } @@ -41,42 +22,66 @@ template void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc) { auto& data = *data_; for (; begin != end; ++begin) { - data.proc_.emplace_back(proc); + data.proc.emplace_back(proc); } } template -void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector& mask, AreaHolder&& holder) { +void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector& mask) { auto& data = *data_; - data.area_.emplace_back(std::move(holder)); for (; begin != end; ++begin) { assertrx(static_cast(*begin) < mask.size()); if (!mask[*begin]) { continue; } - data.proc_.emplace_back(proc); - if (data.holders_.has_value()) { - data.holders_->emplace(*begin, data.area_.size() - 1); + data.proc.emplace_back(proc); + } +} + +template +void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, AreasInDocument&& areas) { + intrusive_ptr> dataArea = static_ctx_pointer_cast>(data_); + assertrx_throw(dataArea); + dataArea->area.emplace_back(std::move(areas)); + auto& data = *data_; + if (data.holders.has_value()) { + auto& holders = data.holders.value(); + for (; begin != end; ++begin) { + data.proc.push_back(proc); + holders.emplace(*begin, 
dataArea->area.size() - 1); } } } -template -void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector& mask) { +template +void FtCtx::Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector& mask, AreasInDocument&& areas) { + intrusive_ptr> dataArea = static_ctx_pointer_cast>(data_); + assertrx_throw(dataArea); auto& data = *data_; - for (; begin != end; ++begin) { - assertrx(static_cast(*begin) < mask.size()); - if (!mask[*begin]) { - continue; + dataArea->area.emplace_back(std::move(areas)); + if (data.holders.has_value()) { + auto& holders = data.holders.value(); + for (; begin != end; ++begin) { + assertrx_dbg(static_cast(*begin) < mask.size()); + if (!mask[*begin]) { + continue; + } + data.proc.push_back(proc); + holders.emplace(*begin, dataArea->area.size() - 1); } - data.proc_.emplace_back(proc); } } +template void FtCtx::Add::iterator, Area>(span::iterator begin, span::iterator end, + int16_t proc, AreasInDocument&& holder); +template void FtCtx::Add::iterator, Area>(span::iterator begin, span::iterator end, + int16_t proc, const std::vector&, AreasInDocument&& holder); + template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, - AreaHolder&& holder); + AreasInDocument&& holder); template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, - const std::vector&, AreaHolder&& holder); + const std::vector&, AreasInDocument&& holder); + template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc); template void FtCtx::Add::iterator>(span::iterator begin, span::iterator end, int16_t proc, const std::vector&); diff --git a/cpp_src/core/selectfunc/ctx/ftctx.h b/cpp_src/core/selectfunc/ctx/ftctx.h index 636ccc42a..af696e286 100644 --- a/cpp_src/core/selectfunc/ctx/ftctx.h +++ b/cpp_src/core/selectfunc/ctx/ftctx.h @@ -8,49 +8,58 @@ namespace reindexer { +struct FtCtxData : public intrusive_atomic_rc_base { + FtCtxData(BaseFunctionCtx::CtxType t) noexcept : type(t) {} + virtual ~FtCtxData() = default; + void InitHolders() { + assertrx_dbg(!holders.has_value()); + holders.emplace(); + } + typedef intrusive_ptr Ptr; + std::vector proc; + std::optional> holders; + bool isComposite = false; + bool isWordPositions = false; + std::string extraWordSymbols; + BaseFunctionCtx::CtxType type; +}; + +template +struct FtCtxAreaData : public FtCtxData { + FtCtxAreaData(BaseFunctionCtx::CtxType t) noexcept : FtCtxData(t) {} + std::vector> area; +}; enum FtSortType { RankOnly, RankAndID, ExternalExpression }; class FtCtx : public BaseFunctionCtx { public: typedef intrusive_ptr Ptr; - struct Data : public BaseFunctionCtx { - bool NeedArea() const noexcept { return holders_.has_value(); } - void InitHolders() { - assertrx_dbg(!holders_.has_value()); - holders_.emplace(); - } - - typedef intrusive_ptr Ptr; - std::vector proc_; - std::optional> holders_; - std::vector area_; - bool isComposite_ = false; - bool isWordPositions_ = false; - std::string extraWordSymbols_; - }; - - FtCtx() : data_(make_intrusive()) { this->type = BaseFunctionCtx::kFtCtx; } - int16_t Proc(size_t pos) const noexcept { return (pos < data_->proc_.size()) ? data_->proc_[pos] : 0; } + FtCtx(BaseFunctionCtx::CtxType t); + int16_t Proc(size_t pos) const noexcept { return (pos < data_->proc.size()) ? 
data_->proc[pos] : 0; } - template - void Add(InputIterator begin, InputIterator end, int16_t proc, AreaHolder&& holder); template void Add(InputIterator begin, InputIterator end, int16_t proc); - template - void Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector& mask, AreaHolder&& holder); template void Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector& mask); - void Reserve(size_t size) { data_->proc_.reserve(size); } - size_t Size() const noexcept { return data_->proc_.size(); } - bool NeedArea() const noexcept { return data_->NeedArea(); } - bool PrepareAreas(const RHashMap& fields, const std::string& name); + template + void Add(InputIterator begin, InputIterator end, int16_t proc, AreasInDocument&& holder); - void SetData(Data::Ptr data) noexcept { data_ = std::move(data); } - const Data::Ptr& GetData() const noexcept { return data_; } + template + void Add(InputIterator begin, InputIterator end, int16_t proc, const std::vector& mask, AreasInDocument&& holder); + + void Reserve(size_t size) { data_->proc.reserve(size); } + size_t Size() const noexcept { return data_->proc.size(); } + + void SetExtraWordSymbols(const std::string& s) { data_->extraWordSymbols = s; } + void SetWordPosition(bool v) { data_->isWordPositions = v; } + + FtCtxData::Ptr GetData() { return data_; } + void SetData(FtCtxData::Ptr data) noexcept { data_ = std::move(data); } private: - Data::Ptr data_; + FtCtxData::Ptr data_; }; + } // namespace reindexer diff --git a/cpp_src/core/selectfunc/functions/debugrank.cc b/cpp_src/core/selectfunc/functions/debugrank.cc new file mode 100644 index 000000000..ac04e94a9 --- /dev/null +++ b/cpp_src/core/selectfunc/functions/debugrank.cc @@ -0,0 +1,103 @@ +#include "debugrank.h" +#include "core/keyvalue/p_string.h" +#include "core/payload/payloadiface.h" +#include "core/selectfunc/ctx/ftctx.h" +#include "core/selectfunc/selectfuncparser.h" + +namespace reindexer { + +bool DebugRank::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruct& func, std::vector& stringsHolder) { + if (!func.funcArgs.empty()) { + throw Error(errParams, "'debug_rank()' does not expect any arguments, but got %d", func.funcArgs.size()); + } + if (!func.ctx || func.ctx->type != BaseFunctionCtx::CtxType::kFtAreaDebug) { + return false; + } + if (!func.tagsPath.empty()) { + throw Error(errConflict, "SetByJsonPath is not implemented yet!"); + } + + FtCtx::Ptr ftctx = reindexer::static_ctx_pointer_cast(func.ctx); + if (!ftctx->GetData()->isWordPositions) { + throw Error(errParams, "debug_rank() is supported for 'text' index only"); + } + + FtCtxAreaData& dataFtCtx = *(reindexer::static_ctx_pointer_cast>(ftctx->GetData())); + if (!dataFtCtx.holders.has_value()) { + return false; + } + const auto it = dataFtCtx.holders->find(res.Id()); + if (it == dataFtCtx.holders->end()) { + return false; + } + + Payload pl(plType, res.Value()); + + VariantArray kr; + pl.Get(func.field, kr); + + if (kr.empty() || !kr[0].Type().IsSame(KeyValueType::String{})) { + throw Error(errLogic, "Unable to apply debug_rank function to the non-string field '%s'", func.field); + } + + const std::string* data = p_string(kr[0]).getCxxstr(); + + const auto pva = dataFtCtx.area[it->second].GetAreas(func.fieldNo); + if (!pva || pva->Empty()) { + return false; + } + const auto& va = *pva; + + std::string resultString; + + Word2PosHelper word2pos(*data, ftctx->GetData()->extraWordSymbols); + + static const std::string_view startString = ""; + static const std::string_view endString = 
""; + + const auto& areaVector = va.GetData(); + size_t id = 0; + size_t beforeStr = 0; + while (id < areaVector.size()) { + bool next = false; + int endStringCount = 0; + std::pair pos = word2pos.convert(areaVector[id].start, areaVector[id].end); + resultString += std::string_view(data->c_str() + beforeStr, pos.first - beforeStr); + do { + next = false; + switch (areaVector[id].phraseMode) { + case AreaDebug::PhraseMode::Start: + resultString += startString; + break; + case AreaDebug::PhraseMode::End: + endStringCount++; + break; + case AreaDebug::PhraseMode::None: + break; + } + resultString += areaVector[id].props; + id++; + if (id < areaVector.size() && areaVector[id].start == areaVector[id - 1].start) { + if (areaVector[id].end != areaVector[id - 1].end) { + throw Error(errLogic, "areas not equals start=%d ends(%d %d)", areaVector[id].start, areaVector[id].end, + areaVector[id - 1].end); + } + next = true; + } + } while (next); + resultString += std::string_view(data->c_str() + pos.first, pos.second - pos.first); + beforeStr = pos.second; + for (int i = 0; i < endStringCount; i++) { + resultString += endString; + } + } + resultString += std::string_view(data->c_str() + beforeStr, data->size() - beforeStr); + + stringsHolder.emplace_back(make_key_string(std::move(resultString))); + res.Value().Clone(); + + pl.Set(func.field, VariantArray{Variant{stringsHolder.back()}}); + + return true; +} +} // namespace reindexer \ No newline at end of file diff --git a/cpp_src/core/selectfunc/functions/debugrank.h b/cpp_src/core/selectfunc/functions/debugrank.h new file mode 100644 index 000000000..6cfecfa42 --- /dev/null +++ b/cpp_src/core/selectfunc/functions/debugrank.h @@ -0,0 +1,13 @@ +#pragma once +#include "core/item.h" +#include "core/queryresults/queryresults.h" + +namespace reindexer { + +struct SelectFuncStruct; + +class DebugRank { +public: + bool Process(ItemRef& res, PayloadType& plType, const SelectFuncStruct& func, std::vector& stringsHolder); +}; +} // namespace reindexer diff --git a/cpp_src/core/selectfunc/functions/highlight.cc b/cpp_src/core/selectfunc/functions/highlight.cc index ecbad1e7d..a3f9eb2b6 100644 --- a/cpp_src/core/selectfunc/functions/highlight.cc +++ b/cpp_src/core/selectfunc/functions/highlight.cc @@ -11,36 +11,35 @@ bool Highlight::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStru if (func.funcArgs.size() < 2) { throw Error(errParams, "Invalid highlight params need minimum 2 - have %d", func.funcArgs.size()); } - - if (!func.ctx || func.ctx->type != BaseFunctionCtx::kFtCtx) { + if (!func.ctx || func.ctx->type != BaseFunctionCtx::CtxType::kFtArea) { return false; } - + if (!func.tagsPath.empty()) { + throw Error(errConflict, "SetByJsonPath is not implemented yet!"); + } FtCtx::Ptr ftctx = reindexer::static_ctx_pointer_cast(func.ctx); - auto& dataFtCtx = *ftctx->GetData(); - if (!dataFtCtx.holders_.has_value()) { + FtCtxAreaData& dataFtCtx = *(reindexer::static_ctx_pointer_cast>(ftctx->GetData())); + if (!dataFtCtx.holders.has_value()) { return false; } - auto it = dataFtCtx.holders_->find(res.Id()); - if (it == dataFtCtx.holders_->end()) { + + auto it = dataFtCtx.holders->find(res.Id()); + if (it == dataFtCtx.holders->end()) { return false; } Payload pl(pl_type, res.Value()); VariantArray kr; - if (func.tagsPath.empty()) { - pl.Get(func.field, kr); - } else { - pl.GetByJsonPath(func.tagsPath, kr, KeyValueType::Undefined{}); - } + pl.Get(func.field, kr); if (kr.empty() || !kr[0].Type().IsSame(KeyValueType::String{})) { throw Error(errLogic, "Unable 
to apply highlight function to the non-string field '%s'", func.field); } const std::string* data = p_string(kr[0]).getCxxstr(); - auto pva = dataFtCtx.area_[it->second].GetAreas(func.fieldNo); + + auto pva = dataFtCtx.area[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) { return false; } @@ -50,12 +49,12 @@ bool Highlight::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStru result_string.reserve(data->size() + va.Size() * (func.funcArgs[0].size() + func.funcArgs[1].size())); result_string = *data; - Word2PosHelper word2pos(*data, ftctx->GetData()->extraWordSymbols_); + Word2PosHelper word2pos(*data, ftctx->GetData()->extraWordSymbols); int offset = 0; for (auto area : va.GetData()) { std::pair pos = - ftctx->GetData()->isWordPositions_ ? word2pos.convert(area.start, area.end) : std::make_pair(area.start, area.end); + ftctx->GetData()->isWordPositions ? word2pos.convert(area.start, area.end) : std::make_pair(area.start, area.end); result_string.insert(pos.first + offset, func.funcArgs[0]); offset += func.funcArgs[0].size(); @@ -68,11 +67,7 @@ bool Highlight::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStru stringsHolder.emplace_back(make_key_string(std::move(result_string))); res.Value().Clone(); - if (func.tagsPath.empty()) { - pl.Set(func.field, Variant{stringsHolder.back()}); - } else { - throw Error(errConflict, "SetByJsonPath is not implemented yet!"); - } + pl.Set(func.field, Variant{stringsHolder.back()}); return true; } diff --git a/cpp_src/core/selectfunc/functions/snippet.cc b/cpp_src/core/selectfunc/functions/snippet.cc index a6affd5fc..d512b97bb 100644 --- a/cpp_src/core/selectfunc/functions/snippet.cc +++ b/cpp_src/core/selectfunc/functions/snippet.cc @@ -185,10 +185,10 @@ A Snippet::RecalcZoneHelper::RecalcZoneToOffset(const Area& area) { return outAreas; } -void Snippet::buildResult(RecalcZoneHelper& recalcZoneHelper, const AreaBuffer& pva, const std::string& data, std::string& resultString) { +void Snippet::buildResult(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, const std::string& data, + std::string& resultString) { // resultString =preDelim_+with_area_str+data_str_before+marker_before+zone_str+marker_after+data_strAfter+postDelim_ Area snippetAreaPrev; - Area snippetAreaPrevChar; zonesList_.clear(); for (const auto& area : pva.GetData()) { @@ -228,7 +228,7 @@ void Snippet::buildResult(RecalcZoneHelper& recalcZoneHelper, const AreaBuffer& resultString.append(postDelim_); } -void Snippet::buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreaBuffer& pva, const std::string& data, +void Snippet::buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, const std::string& data, std::string& resultString) { // resultString =preDelim_+with_area_str+data_str_before+marker_before+zone_str+marker_after+data_strAfter+postDelim_ Area snippetAreaPrev; @@ -257,28 +257,31 @@ void Snippet::buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const Ar addSnippet(resultString, data, snippetAreaPrev, snippetAreaPrevChar); } -bool Snippet::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStruct& func, std::vector& stringsHolder) { +bool Snippet::Process(ItemRef& res, PayloadType& plType, const SelectFuncStruct& func, std::vector& stringsHolder) { if (!func.ctx) { return false; } + if (!func.tagsPath.empty()) { + throw Error(errConflict, "SetByJsonPath is not implemented yet!"); + } + init(func); FtCtx::Ptr ftctx = reindexer::static_ctx_pointer_cast(func.ctx); - auto& dataFtCtx = 
*ftctx->GetData(); - if (!dataFtCtx.isWordPositions_) { + auto& dataFtCtx = *(reindexer::static_ctx_pointer_cast>(ftctx->GetData())); + + if (!dataFtCtx.isWordPositions) { throw Error(errParams, "Snippet function does not work with ft_fuzzy index."); } - if (!func.tagsPath.empty()) { - throw Error(errConflict, "SetByJsonPath is not implemented yet!"); - } - if (!dataFtCtx.holders_.has_value()) { + if (!dataFtCtx.holders.has_value()) { return false; } - auto it = dataFtCtx.holders_->find(res.Id()); - if (it == dataFtCtx.holders_->end()) { + + auto it = dataFtCtx.holders->find(res.Id()); + if (it == dataFtCtx.holders->end()) { return false; } - Payload pl(pl_type, res.Value()); + Payload pl(plType, res.Value()); VariantArray kr; pl.Get(func.field, kr); @@ -287,7 +290,8 @@ bool Snippet::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStruct } const std::string* data = p_string(kr[0]).getCxxstr(); - auto pva = dataFtCtx.area_[it->second].GetAreas(func.fieldNo); + + auto pva = dataFtCtx.area[it->second].GetAreas(func.fieldNo); if (!pva || pva->Empty()) { return false; } @@ -295,7 +299,7 @@ bool Snippet::Process(ItemRef& res, PayloadType& pl_type, const SelectFuncStruct std::string resultString; resultString.reserve(data->size()); - RecalcZoneHelper recalcZoneHelper(*data, ftctx->GetData()->extraWordSymbols_, after_, before_, leftBound_, rightBound_); + RecalcZoneHelper recalcZoneHelper(*data, ftctx->GetData()->extraWordSymbols, after_, before_, leftBound_, rightBound_); if (needAreaStr_) { buildResultWithPrefix(recalcZoneHelper, *pva, *data, resultString); diff --git a/cpp_src/core/selectfunc/functions/snippet.h b/cpp_src/core/selectfunc/functions/snippet.h index d1240c8a5..c82dbda19 100644 --- a/cpp_src/core/selectfunc/functions/snippet.h +++ b/cpp_src/core/selectfunc/functions/snippet.h @@ -40,8 +40,8 @@ class Snippet { std::string_view leftBound_, rightBound_; }; - void buildResult(RecalcZoneHelper& recalcZoneHelper, const AreaBuffer& pva, const std::string& data, std::string& resultString); - void buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreaBuffer& pva, const std::string& data, + void buildResult(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, const std::string& data, std::string& resultString); + void buildResultWithPrefix(RecalcZoneHelper& recalcZoneHelper, const AreasInField& pva, const std::string& data, std::string& resultString); bool isInit_ = false; diff --git a/cpp_src/core/selectfunc/selectfunc.cc b/cpp_src/core/selectfunc/selectfunc.cc index 734936b24..fdbdc8a0c 100644 --- a/cpp_src/core/selectfunc/selectfunc.cc +++ b/cpp_src/core/selectfunc/selectfunc.cc @@ -109,39 +109,6 @@ BaseFunctionCtx::Ptr SelectFunction::createFuncForProc(int indexNo) { } } -bool SelectFunction::NeedArea(int indexNo) const { - if (functions_.empty()) { - return false; - } - IndexType indexType = nm_.getIndexType(indexNo); - - auto checkField = [&](int field) -> bool { - const auto it = functions_.find(field); - if (it != functions_.end()) { - if (std::holds_alternative(it->second.func) || std::holds_alternative(it->second.func) || - std::holds_alternative(it->second.func)) { - return true; - } - } - return false; - }; - - if (IsComposite(indexType)) { - int cjsonFieldIdx = nm_.getIndexesCount(); - for (auto field : nm_.getIndexFields(indexNo)) { - if (field == IndexValueType::SetByJsonPath) { - field = cjsonFieldIdx++; - } - if (checkField(field)) { - return true; - } - } - } else { - return checkField(indexNo); - } - return false; -} - BaseFunctionCtx::Ptr 
SelectFunction::CreateCtx(int indexNo) { // we use this hack because ft always needs ctx to generate proc in response if (functions_.empty() && IsFullText(nm_.getIndexType(indexNo))) { @@ -220,8 +187,46 @@ bool SelectFunction::ProcessItem(ItemRef& res, PayloadType& pl_type, std::vector BaseFunctionCtx::Ptr SelectFunction::createCtx(SelectFuncStruct& data, BaseFunctionCtx::Ptr ctx, IndexType index_type) { if (IsFullText(index_type)) { if (!ctx) { - data.ctx = make_intrusive(); + switch (SelectFuncType(data.func.index())) { + case SelectFuncType::None: + data.ctx = make_intrusive(BaseFunctionCtx::CtxType::kFtCtx); + break; + case SelectFuncType::Snippet: + case SelectFuncType::Highlight: + case SelectFuncType::SnippetN: + data.ctx = make_intrusive(BaseFunctionCtx::CtxType::kFtArea); + break; + case SelectFuncType::DebugRank: + data.ctx = make_intrusive(BaseFunctionCtx::CtxType::kFtAreaDebug); + break; + case SelectFuncType::Max: + throw reindexer::Error(errLogic, "incorrect function type 'Max'"); + } } else { + switch (SelectFuncType(data.func.index())) { + case SelectFuncType::None: + if (ctx->type != BaseFunctionCtx::CtxType::kFtCtx) { + throw reindexer::Error(errLogic, "The existing calling context type '%d' does not allow this function", + int(ctx->type)); + } + break; + case SelectFuncType::Snippet: + case SelectFuncType::Highlight: + case SelectFuncType::SnippetN: + if (ctx->type != BaseFunctionCtx::CtxType::kFtArea) { + throw reindexer::Error(errLogic, "The existing calling context type '%d' does not allow this function", + int(ctx->type)); + } + break; + case SelectFuncType::DebugRank: + if (ctx->type != BaseFunctionCtx::CtxType::kFtAreaDebug) { + throw reindexer::Error(errLogic, "The existing calling context type '%d' does not allow this function", + int(ctx->type)); + } + break; + case SelectFuncType::Max: + throw reindexer::Error(errLogic, "incorrect function type 'Max'"); + } data.ctx = std::move(ctx); } const std::string& indexName = (data.indexNo >= nm_.getIndexesCount()) ? data.field : nm_.getIndexName(data.indexNo); diff --git a/cpp_src/core/selectfunc/selectfunc.h b/cpp_src/core/selectfunc/selectfunc.h index c263f54d1..e336fcbce 100644 --- a/cpp_src/core/selectfunc/selectfunc.h +++ b/cpp_src/core/selectfunc/selectfunc.h @@ -24,7 +24,6 @@ class SelectFunction : public intrusive_atomic_rc_base { /// @param indexNo - number of index. /// @return pointer to a function context or null if some error happened. BaseFunctionCtx::Ptr CreateCtx(int indexNo); - bool NeedArea(int indexNo) const; private: BaseFunctionCtx::Ptr createCtx(SelectFuncStruct& data, BaseFunctionCtx::Ptr ctx, IndexType index_type); @@ -32,7 +31,7 @@ class SelectFunction : public intrusive_atomic_rc_base { BaseFunctionCtx::Ptr createFuncForProc(int indexNo); /// Containers of functions by index number. - fast_hash_map functions_; + RHashMap functions_; /// Interface to NsSelector object. 
NsSelectFuncInterface nm_; diff --git a/cpp_src/core/selectfunc/selectfuncparser.cc b/cpp_src/core/selectfunc/selectfuncparser.cc index 797d7e295..4bd3872bf 100644 --- a/cpp_src/core/selectfunc/selectfuncparser.cc +++ b/cpp_src/core/selectfunc/selectfuncparser.cc @@ -191,38 +191,44 @@ SelectFuncStruct& SelectFuncParser::ParseFunction(tokenizer& parser, bool partOf static const Args args(4, {"pre_delim", "post_delim", "with_area", "left_bound", "right_bound"}); parsePositionalAndNamedArgs(parser, args); return selectFuncStruct_; + } else if (tok.text() == "debug_rank") { + selectFuncStruct_.func = DebugRank(); } selectFuncStruct_.funcName = std::string(tok.text()); tok = parser.next_token(tokenizer::flags::no_flags); if (tok.text() == "(") { - std::string arg; - while (!parser.end()) { - tok = parser.next_token(tokenizer::flags::no_flags); - if (tok.text() == ")") { - if (!partOfExpression) { - token nextTok = parser.next_token(tokenizer::flags::no_flags); - if (nextTok.text().length() > 0) { - throw Error(errParseDSL, "%s: Unexpected character `%s` after closing parenthesis", selectFuncStruct_.funcName, - nextTok.text()); + if (parser.peek_token(tokenizer::flags::no_flags).text() == ")") { + parser.next_token(tokenizer::flags::no_flags); + selectFuncStruct_.isFunction = true; + } else { + std::string arg; + while (!parser.end()) { + tok = parser.next_token(tokenizer::flags::no_flags); + if (tok.text() == ")") { + if (!partOfExpression) { + token nextTok = parser.next_token(tokenizer::flags::no_flags); + if (nextTok.text().length() > 0) { + throw Error(errParseDSL, "%s: Unexpected character `%s` after closing parenthesis", selectFuncStruct_.funcName, + nextTok.text()); + } } + selectFuncStruct_.funcArgs.emplace_back(std::move(arg)); + selectFuncStruct_.isFunction = true; + arg.clear(); + break; + } + if (tok.text() == "," && tok.type == TokenSymbol) { + selectFuncStruct_.funcArgs.emplace_back(std::move(arg)); + arg.clear(); + } else { + arg += tok.text(); } - selectFuncStruct_.funcArgs.emplace_back(std::move(arg)); - selectFuncStruct_.isFunction = true; - arg.clear(); - break; } - if (tok.text() == "," && tok.type == TokenSymbol) { - selectFuncStruct_.funcArgs.emplace_back(std::move(arg)); - arg.clear(); - } else { - arg += tok.text(); + if (!selectFuncStruct_.isFunction) { + throw Error(errParseDSL, "%s: The closing parenthesis is required, but found `%s`", selectFuncStruct_.funcName, tok.text()); } } - if (!selectFuncStruct_.isFunction) { - throw Error(errParseDSL, "%s: The closing parenthesis is required, but found `%s`. Select function name: `%s`", - selectFuncStruct_.funcName, tok.text(), selectFuncStruct_.funcName); - } } else { throw Error(errParseDSL, "%s: An open parenthesis is required, but found `%s`. 
Select function name: `%s`", selectFuncStruct_.funcName, tok.text(), selectFuncStruct_.funcName); diff --git a/cpp_src/core/selectfunc/selectfuncparser.h b/cpp_src/core/selectfunc/selectfuncparser.h index 0d7894f9e..ef5c45219 100644 --- a/cpp_src/core/selectfunc/selectfuncparser.h +++ b/cpp_src/core/selectfunc/selectfuncparser.h @@ -22,7 +22,7 @@ struct SelectFuncStruct { std::unordered_map namedArgs; BaseFunctionCtx::Ptr ctx; TagsPath tagsPath; - int indexNo = -1; + int indexNo = IndexValueType::NotSet; int fieldNo = 0; }; diff --git a/cpp_src/debug/backtrace.h b/cpp_src/debug/backtrace.h index d55d9bd38..29c629634 100644 --- a/cpp_src/debug/backtrace.h +++ b/cpp_src/debug/backtrace.h @@ -1,8 +1,8 @@ #pragma once #include -#include #include +#include namespace reindexer { namespace debug { diff --git a/cpp_src/gtests/bench/ft_bench.cc b/cpp_src/gtests/bench/ft_bench.cc index c0bdb667d..4d2b64b6e 100644 --- a/cpp_src/gtests/bench/ft_bench.cc +++ b/cpp_src/gtests/bench/ft_bench.cc @@ -9,8 +9,6 @@ #include "ft_fixture.h" #include "ft_merge_limit.h" -const std::string kStoragePath = "/tmp/reindex/ft_bench_test"; - using std::shared_ptr; using reindexer::Reindexer; @@ -23,15 +21,16 @@ const int kItemsInBenchDataset = 100'000; #endif int main(int argc, char** argv) { - if (reindexer::fs::RmDirAll(kStoragePath) < 0 && errno != ENOENT) { - std::cerr << "Could not clean working dir '" << kStoragePath << "'."; + const auto storagePath = reindexer::fs::JoinPath(reindexer::fs::GetTempDir(), "reindex/ft_bench_test"); + if (reindexer::fs::RmDirAll(storagePath) < 0 && errno != ENOENT) { + std::cerr << "Could not clean working dir '" << storagePath << "'."; std::cerr << "Reason: " << strerror(errno) << std::endl; return 1; } - shared_ptr DB = std::make_shared(); - auto err = DB->Connect("builtin://" + kStoragePath); + auto DB = std::make_shared(); + auto err = DB->Connect("builtin://" + storagePath); if (!err.ok()) { return err.code(); } diff --git a/cpp_src/gtests/bench/reindexer_bench.cc b/cpp_src/gtests/bench/reindexer_bench.cc index 669055241..1a094ac9b 100644 --- a/cpp_src/gtests/bench/reindexer_bench.cc +++ b/cpp_src/gtests/bench/reindexer_bench.cc @@ -16,8 +16,6 @@ #include "core/reindexer.h" -const std::string kStoragePath = "/tmp/reindex/bench_test"; - using std::shared_ptr; using reindexer::Reindexer; @@ -33,15 +31,16 @@ const int kItemsInComparatorsBenchDataset = 100'000; #endif int main(int argc, char** argv) { - if (reindexer::fs::RmDirAll(kStoragePath) < 0 && errno != ENOENT) { - std::cerr << "Could not clean working dir '" << kStoragePath << "'."; + const auto storagePath = reindexer::fs::JoinPath(reindexer::fs::GetTempDir(), "reindex/bench_test"); + if (reindexer::fs::RmDirAll(storagePath) < 0 && errno != ENOENT) { + std::cerr << "Could not clean working dir '" << storagePath << "'."; std::cerr << "Reason: " << strerror(errno) << std::endl; return 1; } - shared_ptr DB = std::make_shared(); - auto err = DB->Connect("builtin://" + kStoragePath); + auto DB = std::make_shared(); + auto err = DB->Connect("builtin://" + storagePath); if (!err.ok()) { return err.code(); } diff --git a/cpp_src/gtests/tests/API/base_tests.cc b/cpp_src/gtests/tests/API/base_tests.cc index c75694529..1371ce729 100644 --- a/cpp_src/gtests/tests/API/base_tests.cc +++ b/cpp_src/gtests/tests/API/base_tests.cc @@ -2045,11 +2045,11 @@ TEST_F(ReindexerApi, IntFieldConvertToStringIndexTest) { } TEST_F(ReindexerApi, MetaIndexTest) { - const std::string kStoragePath = reindexer::fs::JoinPath(reindexer::fs::GetTempDir(), 
"reindex/meta_index_test/"); - reindexer::fs::RmDirAll(kStoragePath); + const auto storagePath = reindexer::fs::JoinPath(reindexer::fs::GetTempDir(), "reindex/meta_index_test/"); + reindexer::fs::RmDirAll(storagePath); // ignore result auto rx = std::make_unique(); - auto err = rx->Connect("builtin://" + kStoragePath); + auto err = rx->Connect("builtin://" + storagePath); ASSERT_TRUE(err.ok()) << err.what(); err = rx->OpenNamespace(default_namespace, StorageOpts().Enabled().CreateIfMissing()); diff --git a/cpp_src/gtests/tests/fixtures/ft_api.cc b/cpp_src/gtests/tests/fixtures/ft_api.cc index 8db2d3aff..85efb7161 100644 --- a/cpp_src/gtests/tests/fixtures/ft_api.cc +++ b/cpp_src/gtests/tests/fixtures/ft_api.cc @@ -1,7 +1,7 @@ #include "ft_api.h" void FTApi::Init(const reindexer::FtFastConfig& ftCfg, unsigned nses, const std::string& storage) { - rt.reindexer.reset(new reindexer::Reindexer); + rt.reindexer = std::make_shared(); if (!storage.empty()) { auto err = rt.reindexer->Connect("builtin://" + storage); ASSERT_TRUE(err.ok()) << err.what(); @@ -154,11 +154,11 @@ void FTApi::AddInBothFields(std::string_view ns, std::string_view w1, std::strin rt.Commit(ns); } -reindexer::QueryResults FTApi::SimpleSelect(std::string word, bool withHighlight) { - auto q{reindexer::Query("nm1").Where("ft3", CondEq, std::move(word)).WithRank()}; +reindexer::QueryResults FTApi::SimpleSelect(std::string_view ns, std::string_view index, std::string_view dsl, bool withHighlight) { + auto q{reindexer::Query(ns).Where(index, CondEq, std::string(dsl)).WithRank()}; reindexer::QueryResults res; if (withHighlight) { - q.AddFunction("ft3 = highlight(!,!)"); + q.AddFunction(fmt::format("{} = highlight(!,!)", index)); } auto err = rt.reindexer->Select(q, res); EXPECT_TRUE(err.ok()) << err.what(); @@ -166,15 +166,6 @@ reindexer::QueryResults FTApi::SimpleSelect(std::string word, bool withHighlight return res; } -reindexer::QueryResults FTApi::SimpleSelect3(std::string word) { - auto qr{reindexer::Query("nm3").Where("ft", CondEq, std::move(word))}; - reindexer::QueryResults res; - qr.AddFunction("ft = highlight(!,!)"); - auto err = rt.reindexer->Select(qr, res); - EXPECT_TRUE(err.ok()) << err.what(); - return res; -} - reindexer::Error FTApi::Delete(int id) { reindexer::Item item = rt.NewItem("nm1"); item["id"] = id; diff --git a/cpp_src/gtests/tests/fixtures/ft_api.h b/cpp_src/gtests/tests/fixtures/ft_api.h index e4e1a7fa4..b9c4655f3 100644 --- a/cpp_src/gtests/tests/fixtures/ft_api.h +++ b/cpp_src/gtests/tests/fixtures/ft_api.h @@ -28,9 +28,11 @@ class FTApi : public ::testing::TestWithParam(); } Data randomItemData() { diff --git a/cpp_src/gtests/tests/fixtures/queries_api.h b/cpp_src/gtests/tests/fixtures/queries_api.h index bbca249b6..1a5326c69 100644 --- a/cpp_src/gtests/tests/fixtures/queries_api.h +++ b/cpp_src/gtests/tests/fixtures/queries_api.h @@ -743,11 +743,11 @@ class QueriesApi : public ReindexerApi, public QueriesVerifier { .Distinct(distinct.c_str()) .Sort(kFieldNameYear, true)); - ExecuteAndVerifyWithSql(Query(default_namespace) - .Select({distinct.c_str()}) - .Distinct(distinct.c_str()) - .Where(kFieldNameGenre, CondEq, randomGenre) - .Sort(kFieldNameYear, true)); + ExecuteAndVerifyWithSql(Query(default_namespace) + .Select({distinct.c_str()}) + .Distinct(distinct.c_str()) + .Where(kFieldNameGenre, CondEq, randomGenre) + .Sort(kFieldNameYear, true)); } } diff --git a/cpp_src/gtests/tests/fixtures/storage_lazy_load.h b/cpp_src/gtests/tests/fixtures/storage_lazy_load.h index c68e2c466..754c36f0e 100644 --- 
a/cpp_src/gtests/tests/fixtures/storage_lazy_load.h +++ b/cpp_src/gtests/tests/fixtures/storage_lazy_load.h @@ -5,7 +5,7 @@ class DISABLED_StorageLazyLoadApi : public ReindexerApi { public: - DISABLED_StorageLazyLoadApi() : pk_(0), inserted_(0) { rt.reindexer.reset(new Reindexer); } + DISABLED_StorageLazyLoadApi() : pk_(0), inserted_(0) { rt.reindexer = std::make_shared(); } ~DISABLED_StorageLazyLoadApi() { dropNs(); } void SetUp() override { diff --git a/cpp_src/gtests/tests/unit/ft/ft_generic.cc b/cpp_src/gtests/tests/unit/ft/ft_generic.cc index 6398ea66f..692db9767 100644 --- a/cpp_src/gtests/tests/unit/ft/ft_generic.cc +++ b/cpp_src/gtests/tests/unit/ft/ft_generic.cc @@ -70,7 +70,7 @@ TEST_P(FTGenericApi, CompositeSelect) { continue; } auto it = data.find(ritem[field].As()); - ASSERT_TRUE(it != data.end()); + ASSERT_TRUE(it != data.end()) << ritem[field].As(); data.erase(it); } } @@ -234,6 +234,7 @@ TEST_P(FTGenericApi, SelectWithDistance) { TEST_P(FTGenericApi, AreasOnSuffix) { auto ftCfg = GetDefaultConfig(); + ftCfg.optimization = reindexer::FtFastConfig::Optimization::CPU; Init(ftCfg); Add("the nos1 the nos2 the nosmn the nose"sv); @@ -249,6 +250,169 @@ TEST_P(FTGenericApi, AreasOnSuffix) { CheckResults("+nos* +*mask ", {{"the !nos1! the !mmask! stop !nos2! table", ""}}, false); } +TEST_P(FTGenericApi, DebugInfo) { + auto ftCfg = GetDefaultConfig(); + Init(ftCfg); + + Add("Маша ела кашу. Каша кушалась сама. Машу ругали."sv); + Add("Коля, Сеня гуляли."sv); + Add("слово простая фраза что то еще."sv); + Add("слово начало простая фраза конец что то еще простая фраза слово слово."sv); + Add("жил пил гулял"sv); + + auto removeLineEnd = [](std::vector& dataCompare) { + for (auto& s : dataCompare) { + s.erase(std::remove(s.begin(), s.end(), '\n'), s.end()); + } + }; + + { + reindexer::Query q("nm1"); + q.Where("ft3", CondEq, "маша"); + q.AddFunction("ft3 = debug_rank()"); + q.Select({"ft1"}); + reindexer::QueryResults res; + auto err = rt.reindexer->Select(q, res); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(res.Count(), 1); + auto it = res.begin(); + reindexer::WrSerializer wrSer; + err = it.GetJSON(wrSer, false); + ASSERT_TRUE(err.ok()) << err.what(); + // clang-format off + std::vector dataCompare={R"({"ft1": +"{termRank=97 term='маша' pattern='маша' bm25_norm=0.9798439468181269 termLenBoost=1 positionRank=1 normDist=0 proc=100 fullMatchBoost=0} Маша ела кашу. Каша кушалась сама. 
+ {termRank=77 term='маша' pattern='машу' bm25_norm=0.9798439468181269 termLenBoost=1 positionRank=0.994 normDist=0 proc=80 fullMatchBoost=0} Машу ругали."})"}; + // clang-format on + removeLineEnd(dataCompare); + ASSERT_EQ(wrSer.Slice(), dataCompare[0]); + } + + { + reindexer::Query q("nm1"); + q.Where("ft3", CondEq, "коля сеня"); + q.AddFunction("ft3 = debug_rank()"); + q.Select({"ft1"}); + reindexer::QueryResults res; + auto err = rt.reindexer->Select(q, res); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(res.Count(), 1); + auto it = res.begin(); + reindexer::WrSerializer wrSer; + err = it.GetJSON(wrSer, false); + ASSERT_TRUE(err.ok()) << err.what(); + // clang-format off + std::vector dataCompare={R"({"ft1": +"{termRank=102 term='коля' pattern='коля' bm25_norm=1.022314131295854 termLenBoost=1 positionRank=1 normDist=0 proc=100 fullMatchBoost=0} Коля, + {termRank=102 term='сеня' pattern='сеня' bm25_norm=1.022314131295854 termLenBoost=1 positionRank=0.999 normDist=0 proc=100 fullMatchBoost=0} Сеня гуляли."})"}; + // clang-format on + removeLineEnd(dataCompare); + ASSERT_EQ(wrSer.Slice(), dataCompare[0]); + } + + { + reindexer::Query q("nm1"); + q.Where("ft3", CondEq, "'начало простая фраза конец' 'простая фраза'"); + q.AddFunction("ft3 = debug_rank()"); + q.Select({"ft1"}); + q.Sort("id", false); + q.WithRank(); + // clang-format off + std::vector dataCompare={ +R"###({"ft1":"слово + {termRank=93 term='простая' pattern='простая' bm25_norm=0.9399331930048559 termLenBoost=1 positionRank=0.999 normDist=0 proc=100 fullMatchBoost=0} простая + {termRank=85 term='фраза' pattern='фраза' bm25_norm=0.9399331930048559 termLenBoost=0.9142857193946838 positionRank=0.998 normDist=0 proc=100 fullMatchBoost=0} фраза + что то еще.","rank()":101.0})###", +R"##({"ft1":"слово + {termRank=92 term='начало' pattern='начало' bm25_norm=0.9624865670750559 termLenBoost=0.9571428596973419 positionRank=0.999 normDist=0 proc=100 fullMatchBoost=0} начало + {termRank=94 term='простая' pattern='простая' bm25_norm=0.9436916111700189 termLenBoost=1 positionRank=0.998 normDist=0 proc=100 fullMatchBoost=0} + {termRank=94 term='простая' pattern='простая' bm25_norm=0.9436916111700189 termLenBoost=1 positionRank=0.998 normDist=0 proc=100 fullMatchBoost=0} простая + {termRank=86 term='фраза' pattern='фраза' bm25_norm=0.9436916111700189 termLenBoost=0.9142857193946838 positionRank=0.997 normDist=0 proc=100 fullMatchBoost=0} + {termRank=86 term='фраза' pattern='фраза' bm25_norm=0.9436916111700189 termLenBoost=0.9142857193946838 positionRank=0.997 normDist=0 proc=100 fullMatchBoost=0} фраза + {termRank=87 term='конец' pattern='конец' bm25_norm=0.9624865670750559 termLenBoost=0.9142857193946838 positionRank=0.996 normDist=0 proc=100 fullMatchBoost=0} конец + что то еще + {termRank=94 term='простая' pattern='простая' bm25_norm=0.9436916111700189 termLenBoost=1 positionRank=0.998 normDist=0 proc=100 fullMatchBoost=0} простая + {termRank=86 term='фраза' pattern='фраза' bm25_norm=0.9436916111700189 termLenBoost=0.9142857193946838 positionRank=0.997 normDist=0 proc=100 fullMatchBoost=0} фраза + слово слово.","rank()":255.0})##" + }; + // clang-format on + removeLineEnd(dataCompare); + reindexer::QueryResults res; + auto err = rt.reindexer->Select(q, res); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(res.Count(), 2); + + size_t i = 0; + for (auto it : res) { + reindexer::WrSerializer wrSer; + err = it.GetJSON(wrSer, false); + ASSERT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(dataCompare[i], wrSer.Slice()); + i++; + } + } + + { 
+ reindexer::Query q("nm1"); + q.Where("ft3", CondEq, "'простыми фразами'"); + q.AddFunction("ft3 = debug_rank()"); + q.Select({"ft1"}); + q.Sort("id", false); + // clang-format off + std::vector dataCompare={ +R"###({"ft1":"слово + {termRank=74 term='простыми' pattern='простая' bm25_norm=0.9399331930048559 termLenBoost=1 positionRank=0.999 normDist=0 proc=79 fullMatchBoost=0} простая + {termRank=74 term='фразами' pattern='фраза' bm25_norm=0.9399331930048559 termLenBoost=0.9624999999999999 positionRank=0.998 normDist=0 proc=82 fullMatchBoost=0} фраза + что то еще."})###", +R"###({"ft1":"слово начало + {termRank=74 term='простыми' pattern='простая' bm25_norm=0.9436916111700189 termLenBoost=1 positionRank=0.998 normDist=0 proc=79 fullMatchBoost=0} простая + {termRank=74 term='фразами' pattern='фраза' bm25_norm=0.9436916111700189 termLenBoost=0.9624999999999999 positionRank=0.997 normDist=0 proc=82 fullMatchBoost=0} фраза + конец что то еще + {termRank=74 term='простыми' pattern='простая' bm25_norm=0.9436916111700189 termLenBoost=1 positionRank=0.998 normDist=0 proc=79 fullMatchBoost=0} простая + {termRank=74 term='фразами' pattern='фраза' bm25_norm=0.9436916111700189 termLenBoost=0.9624999999999999 positionRank=0.997 normDist=0 proc=82 fullMatchBoost=0} фраза + слово слово."})###" + }; + // clang-format on + removeLineEnd(dataCompare); + + reindexer::QueryResults res; + auto err = rt.reindexer->Select(q, res); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(res.Count(), 2); + size_t i = 0; + for (auto it : res) { + reindexer::WrSerializer wrSer; + err = it.GetJSON(wrSer, false); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(dataCompare[i], wrSer.Slice()); + i++; + } + } + + { + reindexer::Query q("nm1"); + q.Where("ft3", CondEq, "жил~ пил"); + q.Select({"ft1"}); + q.AddFunction("ft3 = debug_rank()"); + reindexer::QueryResults res; + auto err = rt.reindexer->Select(q, res); + EXPECT_TRUE(err.ok()) << err.what(); + ASSERT_EQ(res.Count(), 1); + auto it = res.begin(); + reindexer::WrSerializer wrSer; + err = it.GetJSON(wrSer, false); + ASSERT_TRUE(err.ok()) << err.what(); + //clang-format off + std::vector dataCompare = { + R"({"ft1":"{termRank=102 term='жил' pattern='жил' bm25_norm=1.022314131295854 termLenBoost=1 positionRank=1 normDist=0 proc=100 fullMatchBoost=0} жил + {termRank=71 term='жил' pattern='ил' bm25_norm=1.022314131295854 termLenBoost=1 positionRank=0.999 normDist=0 proc=70 fullMatchBoost=0} + {termRank=102 term='пил' pattern='пил' bm25_norm=1.022314131295854 termLenBoost=1 positionRank=0.999 normDist=0 proc=100 fullMatchBoost=0} пил гулял"})"}; + //clang-format on + removeLineEnd(dataCompare); + ASSERT_EQ(wrSer.Slice(), dataCompare[0]); + } +} + TEST_P(FTGenericApi, AreasMaxRank) { auto ftCfg = GetDefaultConfig(); ftCfg.maxAreasInDoc = 3; diff --git a/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc b/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc index 577422741..10b6c9c42 100644 --- a/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc +++ b/cpp_src/gtests/tests/unit/ft/ft_incremental_build.cc @@ -29,7 +29,7 @@ class FTIncrementalBuildApi : public FTApi { enum class StrictSuffixValidation { No, Yes }; void Init(const reindexer::FtFastConfig& ftCfg) { - rt.reindexer.reset(new reindexer::Reindexer); + rt.reindexer = std::make_shared(); auto err = rt.reindexer->OpenNamespace(GetDefaultNamespace()); ASSERT_TRUE(err.ok()) << err.what(); rt.DefineNamespaceDataset(GetDefaultNamespace(), {IndexDeclaration{"id", "hash", "int", IndexOpts().PK(), 0}, diff --git 
a/cpp_src/gtests/tests/unit/ft/ft_min_relevance.cc b/cpp_src/gtests/tests/unit/ft/ft_min_relevance.cc new file mode 100644 index 000000000..a1f831d5f --- /dev/null +++ b/cpp_src/gtests/tests/unit/ft/ft_min_relevance.cc @@ -0,0 +1,78 @@ +#include +#include "ft_api.h" + +class FTMinRelevanceApi : public FTApi { +protected: + std::string_view GetDefaultNamespace() noexcept override { return "ft_min_relevance"; } + void CreateNs() { + const std::string_view nmName = GetDefaultNamespace(); + rt.OpenNamespace(nmName); + rt.DefineNamespaceDataset(nmName, {IndexDeclaration{"id", "hash", "int", IndexOpts().PK(), 0}, + IndexDeclaration{"ft1", "text", "string", IndexOpts(), 0}}); + reindexer::FtFastConfig cfg(0); + cfg.enableNumbersSearch = true; + cfg.logLevel = 5; + cfg.maxStepSize = 100; + auto err = SetFTConfig(cfg, nmName, "ft1", {"ft1"}); + ASSERT_TRUE(err.ok()) << err.what(); + } + std::set SelectIDs(std::string_view str) { + auto qr = SimpleSelect(GetDefaultNamespace(), "ft1", str, false); + std::set selectDocs; + for (auto v : qr) { + reindexer::Item it = v.GetItem(); + selectDocs.insert(it["id"].As()); + } + return selectDocs; + } + void DelDocuments() { + auto deleted = rt.Delete(reindexer::Query(GetDefaultNamespace()).Where("ft1", CondEq, words_[deleteWordIndex_])); + ASSERT_GT(deleted, 0); + } + + const size_t deleteWordIndex_ = 1; + const std::vector words_{"машина", "стол", "велосипед", "автобус"}; +}; + +TEST_F(FTMinRelevanceApi, CorrectDocWithMinRelevanceAndEmptyDoc) { + CreateNs(); + + // Fill namespace + std::map>> docs; + std::set deleteDocs; + int id = 0; + constexpr int vDocCount = 300; + const std::string_view nmName = GetDefaultNamespace(); + for (int i = 0; i < vDocCount; i++) { + auto [it, _] = docs.insert(std::make_pair(i, std::map>())); + for (int k = 0; k < (i % 10) + 1; k++) { + const size_t w1 = rand() % words_.size(); + const size_t w2 = rand() % words_.size(); + if (w1 == deleteWordIndex_ || w2 == deleteWordIndex_) { + deleteDocs.insert(id); + } + std::string doc = words_[w1] + " " + words_[w2] + " " + std::to_string(i); + it->second.insert(std::make_pair(id, std::make_pair(doc, true))); + reindexer::Item item = rt.NewItem(nmName); + item["id"] = id; + item["ft1"] = doc; + rt.Upsert(nmName, item); + id++; + } + SelectIDs("build"); + } + // Delete documents with some unique word + DelDocuments(); + + // Check, that there are no deleted docs in results + const auto selectDocs = SelectIDs("машина автобус"); + std::vector intersection; + std::set_intersection(deleteDocs.begin(), deleteDocs.end(), selectDocs.begin(), selectDocs.end(), std::back_inserter(intersection)); + if (!intersection.empty()) { + std::stringstream ss; + for (auto& v : intersection) { + ss << v << " "; + } + ASSERT_TRUE(false) << "Intersection must be empty: " << ss.str(); + } +} diff --git a/cpp_src/gtests/tests/unit/index_tuple_test.cc b/cpp_src/gtests/tests/unit/index_tuple_test.cc deleted file mode 100644 index ce7729fe2..000000000 --- a/cpp_src/gtests/tests/unit/index_tuple_test.cc +++ /dev/null @@ -1,529 +0,0 @@ -#include -#include "reindexer_api.h" -#include "tools/fsops.h" - -class IndexTupleTest : public ReindexerApi { -public: - void SetUp() override { - reindexer::fs::RmDirAll(kStoragePath); - ReindexerApi::SetUp(); - } - - [[nodiscard]] std::string CreateEmptyNamespace(std::string_view ns) { return createNS(ns, R"json({"id":%d})json"); } - - [[nodiscard]] std::string CreateNamespace(std::string_view ns) { - static const char pattern[] = - 
R"json({"id":%d,"objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}]})json"; - return createNS(ns, pattern); - } - - [[nodiscard]] std::string CreateSparseNamespace(std::string_view ns) { - return createNS(ns, R"json({"id":%d,"fld1":1,"fld2":{"nested":"test"}})json"); - } - - [[nodiscard]] std::string CreateArrayNamespace(std::string_view ns) { - static const char pattern[] = R"json({"id":%d,"obj":{"val":10},"arr":[1,2,3]})json"; - return createNS(ns, pattern); - } - - void DoTestDefault(const std::shared_ptr& reindexer, std::string_view ns, const reindexer::IndexDef& indexDef, - std::string_view pattern, std::string_view field, const VariantArray& expectedValues, - std::string_view description) const { - auto err = rt.reindexer->AddIndex(ns, indexDef); - ASSERT_TRUE(err.ok()) << err.what() << "\n" << description; - - validateResults(reindexer, ns, pattern, field, expectedValues, description); - } - - void DoTestEmpty(const std::shared_ptr& reindexer, std::string_view ns, const reindexer::IndexDef& indexDef, - std::string_view pattern, std::string_view description) const { - auto err = reindexer->AddIndex(ns, indexDef); - ASSERT_TRUE(err.ok()) << err.what(); - - checkExpectations(reindexer, ns, pattern, description); - } - - void DoCallAndCheckError(const std::shared_ptr& reindexer, std::string_view ns, - const reindexer::IndexDef& indexDef, std::string_view errMsg) const { - std::vector items; - getItems(reindexer, ns, items); - - auto err = reindexer->AddIndex(ns, indexDef); - ASSERT_FALSE(err.ok()); - ASSERT_EQ(err.what(), errMsg); - - checkItems(reindexer, ns, items); - } - - void ValidateReloadState(const std::shared_ptr& reindexer, std::string_view ns, std::string_view pattern, - std::string_view description, const std::string& storagePath) const { - auto err = rt.reindexer->CloseNamespace(ns); - ASSERT_TRUE(err.ok()) << err.what(); - - err = rt.reindexer->OpenNamespace(ns, StorageOpts().Enabled().CreateIfMissing().VerifyChecksums()); - ASSERT_TRUE(err.ok()) << err.what(); - - checkExpectations(reindexer, ns, pattern, description); - - // remove storage - err = rt.reindexer->CloseNamespace(ns); - ASSERT_TRUE(err.ok()) << err.what(); - - reindexer::fs::RmDirAll(storagePath); - } - - void SpecialCheckForNull(const std::shared_ptr& reindexer, std::string_view ns, std::string_view firstItemPattern, - std::string_view itemPattern, std::string_view description, const std::string& storagePath) const { - specialCheckForNull(reindexer, ns, firstItemPattern, itemPattern, description); - validateReloadStateForNull(reindexer, ns, firstItemPattern, itemPattern, description, storagePath); - } - - static constexpr uint32_t IdStart = 2000; - -private: - static constexpr char kStoragePath[] = "/tmp/reindex/"; - static constexpr uint32_t itemNumber_ = 5; // NOTE: minimum 2 - - [[nodiscard]] std::string createNS(std::string_view ns, std::string_view itemPattern) { - std::string storage; - createNamespace(ns, storage); - generateItems(ns, itemPattern); - return storage; - } - - void createNamespace(std::string_view ns, std::string& storagePath) { - storagePath = kStoragePath; - storagePath.append(ns); - - auto err = rt.reindexer->EnableStorage(storagePath); - ASSERT_TRUE(err.ok()) << err.what(); - - err = rt.reindexer->OpenNamespace(ns, StorageOpts().Enabled().CreateIfMissing()); - EXPECT_TRUE(err.ok()) << err.what(); - - DefineNamespaceDataset(ns, {IndexDeclaration{"id", "hash", 
"int", IndexOpts().PK(), 0}}); - } - - void generateItems(std::string_view ns, std::string_view pattern) { - for (uint32_t idx = IdStart, sz = IdStart + itemNumber_; idx < sz; ++idx) { - Item item = NewItem(ns); - EXPECT_TRUE(item.Status().ok()) << item.Status().what(); - - const auto json = fmt::sprintf(pattern.data(), idx); - auto err = item.FromJSON(json); - ASSERT_TRUE(err.ok()) << err.what(); - - Upsert(ns, item); - } - } - - void checkIfItemJSONValid(QueryResults::Iterator& it, bool print = false) const { - reindexer::WrSerializer wrser; - Error err = it.GetJSON(wrser, false); - EXPECT_TRUE(err.ok()) << err.what(); - if (err.ok() && print) { - std::cout << wrser.Slice() << std::endl; - } - } - - void validateResults(const std::shared_ptr& reindexer, std::string_view ns, std::string_view pattern, - std::string_view field, const VariantArray& expectedValues, std::string_view description) const { - SCOPED_TRACE(description); - - QueryResults qr; - auto err = reindexer->Select("SELECT * FROM " + std::string(ns), qr); - EXPECT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(qr.Count(), itemNumber_); - - for (auto it : qr) { - Item item = it.GetItem(false); - checkIfItemJSONValid(it); - const auto json = item.GetJSON(); - ASSERT_NE(json.find(pattern), std::string::npos) << "JSON: " << json << ";\npattern: " << pattern; - const VariantArray values = item[field]; - ASSERT_EQ(values.size(), expectedValues.size()); - ASSERT_EQ(values.IsArrayValue(), expectedValues.IsArrayValue()); - for (size_t i = 0; i < values.size(); ++i) { - ASSERT_TRUE(values[i].Type().IsSame(expectedValues[i].Type())) - << values[i].Type().Name() << "!=" << expectedValues[i].Type().Name(); - if (values[i].Type().IsSame(reindexer::KeyValueType::Null())) { - continue; - } - - ASSERT_EQ(values[i], expectedValues[i]); - } - } - } - - void checkExpectations(const std::shared_ptr& reindexer, std::string_view ns, std::string_view pattern, - std::string_view description) const { - SCOPED_TRACE(description); - - QueryResults qr; - auto err = reindexer->Select("SELECT * FROM " + std::string(ns), qr); - EXPECT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(qr.Count(), itemNumber_); - - uint32_t idx = IdStart; - for (auto it : qr) { - Item item = it.GetItem(false); - checkIfItemJSONValid(it); - - const auto json = item.GetJSON(); - const auto extJson = fmt::sprintf(pattern.data(), idx); - ASSERT_EQ(json, extJson); - - ++idx; - } - } - - void getItems(const std::shared_ptr& reindexer, std::string_view ns, std::vector& items) const { - QueryResults qr; - auto err = reindexer->Select("SELECT * FROM " + std::string(ns), qr); - ASSERT_TRUE(err.ok()) << err.what(); - - items.clear(); - items.reserve(qr.Count()); - for (auto& it : qr) { - auto item = it.GetItem(false); - items.emplace_back(item.GetJSON()); - } - } - - void checkItems(const std::shared_ptr& reindexer, std::string_view ns, - const std::vector& items) const { - QueryResults qr; - auto err = reindexer->Select("SELECT * FROM " + std::string(ns), qr); - ASSERT_TRUE(err.ok()) << err.what(); - - ASSERT_EQ(items.size(), qr.Count()); - auto itItems = items.cbegin(); - auto itQR = qr.begin(); - auto endItems = items.cend(); - auto endQR = qr.end(); - for (; (itItems != endItems) && (itQR != endQR); ++itItems, ++itQR) { - auto item = itQR.GetItem(false); - ASSERT_EQ(*itItems, item.GetJSON()); - } - } - - void specialCheckForNull(const std::shared_ptr& reindexer, std::string_view ns, std::string_view firstItemPattern, - std::string_view itemPattern, std::string_view description) const { - 
SCOPED_TRACE(description); - - // first element should not update values, all others should be initialized to default values - // Note: but index array updates element type - QueryResults qr; - auto err = reindexer->Select("SELECT * FROM " + std::string(ns), qr); - EXPECT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(qr.Count(), itemNumber_); - - uint32_t idx = IdStart; - for (auto it : qr) { - Item item = it.GetItem(false); - checkIfItemJSONValid(it); - const auto json = item.GetJSON(); - const auto& pattern = (idx == IdStart) ? firstItemPattern : itemPattern; - const auto expJson = fmt::sprintf(pattern.data(), idx); - ASSERT_EQ(json, expJson); - - ++idx; - } - } - - void validateReloadStateForNull(const std::shared_ptr& reindexer, std::string_view ns, - std::string_view firstItemPattern, std::string_view itemPattern, std::string_view description, - const std::string& storagePath) const { - auto err = rt.reindexer->CloseNamespace(ns); - ASSERT_TRUE(err.ok()) << err.what(); - - err = rt.reindexer->OpenNamespace(ns, StorageOpts().Enabled().VerifyChecksums()); - ASSERT_TRUE(err.ok()) << err.what(); - - specialCheckForNull(reindexer, ns, firstItemPattern, itemPattern, description); - - // remove storage - err = rt.reindexer->CloseNamespace(ns); - ASSERT_TRUE(err.ok()) << err.what(); - - reindexer::fs::RmDirAll(storagePath); - } -}; - -TEST_F(IndexTupleTest, DISABLED_ScalarTest) { - static const std::string ns = "testNSScalar"; - const auto storage = CreateEmptyNamespace(ns); - - DoTestEmpty(rt.reindexer, ns, {"sparse", "text", "string", IndexOpts().Sparse()}, R"({"id":%d})", "add some sparse index. Do nothing"); - DoTestDefault(rt.reindexer, ns, {"text", "text", "string", IndexOpts()}, R"("text":"")", "text", {Variant("")}, - "add text scalar index. Add default value"); - DoTestEmpty(rt.reindexer, ns, {"text", "text", "string", IndexOpts()}, R"({"id":%d,"text":""})", "update text index. Do nothing"); - DoCallAndCheckError(rt.reindexer, ns, {"text", "hash", "int", IndexOpts()}, - "Index 'testNSScalar.text' already exists with different settings"); - DoTestDefault(rt.reindexer, ns, {"int", "hash", "int", IndexOpts()}, R"("int":0)", "int", {Variant(0)}, - "add int scalar index. Add default value"); - ValidateReloadState(rt.reindexer, ns, R"({"id":%d,"text":"","int":0})", "reload ns (ScalarTest)", storage); -} - -TEST_F(IndexTupleTest, DISABLED_ScalarNestedTest) { - static const std::string ns = "testNSNested"; - const auto storage = CreateEmptyNamespace(ns); - - DoTestDefault(rt.reindexer, ns, {"obj.more.nested", {"obj.more.nested"}, "hash", "int64", IndexOpts()}, - R"("obj":{"more":{"nested":0}})", "obj.more.nested", {Variant(int64_t(0))}, "add new nested scalar index"); - DoTestEmpty(rt.reindexer, ns, {"obj.more.nested", {"obj.more.nested"}, "hash", "int64", IndexOpts()}, - R"({"id":%d,"obj":{"more":{"nested":0}}})", "update nested index. Do nothing"); - DoTestEmpty(rt.reindexer, ns, {"id+obj.more.nested", {"id", "obj.more.nested"}, "tree", "composite", IndexOpts{}}, - R"({"id":%d,"obj":{"more":{"nested":0}}})", "add new composite index. 
Do nothing"); - DoCallAndCheckError(rt.reindexer, ns, {"obj.more", {"obj.more"}, "hash", "string", IndexOpts()}, - "Invalid tag type value for KeyValueType: ''"); - DoCallAndCheckError(rt.reindexer, ns, {"obj", "hash", "int64", IndexOpts()}, "Invalid tag type value for KeyValueType: ''"); - DoTestDefault(rt.reindexer, ns, {"obj.near", {"obj.near"}, "tree", "string", IndexOpts()}, R"("obj":{"more":{"nested":0},"near":""})", - "obj.near", {Variant("")}, "add nested scalar index to root"); - DoTestDefault(rt.reindexer, ns, {"obj.nested.text", {"obj.nested.text"}, "hash", "string", IndexOpts()}, - R"("obj":{"more":{"nested":0},"near":"","nested":{"text":""}}})", "obj.nested.text", {Variant("")}, - "add nested another path scalar index"); - DoTestDefault(rt.reindexer, ns, {"obj.idx", {"obj.idx"}, "hash", "int64", IndexOpts()}, - R"("obj":{"more":{"nested":0},"near":"","nested":{"text":""},"idx":0})", "obj.idx", {Variant(int64_t(0))}, - "add nested 2nd level path scalar index"); - DoTestDefault(rt.reindexer, ns, {"obj.new.another.one", {"obj.new.another.one"}, "tree", "double", IndexOpts()}, - R"("obj":{"more":{"nested":0},"near":"","nested":{"text":""},"idx":0,"new":{"another":{"one":0.0}}}})", - "obj.new.another.one", {Variant(0.0)}, "add nested scalar index with multiple new path levels"); - DoCallAndCheckError(rt.reindexer, ns, {"boom", {"obj.new.another.one"}, "tree", "string", IndexOpts()}, - "Cannot add field with name 'boom' to namespace 'testNSNested'. Json path 'obj.new.another.one' already used" - " in field 'obj.new.another.one'"); - DoCallAndCheckError(rt.reindexer, ns, {"boom", {"obj.new.another.one.two"}, "hash", "int64", IndexOpts()}, - "Cannot add field with name 'boom' (jsonpath 'obj.new.another.one.two') and type 'int64' to namespace" - " 'testNSNested'. Already exists json path 'obj.new.another.one' with type 'double' in field" - " 'obj.new.another.one'. Rewriting is impossible"); - DoTestDefault( - rt.reindexer, ns, {"root2.more.nested", {"root2.more.nested"}, "hash", "int64", IndexOpts()}, - R"("obj":{"more":{"nested":0},"near":"","nested":{"text":""},"idx":0,"new":{"another":{"one":0.0}}},"root2":{"more":{"nested":0}})", - "root2.more.nested", {Variant(int64_t(0))}, "add new root with nested"); - DoTestDefault( - rt.reindexer, ns, {"boom", {"obj.new.another.one_ext"}, "hash", "int64", IndexOpts()}, - R"("obj":{"more":{"nested":0},"near":"","nested":{"text":""},"idx":0,"new":{"another":{"one":0.0,"one_ext":0}}},"root2":{"more":{"nested":0}})", - "obj.new.another.one_ext", {Variant(int64_t(0))}, "add new nested scalar index with name extension in last part"); - DoTestDefault( - rt.reindexer, ns, {"a-ha", {"a.ha"}, "hash", "int64", IndexOpts()}, - R"("obj":{"more":{"nested":0},"near":"","nested":{"text":""},"idx":0,"new":{"another":{"one":0.0,"one_ext":0}}},"root2":{"more":{"nested":0}},"a":{"ha":0})", - "a.ha", {Variant(int64_t(0))}, "add another nested scalar index on top level"); - ValidateReloadState( - rt.reindexer, ns, - R"({"id":%d,"obj":{"more":{"nested":0},"near":"","nested":{"text":""},"idx":0,"new":{"another":{"one":0.0,"one_ext":0}}},"root2":{"more":{"nested":0}},"a":{"ha":0}})", - "reload ns (ScalarNestedTest)", storage); -} - -TEST_F(IndexTupleTest, SparseItemTest) { - static const std::string ns = "testNSSparse"; - const auto storage = CreateSparseNamespace(ns); - - DoTestEmpty(rt.reindexer, ns, {"sparse1", {"fld1"}, "hash", "int", IndexOpts().Sparse()}, - R"({"id":%d,"fld1":1,"fld2":{"nested":"test"}})", "add some sparse index to present nested field. 
Do nothing"); - DoCallAndCheckError(rt.reindexer, ns, {"sparse2", {"fld2"}, "hash", "int", IndexOpts().Sparse()}, "Can't convert 'test' to number"); - DoCallAndCheckError(rt.reindexer, ns, {"sparse3", {"fld2.nested"}, "hash", "int", IndexOpts().Sparse()}, - "Can't convert 'test' to number"); - DoTestEmpty(rt.reindexer, ns, {"sparse2", {"fld2"}, "hash", "string", IndexOpts().Sparse()}, - R"({"id":%d,"fld1":1,"fld2":{"nested":"test"}})", "add some sparse index to present part path field. Do nothing"); - ValidateReloadState(rt.reindexer, ns, R"({"id":%d,"fld1":1,"fld2":{"nested":"test"}})", "reload ns (SparseItemTest)", storage); -} - -TEST_F(IndexTupleTest, NestedUpdateTest) { - static const std::string ns = "testNSUpdate"; - const auto storage = CreateNamespace(ns); - - DoTestDefault( - rt.reindexer, ns, {"obj.nested", {"obj.nested"}, "hash", "string", IndexOpts()}, - R"("objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":"0"},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}])", - "obj.nested", VariantArray{Variant{"0"}}, "add obj.nested index - update field type"); - DoCallAndCheckError(rt.reindexer, ns, {"try_change_type", {"last.text"}, "hash", "int", IndexOpts()}, "Can't convert 'OK' to number"); - ValidateReloadState( - rt.reindexer, ns, - R"({"id":%d,"objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":"0"},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}]})", - "reload ns (NestedUpdateTest)", storage); -} - -// TODO: This test must be reenabled after #1353 -TEST_F(IndexTupleTest, DISABLED_ArrayTest) { - static const std::string ns = "testNSArray"; - const auto storage = CreateEmptyNamespace(ns); - - DoTestDefault(rt.reindexer, ns, {"array", "hash", "int", IndexOpts().Array()}, R"("array":[])", "array", {}, - "add int array index. Add empty array"); - DoTestDefault(rt.reindexer, ns, - {"obj.some.arr_1st", {"obj.some.array", "arr_fld", "obj.array"}, "hash", "int64", IndexOpts().Array(), 0}, - R"("array":[],"arr_fld":[])", "arr_fld", VariantArray{}.MarkArray(), "add array index. Add empty array"); - DoCallAndCheckError(rt.reindexer, ns, {"obj.some.array", {"obj.array"}, "hash", "int64", IndexOpts().Array(), 0}, - "Cannot add field with name 'obj.some.array' to namespace 'testNSArray'. Json path 'obj.array' already used in " - "field 'obj.some.arr_1st'"); - DoTestDefault(rt.reindexer, ns, - {"obj.some.new_array", {"obj.some.new_array", "arr_fld1", "arr_fld2"}, "hash", "int64", IndexOpts().Array(), 0}, - R"("array":[],"arr_fld":[],"arr_fld2":[])", "arr_fld2", VariantArray{}.MarkArray(), - "add another array index (chooses last of two). Add empty array"); - // TODO: This logic is disabled due to #1819 - DoTestDefault(rt.reindexer, ns, {"obj.new.array", {"obj.new.array"}, "hash", "int64", IndexOpts().Array(), 0}, - R"("array":[],"arr_fld":[],"arr_fld2":[]})" /*,"obj":{"new":{"array":[]}})"*/, "obj.new.array", VariantArray{}, - "add new nested (only) index. Add empty array"); - // TODO: This logic is disabled due to #1819 - DoTestDefault(rt.reindexer, ns, {"arr", "hash", "int64", IndexOpts().Array()}, - R"("array":[],"arr_fld":[],"arr_fld2":[],"arr":[]})" /*,"obj":{"new":{"array":[]}},"arr":[])"*/, "arr", VariantArray{}, - "add new field with nested (only) indexes. 
Add empty array"); - DoCallAndCheckError(rt.reindexer, ns, - {"arr_restriction", {"arr_fld3", "arr_fld4", "arr.some.arr_1st"}, "hash", "int64", IndexOpts().Array(), 0}, - "Cannot add field with name 'arr_restriction' (jsonpath 'arr.some.arr_1st') and type 'int64' to namespace" - " 'testNSArray'. Already exists json path 'arr' with type 'int64' in field 'arr'. Rewriting is impossible"); - DoTestEmpty(rt.reindexer, ns, {"new_sparse_array", {"new_sparse_array"}, "hash", "int64", IndexOpts().Array().Sparse(), 0}, - R"({"id":%d,"array":[],"arr_fld":[],"arr_fld2":[],"arr":[]})" /*,"obj":{"new":{"array":[]}},"arr":[]})"*/, - "add new sparse array index. Do nothing"); - ValidateReloadState(rt.reindexer, ns, - R"({"id":%d,"array":[],"arr_fld":[],"arr_fld2":[],"arr":[]})" /*,"obj":{"new":{"array":[]}},"arr":[]})"*/, - "reload ns (ArrayTest)", storage); -} - -// TODO: This test must be reenabled after #1353 -TEST_F(IndexTupleTest, DISABLED_ArrayNestedTest) { - static const std::string ns = "testNSArrayObj"; - const auto storage = CreateNamespace(ns); - - DoCallAndCheckError(rt.reindexer, ns, {"try_change_type", {"last.text"}, "hash", "int", IndexOpts().Array().Sparse()}, - "Can't convert 'OK' to number"); - DoCallAndCheckError(rt.reindexer, ns, {"try_change_type", {"last.text"}, "hash", "int", IndexOpts().Array()}, - "Can't convert 'OK' to number"); - // TODO: This logic is disabled due to #1819 - DoTestDefault( - rt.reindexer, ns, {"next.another.last", {"next.another.last"}, "hash", "string", IndexOpts().Array()}, - R"("objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}]})" /*,"next":{"another":{"last":[]}})"*/ - , - "next.another.last", VariantArray{}, "add nested index to field by new path. Add empty array"); - DoTestDefault( - rt.reindexer, ns, {"obj.alternative", {"obj.alternative"}, "hash", "string", IndexOpts().Array()}, - R"("objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0,"alternative":[]},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}]})" /*,"next":{"another":{"last":[]}})"*/ - , - "obj.alternative", VariantArray{}, "add nested index to field. Add empty array"); - - DoTestDefault( - rt.reindexer, ns, {"last.1st.2nd.ext", {"last.1st.2nd.ext"}, "hash", "string", IndexOpts().Array()}, - R"("objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0,"alternative":[]},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14,"ext":[]}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}]})" /*,"next":{"another":{"last":[]}})"*/ - , - "last.1st.2nd.ext.more", VariantArray{}, "add nested-nested index to field. Add empty array"); - DoCallAndCheckError(rt.reindexer, ns, {"last.1st.2nd.ext", {"last.alt", "last.1st.2nd.ext"}, "hash", "string", IndexOpts().Array()}, - "Index 'testNSArrayObj.last.1st.2nd.ext' already exists with different settings"); - // TODO: This logic is disabled due to #1819 - // DoTestDefault( - // rt.reindexer, ns, {"last.1st.2nd.ext.more", {"last.1st.2nd.ext.more"}, "hash", "string", IndexOpts().Array()}, - // R"("objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0,"alternative":[]},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14,"ext":{"more":[]}}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}],"next":{"another":{"last":[]}})", - // "last.1st.2nd.ext.more", VariantArray{}, "add nested-nested index to field. 
Add empty array"); - // DoCallAndCheckError(rt.reindexer, ns, - // {"last.1st.2nd.ext.more", {"last.alt", "last.1st.2nd.ext.more"}, "hash", "string", IndexOpts().Array()}, - // "Index 'testNSArrayObj.last.1st.2nd.ext.more' already exists with different settings"); - DoTestDefault( - rt.reindexer, ns, {"last.1st.ext", {"last.1st.ext"}, "hash", "string", IndexOpts().Array()}, - R"("objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0,"alternative":[]},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14,"ext":[]}},"ext":[]},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}]})" /*,"next":{"another":{"last":[]}})"*/ - , - "last.1st.ext", VariantArray{}, "add array index into the presented nested field. Add empty array"); - ValidateReloadState( - rt.reindexer, ns, - R"({"id":%d,"objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0,"alternative":[]},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14,"ext":[]}},"ext":[]},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}]})" /*,"next":{"another":{"last":[]}}})"*/ - , - "reload ns (ArrayNestedTest)", storage); -} - -TEST_F(IndexTupleTest, ArrayInToArrayTest) { - static const std::string ns = "testNSArrayArr"; - const auto storage = CreateNamespace(ns); - - // TODO: This logic is disabled due to #1819 - DoTestDefault( - rt.reindexer, ns, {"objs.more", {"objs.more"}, "hash", "string", IndexOpts().Array()}, - R"("objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":[3,2,1]},{"field":11},{"field":[9]}]}])", - "obj.more", VariantArray{}, "do not add anything into objects array"); - DoTestEmpty( - rt.reindexer, ns, {"arr.nested_arr.field", {"arr.nested_arr.field"}, "hash", "string", IndexOpts().Array()}, - R"({"id":%d,"objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":["3","2","1"]},{"field":"11"},{"field":["9"]}]}]})", - "add nested index to array array (update). Do nothing"); - DoTestEmpty( - rt.reindexer, ns, {"arr.new_fld", {"arr.new_fld"}, "hash", "string", IndexOpts().Array()}, - R"({"id":%d,"objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":["3","2","1"]},{"field":"11"},{"field":["9"]}]}]})", - "add nested index to array array. Do nothing"); - DoTestEmpty( - rt.reindexer, ns, {"arr.nested_arr.ext_fld", {"arr.nested_arr.ext_fld"}, "hash", "string", IndexOpts().Array()}, - R"({"id":%d,"objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":["3","2","1"]},{"field":"11"},{"field":["9"]}]}]})", - "add nested nested index to array array. Do nothing"); - ValidateReloadState( - rt.reindexer, ns, - R"({"id":%d,"objs":[{"fld":1},{"fld":2},{"fld":5}],"obj":{"nested":0},"last":{"text":"OK","1st":{"2nd":{"3rd":3.14}}},"arr":[{"nested_arr":[{"field":["3","2","1"]},{"field":"11"},{"field":["9"]}]}]})", - "reload ns (ArrayInToArrayTest)", storage); -} - -// TODO: This test must be reenabled after #1353 -TEST_F(IndexTupleTest, DISABLED_NestedOrderingTest) { - static const std::string ns = "testNSNestedOrdering"; - const auto storage = CreateEmptyNamespace(ns); - - DoTestDefault(rt.reindexer, ns, {"nest1", {"obj.more.nested"}, "hash", "int", IndexOpts()}, R"("obj":{"more":{"nested":0}})", "nest1", - VariantArray{Variant{0}}, "add nest1. 
Add default value"); - DoTestDefault(rt.reindexer, ns, {"nest2", {"obj.near"}, "hash", "int", IndexOpts()}, R"("obj":{"more":{"nested":0},"near":0})", "nest2", - VariantArray{Variant{0}}, "add nest2. Add default value"); - DoTestDefault(rt.reindexer, ns, {"nest3", {"obj.nestd.text"}, "text", "string", IndexOpts()}, - R"("obj":{"more":{"nested":0},"near":0,"nestd":{"text":""}})", "nest3", VariantArray{Variant{""}}, - "add nest3. Add default value"); - DoTestDefault(rt.reindexer, ns, {"nest11", {"obj.more.nested2"}, "hash", "int", IndexOpts()}, - R"("obj":{"more":{"nested":0,"nested2":0},"near":0,"nestd":{"text":""}})", "nest11", VariantArray{Variant{0}}, - "add nest11. Add default value"); - ValidateReloadState(rt.reindexer, ns, R"({"id":%d,"obj":{"more":{"nested":0,"nested2":0},"near":0,"nestd":{"text":""}}})", - "reload ns (NestedDiffOrderingTest)", storage); -} - -// TODO: This test must be reenabled after #1353 -TEST_F(IndexTupleTest, DISABLED_NullTest) { - static const std::string ns = "testNSNull"; - const auto storage = CreateEmptyNamespace(ns); - - // update only one first item - { - const std::string sql = "UPDATE testNSNull SET fld1 = null, fld2 = [null, null] WHERE id = " + std::to_string(IdStart); - Query query = Query::FromSQL(sql); - QueryResults qr; - auto err = rt.reindexer->Update(query, qr); - ASSERT_TRUE(err.ok()) << err.what(); - ASSERT_EQ(qr.Count(), 1); - } - - // add indexes (simple and array) - { - auto err = rt.reindexer->AddIndex(ns, {"fld1", "hash", "int", IndexOpts()}); - ASSERT_TRUE(err.ok()) << err.what(); - err = rt.reindexer->AddIndex(ns, {"fld2", "hash", "string", IndexOpts().Array()}); - ASSERT_TRUE(err.ok()) << err.what(); - } - - SpecialCheckForNull(rt.reindexer, ns, R"({"id":%d,"fld1":null,"fld2":["null","null"]})", R"({"id":%d,"fld1":0,"fld2":[]})", - "null values test", storage); -} - -// TODO: This test must be reenabled after #1353 -TEST_F(IndexTupleTest, DISABLED_FailTest) { - static const std::string ns = "testNSFail"; - const auto storage = CreateEmptyNamespace(ns); - - DoTestDefault(rt.reindexer, ns, {"nest", {"obj.nest"}, "hash", "int", IndexOpts()}, R"("obj":{"nest":0})", "nest", - VariantArray{Variant{0}}, "add nest. Add default value"); - DoTestDefault(rt.reindexer, ns, {"idx", {"idx"}, "-", "bool", IndexOpts()}, R"("obj":{"nest":0},"idx":false)", "idx", - VariantArray{Variant{false}}, "add idx. Add default value"); - ValidateReloadState(rt.reindexer, ns, R"({"id":%d,"obj":{"nest":0},"idx":false})", "reload ns (FailTest)", storage); -} - -// TODO: This test must be reenabled after #1353 -TEST_F(IndexTupleTest, DISABLED_NestedArrayTest) { - static const std::string ns = "testNSNestedArray"; - const auto storage = CreateArrayNamespace(ns); - - // TODO: This logic is disabled due to #1819 - DoTestDefault(rt.reindexer, ns, {"obj.obj1.arr", {"obj.obj1.arr"}, "hash", "int", IndexOpts().Array()}, - R"("obj":{"val":10},"arr":[1,2,3])", "obj.obj1.arr", VariantArray{}, - // R"("obj":{"val":10,"obj1":{"arr":[]}},"arr":[1,2,3])", "obj.obj1.arr", VariantArray{}, - "add obj.obj1.arr. Add default value"); - DoTestDefault(rt.reindexer, ns, {"obj.arr", {"obj.arr"}, "hash", "int", IndexOpts().Array()}, - R"("obj":{"val":10,"arr":[]},"arr":[1,2,3])", "obj.arr", VariantArray{}, "add obj.arr. 
Add default value"); - ValidateReloadState(rt.reindexer, ns, R"({"id":%d,"obj":{"val":10,"arr":[]},"arr":[1,2,3]})", "reload ns (NestedArrayTest)", storage); -} diff --git a/cpp_src/gtests/tests/unit/namespace_test.cc b/cpp_src/gtests/tests/unit/namespace_test.cc index 28992e66d..47ca557a7 100644 --- a/cpp_src/gtests/tests/unit/namespace_test.cc +++ b/cpp_src/gtests/tests/unit/namespace_test.cc @@ -1516,13 +1516,13 @@ TEST_F(NsApi, ArrayRemoveWithSql) { Item item = it.GetItem(false); checkIfItemJSONValid(it); VariantArray values = item["array_field"]; - ASSERT_EQ(values.size(), 1 + 8 + 2); // [0] || [11,22,33,44,55,66,77,88] || [7,9] remove 99 - int i = 0; // 1 - for ( ; i < 9; ++i) { - ASSERT_EQ(values[i].As(), i * 11); // +8 + ASSERT_EQ(values.size(), 1 + 8 + 2); // [0] || [11,22,33,44,55,66,77,88] || [7,9] remove 99 + int i = 0; // 1 + for (; i < 9; ++i) { + ASSERT_EQ(values[i].As(), i * 11); // +8 } ASSERT_EQ(values[i++].As(), 7); - ASSERT_EQ(values[i].As(), 9); // +2 + ASSERT_EQ(values[i].As(), 9); // +2 } } @@ -1536,12 +1536,12 @@ TEST_F(NsApi, ArrayRemoveWithSql) { Item item = it.GetItem(false); checkIfItemJSONValid(it); VariantArray values = item["array_field"]; - ASSERT_EQ(values.size(), 1 + 8 + 1); // [0,11,22,33,44,55,66,77,88,9] remove 7 - int i = 0; // 1 - for ( ; i < 9; ++i) { - ASSERT_EQ(values[i].As(), i * 11); // +8 + ASSERT_EQ(values.size(), 1 + 8 + 1); // [0,11,22,33,44,55,66,77,88,9] remove 7 + int i = 0; // 1 + for (; i < 9; ++i) { + ASSERT_EQ(values[i].As(), i * 11); // +8 } - ASSERT_EQ(values[i].As(), 9); // +1 + ASSERT_EQ(values[i].As(), 9); // +1 } } @@ -1633,12 +1633,12 @@ TEST_F(NsApi, ArrayRemoveWithSql) { Item item = it.GetItem(false); checkIfItemJSONValid(it); VariantArray values = item["array_field"]; - ASSERT_EQ(values.size(), 9); // [11,22,33,44,55,66,77,88,9] remove 0 + ASSERT_EQ(values.size(), 9); // [11,22,33,44,55,66,77,88,9] remove 0 int i = 0; - for ( ; i < 8; ++i) { - ASSERT_EQ(values[i].As(), (i + 1) * 11); // +8 + for (; i < 8; ++i) { + ASSERT_EQ(values[i].As(), (i + 1) * 11); // +8 } - ASSERT_EQ(values[i].As(), 9); // +1 + ASSERT_EQ(values[i].As(), 9); // +1 } } @@ -1652,12 +1652,12 @@ TEST_F(NsApi, ArrayRemoveWithSql) { Item item = it.GetItem(false); checkIfItemJSONValid(it); VariantArray values = item["array_field"]; - ASSERT_EQ(values.size(), 8); // [11,22,33,44,55,66,77,9] remove 88 + ASSERT_EQ(values.size(), 8); // [11,22,33,44,55,66,77,9] remove 88 int i = 0; - for ( ; i < 7; ++i) { - ASSERT_EQ(values[i].As(), (i + 1) * 11); // +7 + for (; i < 7; ++i) { + ASSERT_EQ(values[i].As(), (i + 1) * 11); // +7 } - ASSERT_EQ(values[i].As(), 9); // +1 + ASSERT_EQ(values[i].As(), 9); // +1 } } diff --git a/cpp_src/gtests/tests/unit/protobuf_test.cc b/cpp_src/gtests/tests/unit/protobuf_test.cc index 885f0c65f..a53958b1f 100644 --- a/cpp_src/gtests/tests/unit/protobuf_test.cc +++ b/cpp_src/gtests/tests/unit/protobuf_test.cc @@ -19,10 +19,10 @@ const std::string kStreetValue = "Miracle Street, "; const std::string kPostalCodeValue = "9745 123 "; const double kSalaryValue = 11238761238768.232342342; -TEST_F(ReindexerApi, ProtobufConvesrionTest) { +TEST_F(ReindexerApi, ProtobufConversionTest) { // Check protobuf for basic types (int/double/array) and double <-> int conversion // !!! This test is using schema from cpp_src/gtests/tests/proto/conversion.proto. - // !!! Protobuf indexes are not constant and depend from the internal reindexer::Schema implementation. + // !!! 
Protobuf indexes are not persistent and depend on the internal implementation of reindexer::Schema. // clang-format off const std::string schema = R"z( { @@ -93,7 +93,7 @@ TEST_F(ReindexerApi, ProtobufConvesrionTest) { TEST_F(ReindexerApi, ProtobufEasyArrayTest) { // Check protobuf for arrays and nested objects // !!! This test is using schema from cpp_src/gtests/tests/proto/easyarrays.proto. - // !!! Protobuf indexes are not constant and depend from the internal reindexer::Schema implementation. + // !!! Protobuf indexes are not persistent and depend on the internal implementation of reindexer::Schema. // clang-format off const std::string schema = R"z( { diff --git a/cpp_src/net/cproto/dispatcher.h b/cpp_src/net/cproto/dispatcher.h index bf9c69cf9..c8b0c6d2a 100644 --- a/cpp_src/net/cproto/dispatcher.h +++ b/cpp_src/net/cproto/dispatcher.h @@ -89,7 +89,7 @@ class Dispatcher { /// Set closer notifier /// @param object close class object - /// @param func function, to be called on connecion close + /// @param func function, to be called on connection close template void OnClose(K* object, void (K::*func)(Context& ctx, const Error& err)) { onClose_ = [=](Context& ctx, const Error& err) { (static_cast(object)->*func)(ctx, err); }; @@ -113,7 +113,7 @@ class Dispatcher { /// @return OnResponse callback reference const std::function& OnResponseRef() const noexcept { return onResponse_; } - /// Handle RPC fron the context + /// Handle RPC from the context /// @param ctx - RPC context Error Handle(Context& ctx) { if rx_likely (uint32_t(ctx.call->cmd) < uint32_t(handlers_.size())) { @@ -141,7 +141,7 @@ class Dispatcher { template ::value, int> = 0> static T get_arg(const Args& args, size_t index, const Context& ctx) { if (index >= args.size()) { - throw Error(errParams, "Invalid args of %s call; argument %d is not submited", CmdName(ctx.call->cmd), static_cast(index)); + throw Error(errParams, "Invalid args of %s call; argument %d is not submitted", CmdName(ctx.call->cmd), static_cast(index)); } return T(args[index]); } @@ -172,7 +172,7 @@ class Dispatcher { std::function logger_; std::function onClose_; - // This should be called from the connection thread only to prevet access to other connection's ClientData + // This should be called from the connection thread only to prevent access to other connection's ClientData std::function onResponse_; }; } // namespace cproto diff --git a/cpp_src/net/iserverconnection.h b/cpp_src/net/iserverconnection.h index b005a788c..76f0a7beb 100644 --- a/cpp_src/net/iserverconnection.h +++ b/cpp_src/net/iserverconnection.h @@ -32,7 +32,7 @@ class IServerConnection { /// Restart connection /// @param s - socket of the accepted connection. - /// @return true - if successfuly restarted, false - if connection can't be restarted. + /// @return true - if successfully restarted, false - if connection can't be restarted. virtual bool Restart(socket&& s) = 0; /// Attach connection to another listener loop. 
Must be called from thread of loop /// @param loop - another loop to bind diff --git a/cpp_src/server/grpc/reindexerservice.cc b/cpp_src/server/grpc/reindexerservice.cc index adae54294..496417876 100644 --- a/cpp_src/server/grpc/reindexerservice.cc +++ b/cpp_src/server/grpc/reindexerservice.cc @@ -516,8 +516,6 @@ Error ReindexerService::buildItems(WrSerializer& wrser, const reindexer::QueryRe break; } case EncodingType::PROTOBUF: { - ProtobufBuilder builder(&wrser, ObjType::TypeObject); - ProtobufBuilder array = builder.Array("items"); for (auto& it : qr) { status = it.GetProtobuf(wrser, false); if (!status.ok()) { diff --git a/cpp_src/server/httpserver.cc b/cpp_src/server/httpserver.cc index bd59f8a18..c0ca367ed 100644 --- a/cpp_src/server/httpserver.cc +++ b/cpp_src/server/httpserver.cc @@ -1501,17 +1501,12 @@ int HTTPServer::queryResultsProtobuf(http::Context& ctx, const reindexer::QueryR WrSerializer wrSer(ctx.writer->GetChunk()); ProtobufBuilder protobufBuilder(&wrSer); - int itemsField = kProtoQueryResultsFields.at(kParamItems); for (size_t i = offset; i < res.Count() && i < offset + limit; i++) { - auto item = protobufBuilder.Object(itemsField); auto it = res[i]; - auto i1 = item.Object(res.getNsNumber(it.GetItemRef().Nsid()) + 1); const auto err = it.GetProtobuf(wrSer, false); if (!err.ok()) { return ctx.Protobuf(err.code(), wrSer.DetachChunk()); } - i1.End(); - item.End(); } int aggregationField = kProtoQueryResultsFields.at(kParamAggregations); diff --git a/fulltext.md b/fulltext.md index a464a2b76..a6bfdddb0 100644 --- a/fulltext.md +++ b/fulltext.md @@ -18,6 +18,7 @@ Reindexer has builtin full text search engine. This document describes usage of - [Highlight](#highlight) - [Snippet](#snippet) - [Snippet_n](#snippet_n) + - [Debug_rank](#debug_rank) - [Typos algorithm](#typos-algorithm) - [Typos handling details](#typos-handling-details) - [More examples](#more-examples) @@ -193,7 +194,7 @@ It is possible to merge multiple queries results and sort final result by releva ``` ## Using select functions It is possible to use select functions to process result data. -For now you can use snippet, snippet_n and highlight. For composite indexes the result of the function will be written in to corresponding subfields. +For now you can use snippet, snippet_n, highlight and debug_rank. For composite indexes the result of the function will be written into the corresponding subfields. You can not put [,)\0] symbols in functions params. If the value contains special characters, it must be enclosed in single quotes. @@ -273,6 +274,10 @@ b.Query("items").Match("text", query).Limit(limit).Offset(offset).Functions("tex result: "{me text str}" +### Debug_rank + +This function outputs additional key-value information about the ranking of each matched word in the text. The returned format and content may vary between reindexer versions. It works with `text` indexes only. + ## Typos algorithm Reindexer finds misspelled words by matching terms with deleted symbols.
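For reference, a minimal usage sketch of the new debug_rank() select function, modeled directly on the DebugInfo test added to ft_generic.cc in this patch; the namespace "nm1", index "ft3", and the query value are simply the ones used in that test, and the exact key-value output is version-dependent, as the fulltext.md note above states.

```cpp
// Sketch based on the DebugInfo test in this patch: replace matched text on the
// full-text index "ft3" of namespace "nm1" with per-term ranking details.
reindexer::Query q("nm1");
q.Where("ft3", CondEq, "маша");
q.AddFunction("ft3 = debug_rank()");  // emits termRank/bm25_norm/termLenBoost/... for each match
reindexer::QueryResults res;
auto err = rt.reindexer->Select(q, res);  // 'rt.reindexer' as in the test fixtures of this patch
ASSERT_TRUE(err.ok()) << err.what();
```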
diff --git a/test/queries_test.go b/test/queries_test.go index 5c5ec2095..ee491e037 100644 --- a/test/queries_test.go +++ b/test/queries_test.go @@ -558,7 +558,7 @@ func TestQueries(t *testing.T) { panic(err) } - CheckTestItemsQueries(t, testCaseWithIDOnlyIndexe) + CheckTestItemsQueries(t, testCaseWithIDOnlyIndexes) }) t.Run("Sparse indexed queries", func(t *testing.T) { t.Parallel() @@ -1267,7 +1267,7 @@ var testCaseWithCommonIndexes = IndexesTestCase{ }, Item: TestItem{}, } -var testCaseWithIDOnlyIndexe = IndexesTestCase{ +var testCaseWithIDOnlyIndexes = IndexesTestCase{ Name: "TEST WITH ID ONLY INDEX", Namespace: "test_items_id_only", Options: sortDistinctOptions{