From 3672f9b2c322c4c8f073acc5973fffce546bd4e5 Mon Sep 17 00:00:00 2001 From: Wen Hui Date: Thu, 7 Nov 2024 20:05:16 -0500 Subject: [PATCH 01/60] Revert "Decline unsubscribe related command in non-subscribed mode" (#1265) This PR goal is to revert the changes on PR https://github.com/valkey-io/valkey/pull/759 Recently, we got some reports that in Valkey 8.0 the PR https://github.com/valkey-io/valkey/pull/759 (Decline unsubscribe related command in non-subscribed mode) causes break change. (https://github.com/valkey-io/valkey/issues/1228) Although from my thought, call commands "unsubscribeCommand", "sunsubscribeCommand", "punsubscribeCommand" in request-response mode make no sense. This is why I created PR https://github.com/valkey-io/valkey/pull/759 But breaking change is always no good, @valkey-io/core-team How do you think we revert this PR code changes? Signed-off-by: hwware --- src/server.c | 6 ------ tests/unit/info.tcl | 3 +-- tests/unit/pubsub.tcl | 25 ++++++++++++++++++++----- tests/unit/pubsubshard.tcl | 5 +++-- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/src/server.c b/src/server.c index 5658b05115..eda9a5b582 100644 --- a/src/server.c +++ b/src/server.c @@ -4165,12 +4165,6 @@ int processCommand(client *c) { return C_OK; } - /* Not allow several UNSUBSCRIBE commands executed under non-pubsub mode */ - if (!c->flag.pubsub && (c->cmd->proc == unsubscribeCommand || c->cmd->proc == sunsubscribeCommand || - c->cmd->proc == punsubscribeCommand)) { - rejectCommandFormat(c, "-NOSUB '%s' command executed not in subscribed mode", c->cmd->fullname); - return C_OK; - } /* Only allow commands with flag "t", such as INFO, REPLICAOF and so on, * when replica-serve-stale-data is no and we are a replica with a broken * link with primary. 
*/ diff --git a/tests/unit/info.tcl b/tests/unit/info.tcl index 61d1acd1f8..278a1d8e33 100644 --- a/tests/unit/info.tcl +++ b/tests/unit/info.tcl @@ -424,8 +424,7 @@ start_server {tags {"info" "external:skip"}} { set info [r info clients] assert_equal [getInfoProperty $info pubsub_clients] {1} # non-pubsub clients should not be involved - catch {unsubscribe $rd2 {non-exist-chan}} e - assert_match {*NOSUB*} $e + assert_equal {0} [unsubscribe $rd2 {non-exist-chan}] set info [r info clients] assert_equal [getInfoProperty $info pubsub_clients] {1} # close all clients diff --git a/tests/unit/pubsub.tcl b/tests/unit/pubsub.tcl index 68dc79a4a4..24b78b6e5a 100644 --- a/tests/unit/pubsub.tcl +++ b/tests/unit/pubsub.tcl @@ -109,12 +109,9 @@ start_server {tags {"pubsub network"}} { $rd1 close } - test "UNSUBSCRIBE and PUNSUBSCRIBE from non-subscribed channels" { + test "UNSUBSCRIBE from non-subscribed channels" { set rd1 [valkey_deferring_client] - foreach command {unsubscribe punsubscribe} { - catch {$command $rd1 {foo bar quux}} e - assert_match {*NOSUB*} $e - } + assert_equal {0 0 0} [unsubscribe $rd1 {foo bar quux}] # clean up clients $rd1 close } @@ -204,6 +201,14 @@ start_server {tags {"pubsub network"}} { $rd close } {0} {resp3} + test "PUNSUBSCRIBE from non-subscribed channels" { + set rd1 [valkey_deferring_client] + assert_equal {0 0 0} [punsubscribe $rd1 {foo.* bar.* quux.*}] + + # clean up clients + $rd1 close + } + test "NUMSUB returns numbers, not strings (#1561)" { r pubsub numsub abc def } {abc 0 def 0} @@ -241,6 +246,16 @@ start_server {tags {"pubsub network"}} { $rd1 close } + test "PUNSUBSCRIBE and UNSUBSCRIBE should always reply" { + # Make sure we are not subscribed to any channel at all. + r punsubscribe + r unsubscribe + # Now check if the commands still reply correctly. 
+ set reply1 [r punsubscribe] + set reply2 [r unsubscribe] + concat $reply1 $reply2 + } {punsubscribe {} 0 unsubscribe {} 0} + ### Keyspace events notification tests test "Keyspace notifications: we receive keyspace notifications" { diff --git a/tests/unit/pubsubshard.tcl b/tests/unit/pubsubshard.tcl index d62a415705..e0e1e2972b 100644 --- a/tests/unit/pubsubshard.tcl +++ b/tests/unit/pubsubshard.tcl @@ -74,8 +74,9 @@ start_server {tags {"pubsubshard external:skip"}} { test "SUNSUBSCRIBE from non-subscribed channels" { set rd1 [valkey_deferring_client] - catch {sunsubscribe $rd1 {foo}} e - assert_match {*NOSUB*} $e + assert_equal {0} [sunsubscribe $rd1 {foo}] + assert_equal {0} [sunsubscribe $rd1 {bar}] + assert_equal {0} [sunsubscribe $rd1 {quux}] # clean up clients $rd1 close From 07b3e7ae7a9e08101fa4dd50aebb8fa5fbdd4f1e Mon Sep 17 00:00:00 2001 From: eifrah-aws Date: Fri, 8 Nov 2024 04:01:37 +0200 Subject: [PATCH 02/60] Add CMake build system for valkey (#1196) With this commit, users are able to build valkey using `CMake`. ## Example usage: Build `valkey-server` in Release mode with TLS enabled and using `jemalloc` as the allocator: ```bash mkdir build-release cd $_ cmake .. -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX=/tmp/valkey-install \ -DBUILD_MALLOC=jemalloc -DBUILD_TLS=1 make -j$(nproc) install # start valkey /tmp/valkey-install/bin/valkey-server ``` Build `valkey-unit-tests`: ```bash mkdir build-release-ut cd $_ cmake .. -DCMAKE_BUILD_TYPE=Release \ -DBUILD_MALLOC=jemalloc -DBUILD_UNIT_TESTS=1 make -j$(nproc) # Run the tests ./bin/valkey-unit-tests ``` Current features supported by this PR: - Building against different allocators: (`jemalloc`, `tcmalloc`, `tcmalloc_minimal` and `libc`), e.g. 
to enable `jemalloc` pass `-DBUILD_MALLOC=jemalloc` to `cmake` - OpenSSL builds (to enable TLS, pass `-DBUILD_TLS=1` to `cmake`) - Sanitizier: pass `-DBUILD_SANITIZER=` to `cmake` - Install target + redis symbolic links - Build `valkey-unit-tests` executable - Standard CMake variables are supported. e.g. to install `valkey` under `/home/you/root` pass `-DCMAKE_INSTALL_PREFIX=/home/you/root` Why using `CMake`? To list *some* of the advantages of using `CMake`: - Superior IDE integrations: cmake generates the file `compile_commands.json` which is required by `clangd` to get a compiler accuracy code completion (in other words: your VScode will thank you) - Out of the source build tree: with the current build system, object files are created all over the place polluting the build source tree, the best practice is to build the project on a separate folder - Multiple build types co-existing: with the current build system, it is often hard to have multiple build configurations. With cmake you can do it easily: - It is the de-facto standard for C/C++ project these days More build examples: ASAN build: ```bash mkdir build-asan cd $_ cmake .. -DBUILD_SANITIZER=address -DBUILD_MALLOC=libc make -j$(nproc) ``` ASAN with jemalloc: ```bash mkdir build-asan-jemalloc cd $_ cmake .. -DBUILD_SANITIZER=address -DBUILD_MALLOC=jemalloc make -j$(nproc) ``` As seen by the previous examples, any combination is allowed and co-exist on the same source tree. ## Valkey installation With this new `CMake`, it is possible to install the binary by running `make install` or creating a package `make package` (currently supported on Debian like distros) ### Example 1: build & install using `make install`: ```bash mkdir build-release cd $_ cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/valkey-install -DCMAKE_BUILD_TYPE=Release make -j$(nproc) install # valkey is now installed under $HOME/valkey-install ``` ### Example 2: create a `.deb` installer: ```bash mkdir build-release cd $_ cmake .. 
-DCMAKE_BUILD_TYPE=Release make -j$(nproc) package # ... CPack deb generation output sudo gdebi -n ./valkey_8.1.0_amd64.deb # valkey is now installed under /opt/valkey ``` ### Example 3: create installer for non Debian systems (e.g. FreeBSD or macOS): ```bash mkdir build-release cd $_ cmake .. -DCMAKE_BUILD_TYPE=Release make -j$(nproc) package mkdir -p /opt/valkey && ./valkey-8.1.0-Darwin.sh --prefix=/opt/valkey --exclude-subdir # valkey-server is now installed under /opt/valkey ``` Signed-off-by: Eran Ifrah --- .cmake-format.yaml | 76 ++++++ .github/workflows/ci.yml | 25 ++ .gitignore | 2 + CMakeLists.txt | 43 ++++ README.md | 124 +++++++--- cmake/Modules/Packaging.cmake | 44 ++++ cmake/Modules/SourceFiles.cmake | 153 ++++++++++++ cmake/Modules/Utils.cmake | 102 ++++++++ cmake/Modules/ValkeySetup.cmake | 381 ++++++++++++++++++++++++++++++ deps/CMakeLists.txt | 26 ++ deps/fpconv/CMakeLists.txt | 4 + deps/hdr_histogram/CMakeLists.txt | 7 + deps/jemalloc/CMakeLists.txt | 23 ++ deps/linenoise/CMakeLists.txt | 4 + deps/lua/CMakeLists.txt | 44 ++++ src/CMakeLists.txt | 77 ++++++ src/modules/CMakeLists.txt | 21 ++ src/server.c | 1 - src/unit/CMakeLists.txt | 58 +++++ tests/CMakeLists.txt | 5 + tests/modules/CMakeLists.txt | 58 +++++ tests/rdma/CMakeLists.txt | 9 + 22 files changed, 1252 insertions(+), 35 deletions(-) create mode 100644 .cmake-format.yaml create mode 100644 CMakeLists.txt create mode 100644 cmake/Modules/Packaging.cmake create mode 100644 cmake/Modules/SourceFiles.cmake create mode 100644 cmake/Modules/Utils.cmake create mode 100644 cmake/Modules/ValkeySetup.cmake create mode 100644 deps/CMakeLists.txt create mode 100644 deps/fpconv/CMakeLists.txt create mode 100644 deps/hdr_histogram/CMakeLists.txt create mode 100644 deps/jemalloc/CMakeLists.txt create mode 100644 deps/linenoise/CMakeLists.txt create mode 100644 deps/lua/CMakeLists.txt create mode 100644 src/CMakeLists.txt create mode 100644 src/modules/CMakeLists.txt create mode 100644 
src/unit/CMakeLists.txt create mode 100644 tests/CMakeLists.txt create mode 100644 tests/modules/CMakeLists.txt create mode 100644 tests/rdma/CMakeLists.txt diff --git a/.cmake-format.yaml b/.cmake-format.yaml new file mode 100644 index 0000000000..98ab11753a --- /dev/null +++ b/.cmake-format.yaml @@ -0,0 +1,76 @@ +format: + _help_line_width: + - How wide to allow formatted cmake files + line_width: 120 + _help_tab_size: + - How many spaces to tab for indent + tab_size: 4 + _help_use_tabchars: + - If true, lines are indented using tab characters (utf-8 + - 0x09) instead of space characters (utf-8 0x20). + - In cases where the layout would require a fractional tab + - character, the behavior of the fractional indentation is + - governed by + use_tabchars: false + _help_separate_ctrl_name_with_space: + - If true, separate flow control names from their parentheses + - with a space + separate_ctrl_name_with_space: true + _help_min_prefix_chars: + - If the statement spelling length (including space and + - parenthesis) is smaller than this amount, then force reject + - nested layouts. + min_prefix_chars: 4 + _help_max_prefix_chars: + - If the statement spelling length (including space and + - parenthesis) is larger than the tab width by more than this + - amount, then force reject un-nested layouts. + max_prefix_chars: 10 + _help_max_lines_hwrap: + - If a candidate layout is wrapped horizontally but it exceeds + - this many lines, then reject the layout. + max_lines_hwrap: 2 + _help_line_ending: + - What style line endings to use in the output. 
+ line_ending: unix + _help_command_case: + - Format command names consistently as 'lower' or 'upper' case + command_case: lower + _help_keyword_case: + - Format keywords consistently as 'lower' or 'upper' case + keyword_case: unchanged + _help_always_wrap: + - A list of command names which should always be wrapped + always_wrap: [] + _help_enable_sort: + - If true, the argument lists which are known to be sortable + - will be sorted lexicographicall + enable_sort: true + _help_autosort: + - If true, the parsers may infer whether or not an argument + - list is sortable (without annotation). + autosort: false + _help_require_valid_layout: + - By default, if cmake-format cannot successfully fit + - everything into the desired linewidth it will apply the + - last, most agressive attempt that it made. If this flag is + - True, however, cmake-format will print error, exit with non- + - zero status code, and write-out nothing + require_valid_layout: false + _help_layout_passes: + - A dictionary mapping layout nodes to a list of wrap + - decisions. See the documentation for more information. + layout_passes: {} +encode: + _help_emit_byteorder_mark: + - If true, emit the unicode byte-order mark (BOM) at the start + - of the file + emit_byteorder_mark: false + _help_input_encoding: + - Specify the encoding of the input file. Defaults to utf-8 + input_encoding: utf-8 + _help_output_encoding: + - Specify the encoding of the output file. Defaults to utf-8. 
+ - Note that cmake only claims to support utf-8 so be careful + - when using anything else + output_encoding: utf-8 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 48a94ef984..bc946b7193 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -34,6 +34,31 @@ jobs: run: | ./src/valkey-unit-tests + test-ubuntu-latest-cmake: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + - name: cmake and make + run: | + sudo apt-get install -y cmake libssl-dev + mkdir -p build-release + cd build-release + cmake -DCMAKE_BUILD_TYPE=Release .. -DBUILD_TLS=yes -DBUILD_UNIT_TESTS=yes + make -j$(nproc) + - name: test + run: | + sudo apt-get install -y tcl8.6 tclx + ln -sf $(pwd)/build-release/bin/valkey-server $(pwd)/src/valkey-server + ln -sf $(pwd)/build-release/bin/valkey-cli $(pwd)/src/valkey-cli + ln -sf $(pwd)/build-release/bin/valkey-benchmark $(pwd)/src/valkey-benchmark + ln -sf $(pwd)/build-release/bin/valkey-server $(pwd)/src/valkey-check-aof + ln -sf $(pwd)/build-release/bin/valkey-server $(pwd)/src/valkey-check-rdb + ln -sf $(pwd)/build-release/bin/valkey-server $(pwd)/src/valkey-sentinel + ./runtest --verbose --tags -slow --dump-logs + - name: unit tests + run: | + ./build-release/bin/valkey-unit-tests + test-sanitizer-address: runs-on: ubuntu-latest steps: diff --git a/.gitignore b/.gitignore index e448e23f7e..b108b4bb92 100644 --- a/.gitignore +++ b/.gitignore @@ -49,3 +49,5 @@ nodes*.conf tests/cluster/tmp/* tests/rdma/rdma-test tags +build-debug/ +build-release/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000000..ad0bab8896 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,43 @@ +cmake_minimum_required(VERSION 3.20) + +# Must be done first +if (APPLE) + # Force clang compiler on macOS + find_program(CLANGPP "clang++") + find_program(CLANG "clang") + if (CLANG AND CLANGPP) + message(STATUS "Found ${CLANGPP}, ${CLANG}") + 
set(CMAKE_CXX_COMPILER ${CLANGPP}) + set(CMAKE_C_COMPILER ${CLANG}) + endif () +endif () + +# Options +option(BUILD_UNIT_TESTS "Build valkey-unit-tests" OFF) +option(BUILD_TEST_MODULES "Build all test modules" OFF) +option(BUILD_EXAMPLE_MODULES "Build example modules" OFF) + +set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") +project("valkey") + +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED ON) +set(CMAKE_C_EXTENSIONS ON) + +include(ValkeySetup) +add_subdirectory(src) +add_subdirectory(tests) + +# Include the packaging module +include(Packaging) + +# Clear cached variables from the cache +unset(BUILD_TESTS CACHE) +unset(CLANGPP CACHE) +unset(CLANG CACHE) +unset(BUILD_RDMA_MODULE CACHE) +unset(BUILD_TLS_MODULE CACHE) +unset(BUILD_UNIT_TESTS CACHE) +unset(BUILD_TEST_MODULES CACHE) +unset(BUILD_EXAMPLE_MODULES CACHE) +unset(USE_TLS CACHE) diff --git a/README.md b/README.md index 1a8ce1a4db..94f38bccf7 100644 --- a/README.md +++ b/README.md @@ -4,13 +4,12 @@ This project was forked from the open source Redis project right before the tran This README is just a fast *quick start* document. More details can be found under [valkey.io](https://valkey.io/) -What is Valkey? --------------- +# What is Valkey? + Valkey is a high-performance data structure server that primarily serves key/value workloads. It supports a wide range of native structures and an extensible plugin system for adding new data structures and access patterns. -Building Valkey --------------- +# Building Valkey using `Makefile` Valkey can be compiled and used on Linux, OSX, OpenBSD, NetBSD, FreeBSD. We support big endian and little endian architectures, and both 32 bit @@ -43,7 +42,7 @@ supports RDMA as connection module mode. 
Run: % make BUILD_RDMA=module -To build with systemd support, you'll need systemd development libraries (such +To build with systemd support, you'll need systemd development libraries (such as libsystemd-dev on Debian/Ubuntu or systemd-devel on CentOS) and run: % make USE_SYSTEMD=yes @@ -71,8 +70,7 @@ More about running the integration tests can be found in [tests/README.md](tests/README.md) and for unit tests, see [src/unit/README.md](src/unit/README.md). -Fixing build problems with dependencies or cached build options ---------- +## Fixing build problems with dependencies or cached build options Valkey has some dependencies which are included in the `deps` directory. `make` does not automatically rebuild dependencies even if something in @@ -91,8 +89,7 @@ optimizations (for debugging purposes), and other similar build time options, those options are cached indefinitely until you issue a `make distclean` command. -Fixing problems building 32 bit binaries ---------- +## Fixing problems building 32 bit binaries If after building Valkey with a 32 bit target you need to rebuild it with a 64 bit target, or the other way around, you need to perform a @@ -105,8 +102,7 @@ the following steps: * Try using the following command line instead of `make 32bit`: `make CFLAGS="-m32 -march=native" LDFLAGS="-m32"` -Allocator ---------- +## Allocator Selecting a non-default memory allocator when building Valkey is done by setting the `MALLOC` environment variable. Valkey is compiled and linked against libc @@ -122,28 +118,25 @@ To compile against jemalloc on Mac OS X systems, use: % make MALLOC=jemalloc -Monotonic clock ---------------- +## Monotonic clock By default, Valkey will build using the POSIX clock_gettime function as the monotonic clock source. On most modern systems, the internal processor clock -can be used to improve performance. Cautions can be found here: +can be used to improve performance. 
Cautions can be found here: http://oliveryang.net/2015/09/pitfalls-of-TSC-usage/ To build with support for the processor's internal instruction clock, use: % make CFLAGS="-DUSE_PROCESSOR_CLOCK" -Verbose build -------------- +## Verbose build Valkey will build with a user-friendly colorized output by default. If you want to see a more verbose output, use the following: % make V=1 -Running Valkey -------------- +# Running Valkey To run Valkey with the default configuration, just type: @@ -165,10 +158,10 @@ as options using the command line. Examples: All the options in valkey.conf are also supported as options using the command line, with exactly the same name. -Running Valkey with TLS: ------------------- +# Running Valkey with TLS: + +## Running manually -### Running manually To manually run a Valkey server with TLS mode (assuming `./gen-test-certs.sh` was invoked so sample certificates/keys are available): * TLS built-in mode: @@ -204,8 +197,7 @@ Specifying `--tls-replication yes` makes a replica connect to the primary. Using `--tls-cluster yes` makes Valkey Cluster use TLS across nodes. -Running Valkey with RDMA: ------------------- +# Running Valkey with RDMA: Note that Valkey Over RDMA is an experimental feature. It may be changed or removed in any minor or major version. @@ -236,8 +228,7 @@ Or: % ibv_devices -Playing with Valkey ------------------- +# Playing with Valkey You can use valkey-cli to play with Valkey. Start a valkey-server instance, then in another terminal try the following: @@ -256,8 +247,7 @@ then in another terminal try the following: (integer) 2 valkey> -Installing Valkey ------------------ +# Installing Valkey In order to install Valkey binaries into /usr/local/bin, just use: @@ -289,16 +279,82 @@ system reboots. You'll be able to stop and start Valkey using the script named `/etc/init.d/valkey_`, for instance `/etc/init.d/valkey_6379`. 
-Code contributions ------------------ +# Building using `CMake` + +In addition to the traditional `Makefile` build, Valkey supports an alternative, **experimental**, build system using `CMake`. + +To build and install `Valkey`, in `Release` mode (an optimized build), type this into your terminal: + +```bash +mkdir build-release +cd $_ +cmake .. -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/opt/valkey +sudo make install +# Valkey is now installed under /opt/valkey +``` + +Other options supported by Valkey's `CMake` build system: + +## Special build flags + +- `-DBUILD_TLS=` enable TLS build for Valkey +- `-DBUILD_RDMA=` enable RDMA module build (only module mode supported) +- `-DBUILD_MALLOC=` choose the allocator to use. Default on Linux: `jemalloc`, for other OS: `libc` +- `-DBUILD_SANITIZER=` build with address sanitizer enabled +- `-DBUILD_UNIT_TESTS=[1|0]` when set, the build will produce the executable `valkey-unit-tests` +- `-DBUILD_TEST_MODULES=[1|0]` when set, the build will include the modules located under the `tests/modules` folder +- `-DBUILD_EXAMPLE_MODULES=[1|0]` when set, the build will include the example modules located under the `src/modules` folder + +## Common flags + +- `-DCMAKE_BUILD_TYPE=` define the build type, see CMake manual for more details +- `-DCMAKE_INSTALL_PREFIX=/installation/path` override this value to define a custom install prefix. Default: `/usr/local` +- `-G` generate build files for "Generator Name". By default, CMake will generate `Makefile`s. + +## Verbose build + +`CMake` generates a user-friendly colorized output by default. +If you want to see a more verbose output, use the following: + +```bash +make VERBOSE=1 +``` + +## Troubleshooting + +During the `CMake` stage, `CMake` caches variables in a local file named `CMakeCache.txt`. All variables generated by Valkey +are removed from the cache once consumed (this is done by calling to `unset(VAR-NAME CACHE)`). 
However, some variables, +like the compiler path, are kept in cache. To start a fresh build either remove the cache file `CMakeCache.txt` from the +build folder, or delete the build folder completely. + +**It is important to re-run `CMake` when adding new source files.** + +## Integration with IDE + +During the `CMake` stage of the build, `CMake` generates a JSON file named `compile_commands.json` and places it under the +build folder. This file is used by many IDEs and text editors for providing code completion (via `clangd`). + +A small caveat is that these tools will look for `compile_commands.json` under the Valkey's top folder. +A common workaround is to create a symbolic link to it: + +```bash +cd /path/to/valkey/ +# We assume here that your build folder is `build-release` +ln -sf $(pwd)/build-release/compile_commands.json $(pwd)/compile_commands.json +``` + +Restart your IDE and voila + +# Code contributions + Please see the [CONTRIBUTING.md][2]. For security bugs and vulnerabilities, please see [SECURITY.md][3]. 
-[1]: https://github.com/valkey-io/valkey/blob/unstable/COPYING -[2]: https://github.com/valkey-io/valkey/blob/unstable/CONTRIBUTING.md -[3]: https://github.com/valkey-io/valkey/blob/unstable/SECURITY.md +# Valkey is an open community project under LF Projects -Valkey is an open community project under LF Projects ------------------ Valkey a Series of LF Projects, LLC 2810 N Church St, PMB 57274 Wilmington, Delaware 19802-4447 + +[1]: https://github.com/valkey-io/valkey/blob/unstable/COPYING +[2]: https://github.com/valkey-io/valkey/blob/unstable/CONTRIBUTING.md +[3]: https://github.com/valkey-io/valkey/blob/unstable/SECURITY.md diff --git a/cmake/Modules/Packaging.cmake b/cmake/Modules/Packaging.cmake new file mode 100644 index 0000000000..c7ed5c426b --- /dev/null +++ b/cmake/Modules/Packaging.cmake @@ -0,0 +1,44 @@ +set(CPACK_PACKAGE_NAME "valkey") + +valkey_parse_version(CPACK_PACKAGE_VERSION_MAJOR CPACK_PACKAGE_VERSION_MINOR CPACK_PACKAGE_VERSION_PATCH) + +set(CPACK_PACKAGE_CONTACT "maintainers@lists.valkey.io") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Valkey is an open source (BSD) high-performance key/value datastore") +set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_SOURCE_DIR}/COPYING") +set(CPACK_RESOURCE_FILE_README "${CMAKE_SOURCE_DIR}/README.md") +set(CPACK_STRIP_FILES TRUE) + +valkey_get_distro_name(DISTRO_NAME) +message(STATUS "Current host distro: ${DISTRO_NAME}") + +if (DISTRO_NAME MATCHES ubuntu + OR DISTRO_NAME MATCHES debian + OR DISTRO_NAME MATCHES mint) + message(STATUS "Adding target package for ${DISTRO_NAME}") + set(CPACK_PACKAGING_INSTALL_PREFIX "/opt/valkey") + # Debian related parameters + set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Valkey contributors") + set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON) + set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT) + set(CPACK_GENERATOR "DEB") +endif () + +include(CPack) +unset(DISTRO_NAME CACHE) + +# --------------------------------------------------- +# Create a helper script for creating symbolic links +# 
--------------------------------------------------- +write_file( + ${CMAKE_BINARY_DIR}/CreateSymlink.sh + "\ +#!/bin/bash \n\ +if [ -z \${DESTDIR} ]; then \n\ + # Script is called during 'make install' \n\ + PREFIX=${CMAKE_INSTALL_PREFIX}/bin \n\ +else \n\ + # Script is called during 'make package' \n\ + PREFIX=\${DESTDIR}${CPACK_PACKAGING_INSTALL_PREFIX}/bin \n\ +fi \n\ +cd \$PREFIX \n\ +ln -sf \$1 \$2") diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake new file mode 100644 index 0000000000..d76f17625e --- /dev/null +++ b/cmake/Modules/SourceFiles.cmake @@ -0,0 +1,153 @@ +# ------------------------------------------------- +# Define the sources to be built +# ------------------------------------------------- + +# valkey-server source files +set(VALKEY_SERVER_SRCS + ${CMAKE_SOURCE_DIR}/src/threads_mngr.c + ${CMAKE_SOURCE_DIR}/src/adlist.c + ${CMAKE_SOURCE_DIR}/src/quicklist.c + ${CMAKE_SOURCE_DIR}/src/ae.c + ${CMAKE_SOURCE_DIR}/src/anet.c + ${CMAKE_SOURCE_DIR}/src/dict.c + ${CMAKE_SOURCE_DIR}/src/kvstore.c + ${CMAKE_SOURCE_DIR}/src/sds.c + ${CMAKE_SOURCE_DIR}/src/zmalloc.c + ${CMAKE_SOURCE_DIR}/src/lzf_c.c + ${CMAKE_SOURCE_DIR}/src/lzf_d.c + ${CMAKE_SOURCE_DIR}/src/pqsort.c + ${CMAKE_SOURCE_DIR}/src/zipmap.c + ${CMAKE_SOURCE_DIR}/src/sha1.c + ${CMAKE_SOURCE_DIR}/src/ziplist.c + ${CMAKE_SOURCE_DIR}/src/release.c + ${CMAKE_SOURCE_DIR}/src/memory_prefetch.c + ${CMAKE_SOURCE_DIR}/src/io_threads.c + ${CMAKE_SOURCE_DIR}/src/networking.c + ${CMAKE_SOURCE_DIR}/src/util.c + ${CMAKE_SOURCE_DIR}/src/object.c + ${CMAKE_SOURCE_DIR}/src/db.c + ${CMAKE_SOURCE_DIR}/src/replication.c + ${CMAKE_SOURCE_DIR}/src/rdb.c + ${CMAKE_SOURCE_DIR}/src/t_string.c + ${CMAKE_SOURCE_DIR}/src/t_list.c + ${CMAKE_SOURCE_DIR}/src/t_set.c + ${CMAKE_SOURCE_DIR}/src/t_zset.c + ${CMAKE_SOURCE_DIR}/src/t_hash.c + ${CMAKE_SOURCE_DIR}/src/config.c + ${CMAKE_SOURCE_DIR}/src/aof.c + ${CMAKE_SOURCE_DIR}/src/pubsub.c + ${CMAKE_SOURCE_DIR}/src/multi.c + 
${CMAKE_SOURCE_DIR}/src/debug.c + ${CMAKE_SOURCE_DIR}/src/sort.c + ${CMAKE_SOURCE_DIR}/src/intset.c + ${CMAKE_SOURCE_DIR}/src/syncio.c + ${CMAKE_SOURCE_DIR}/src/cluster.c + ${CMAKE_SOURCE_DIR}/src/cluster_legacy.c + ${CMAKE_SOURCE_DIR}/src/cluster_slot_stats.c + ${CMAKE_SOURCE_DIR}/src/crc16.c + ${CMAKE_SOURCE_DIR}/src/endianconv.c + ${CMAKE_SOURCE_DIR}/src/slowlog.c + ${CMAKE_SOURCE_DIR}/src/eval.c + ${CMAKE_SOURCE_DIR}/src/bio.c + ${CMAKE_SOURCE_DIR}/src/rio.c + ${CMAKE_SOURCE_DIR}/src/rand.c + ${CMAKE_SOURCE_DIR}/src/memtest.c + ${CMAKE_SOURCE_DIR}/src/syscheck.c + ${CMAKE_SOURCE_DIR}/src/crcspeed.c + ${CMAKE_SOURCE_DIR}/src/crccombine.c + ${CMAKE_SOURCE_DIR}/src/crc64.c + ${CMAKE_SOURCE_DIR}/src/bitops.c + ${CMAKE_SOURCE_DIR}/src/sentinel.c + ${CMAKE_SOURCE_DIR}/src/notify.c + ${CMAKE_SOURCE_DIR}/src/setproctitle.c + ${CMAKE_SOURCE_DIR}/src/blocked.c + ${CMAKE_SOURCE_DIR}/src/hyperloglog.c + ${CMAKE_SOURCE_DIR}/src/latency.c + ${CMAKE_SOURCE_DIR}/src/sparkline.c + ${CMAKE_SOURCE_DIR}/src/valkey-check-rdb.c + ${CMAKE_SOURCE_DIR}/src/valkey-check-aof.c + ${CMAKE_SOURCE_DIR}/src/geo.c + ${CMAKE_SOURCE_DIR}/src/lazyfree.c + ${CMAKE_SOURCE_DIR}/src/module.c + ${CMAKE_SOURCE_DIR}/src/evict.c + ${CMAKE_SOURCE_DIR}/src/expire.c + ${CMAKE_SOURCE_DIR}/src/geohash.c + ${CMAKE_SOURCE_DIR}/src/geohash_helper.c + ${CMAKE_SOURCE_DIR}/src/childinfo.c + ${CMAKE_SOURCE_DIR}/src/defrag.c + ${CMAKE_SOURCE_DIR}/src/siphash.c + ${CMAKE_SOURCE_DIR}/src/rax.c + ${CMAKE_SOURCE_DIR}/src/t_stream.c + ${CMAKE_SOURCE_DIR}/src/listpack.c + ${CMAKE_SOURCE_DIR}/src/localtime.c + ${CMAKE_SOURCE_DIR}/src/lolwut.c + ${CMAKE_SOURCE_DIR}/src/lolwut5.c + ${CMAKE_SOURCE_DIR}/src/lolwut6.c + ${CMAKE_SOURCE_DIR}/src/acl.c + ${CMAKE_SOURCE_DIR}/src/tracking.c + ${CMAKE_SOURCE_DIR}/src/socket.c + ${CMAKE_SOURCE_DIR}/src/tls.c + ${CMAKE_SOURCE_DIR}/src/sha256.c + ${CMAKE_SOURCE_DIR}/src/timeout.c + ${CMAKE_SOURCE_DIR}/src/setcpuaffinity.c + ${CMAKE_SOURCE_DIR}/src/monotonic.c + 
${CMAKE_SOURCE_DIR}/src/mt19937-64.c + ${CMAKE_SOURCE_DIR}/src/resp_parser.c + ${CMAKE_SOURCE_DIR}/src/call_reply.c + ${CMAKE_SOURCE_DIR}/src/script_lua.c + ${CMAKE_SOURCE_DIR}/src/script.c + ${CMAKE_SOURCE_DIR}/src/functions.c + ${CMAKE_SOURCE_DIR}/src/function_lua.c + ${CMAKE_SOURCE_DIR}/src/commands.c + ${CMAKE_SOURCE_DIR}/src/strl.c + ${CMAKE_SOURCE_DIR}/src/connection.c + ${CMAKE_SOURCE_DIR}/src/unix.c + ${CMAKE_SOURCE_DIR}/src/server.c + ${CMAKE_SOURCE_DIR}/src/logreqres.c) + +# valkey-cli +set(VALKEY_CLI_SRCS + ${CMAKE_SOURCE_DIR}/src/anet.c + ${CMAKE_SOURCE_DIR}/src/adlist.c + ${CMAKE_SOURCE_DIR}/src/dict.c + ${CMAKE_SOURCE_DIR}/src/valkey-cli.c + ${CMAKE_SOURCE_DIR}/src/zmalloc.c + ${CMAKE_SOURCE_DIR}/src/release.c + ${CMAKE_SOURCE_DIR}/src/ae.c + ${CMAKE_SOURCE_DIR}/src/serverassert.c + ${CMAKE_SOURCE_DIR}/src/crcspeed.c + ${CMAKE_SOURCE_DIR}/src/crccombine.c + ${CMAKE_SOURCE_DIR}/src/crc64.c + ${CMAKE_SOURCE_DIR}/src/siphash.c + ${CMAKE_SOURCE_DIR}/src/crc16.c + ${CMAKE_SOURCE_DIR}/src/monotonic.c + ${CMAKE_SOURCE_DIR}/src/cli_common.c + ${CMAKE_SOURCE_DIR}/src/mt19937-64.c + ${CMAKE_SOURCE_DIR}/src/strl.c + ${CMAKE_SOURCE_DIR}/src/cli_commands.c) + +# valkey-benchmark +set(VALKEY_BENCHMARK_SRCS + ${CMAKE_SOURCE_DIR}/src/ae.c + ${CMAKE_SOURCE_DIR}/src/anet.c + ${CMAKE_SOURCE_DIR}/src/valkey-benchmark.c + ${CMAKE_SOURCE_DIR}/src/adlist.c + ${CMAKE_SOURCE_DIR}/src/dict.c + ${CMAKE_SOURCE_DIR}/src/zmalloc.c + ${CMAKE_SOURCE_DIR}/src/serverassert.c + ${CMAKE_SOURCE_DIR}/src/release.c + ${CMAKE_SOURCE_DIR}/src/crcspeed.c + ${CMAKE_SOURCE_DIR}/src/crccombine.c + ${CMAKE_SOURCE_DIR}/src/crc64.c + ${CMAKE_SOURCE_DIR}/src/siphash.c + ${CMAKE_SOURCE_DIR}/src/crc16.c + ${CMAKE_SOURCE_DIR}/src/monotonic.c + ${CMAKE_SOURCE_DIR}/src/cli_common.c + ${CMAKE_SOURCE_DIR}/src/mt19937-64.c + ${CMAKE_SOURCE_DIR}/src/strl.c) + +# valkey-rdma module +set(VALKEY_RDMA_MODULE_SRCS ${CMAKE_SOURCE_DIR}/src/rdma.c) + +# valkey-tls module +set(VALKEY_TLS_MODULE_SRCS 
${CMAKE_SOURCE_DIR}/src/tls.c) diff --git a/cmake/Modules/Utils.cmake b/cmake/Modules/Utils.cmake new file mode 100644 index 0000000000..304f39fb2c --- /dev/null +++ b/cmake/Modules/Utils.cmake @@ -0,0 +1,102 @@ +# Return the current host distro name. For example: ubuntu, debian, amzn etc +function (valkey_get_distro_name DISTRO_NAME) + if (LINUX AND NOT APPLE) + execute_process( + COMMAND /bin/bash "-c" "cat /etc/os-release |grep ^ID=|cut -d = -f 2" + OUTPUT_VARIABLE _OUT_VAR + OUTPUT_STRIP_TRAILING_WHITESPACE) + # clean the output + string(REPLACE "\"" "" _OUT_VAR "${_OUT_VAR}") + string(REPLACE "." "" _OUT_VAR "${_OUT_VAR}") + set(${DISTRO_NAME} + "${_OUT_VAR}" + PARENT_SCOPE) + elseif (APPLE) + set(${DISTRO_NAME} + "darwin" + PARENT_SCOPE) + elseif (IS_FREEBSD) + set(${DISTRO_NAME} + "freebsd" + PARENT_SCOPE) + else () + set(${DISTRO_NAME} + "unknown" + PARENT_SCOPE) + endif () +endfunction () + +function (valkey_parse_version OUT_MAJOR OUT_MINOR OUT_PATCH) + # Read and parse package version from version.h file + file(STRINGS ${CMAKE_SOURCE_DIR}/src/version.h VERSION_LINES) + foreach (LINE ${VERSION_LINES}) + string(FIND "${LINE}" "#define VALKEY_VERSION " VERSION_STR_POS) + if (VERSION_STR_POS GREATER -1) + string(REPLACE "#define VALKEY_VERSION " "" LINE "${LINE}") + string(REPLACE "\"" "" LINE "${LINE}") + # Change "." to ";" to make it a list + string(REPLACE "." 
";" LINE "${LINE}") + list(GET LINE 0 _MAJOR) + list(GET LINE 1 _MINOR) + list(GET LINE 2 _PATCH) + message(STATUS "Valkey version: ${_MAJOR}.${_MINOR}.${_PATCH}") + # Set the output variables + set(${OUT_MAJOR} + ${_MAJOR} + PARENT_SCOPE) + set(${OUT_MINOR} + ${_MINOR} + PARENT_SCOPE) + set(${OUT_PATCH} + ${_PATCH} + PARENT_SCOPE) + endif () + endforeach () +endfunction () + +# Given input argument `OPTION_VALUE`, check that the `OPTION_VALUE` is from the allowed values (one of: +# module/yes/no/1/0/true/false) +# +# Return value: +# +# If ARG is valid, return its number where: +# +# ~~~ +# - `no` | `0` | `off` => return `0` +# - `yes` | `1` | `on` => return `1` +# - `module` => return `2` +# ~~~ +function (valkey_parse_build_option OPTION_VALUE OUT_ARG_ENUM) + list(APPEND VALID_OPTIONS "yes") + list(APPEND VALID_OPTIONS "1") + list(APPEND VALID_OPTIONS "on") + list(APPEND VALID_OPTIONS "no") + list(APPEND VALID_OPTIONS "0") + list(APPEND VALID_OPTIONS "off") + list(APPEND VALID_OPTIONS "module") + + string(TOLOWER "${OPTION_VALUE}" OPTION_VALUE) + list(FIND VALID_OPTIONS "${ARG}" OPT_INDEX) + if (VERSION_STR_POS GREATER -1) + message(FATAL_ERROR "Invalid value passed ''${OPTION_VALUE}'") + endif () + + if ("${OPTION_VALUE}" STREQUAL "yes" + OR "${OPTION_VALUE}" STREQUAL "1" + OR "${OPTION_VALUE}" STREQUAL "on") + set(${OUT_ARG_ENUM} + 1 + PARENT_SCOPE) + elseif ( + "${OPTION_VALUE}" STREQUAL "no" + OR "${OPTION_VALUE}" STREQUAL "0" + OR "${OPTION_VALUE}" STREQUAL "off") + set(${OUT_ARG_ENUM} + 0 + PARENT_SCOPE) + else () + set(${OUT_ARG_ENUM} + 2 + PARENT_SCOPE) + endif () +endfunction () diff --git a/cmake/Modules/ValkeySetup.cmake b/cmake/Modules/ValkeySetup.cmake new file mode 100644 index 0000000000..e935c3b308 --- /dev/null +++ b/cmake/Modules/ValkeySetup.cmake @@ -0,0 +1,381 @@ +include(CheckIncludeFiles) +include(ProcessorCount) +include(Utils) + +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") +set(CMAKE_RUNTIME_OUTPUT_DIRECTORY 
"${CMAKE_BINARY_DIR}/bin") +set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") + +# Generate compile_commands.json file for IDEs code completion support +set(CMAKE_EXPORT_COMPILE_COMMANDS 1) + +processorcount(VALKEY_PROCESSOR_COUNT) +message(STATUS "Processor count: ${VALKEY_PROCESSOR_COUNT}") + +# Installed executables will have this permissions +set(VALKEY_EXE_PERMISSIONS + OWNER_EXECUTE + OWNER_WRITE + OWNER_READ + GROUP_EXECUTE + GROUP_READ + WORLD_EXECUTE + WORLD_READ) + +set(VALKEY_SERVER_CFLAGS "") +set(VALKEY_SERVER_LDFLAGS "") + +# ---------------------------------------------------- +# Helper functions & macros +# ---------------------------------------------------- +macro (add_valkey_server_compiler_options value) + set(VALKEY_SERVER_CFLAGS "${VALKEY_SERVER_CFLAGS} ${value}") +endmacro () + +macro (add_valkey_server_linker_option value) + list(APPEND VALKEY_SERVER_LDFLAGS ${value}) +endmacro () + +macro (get_valkey_server_linker_option return_value) + list(JOIN VALKEY_SERVER_LDFLAGS " " ${value} ${return_value}) +endmacro () + +set(IS_FREEBSD 0) +if (CMAKE_SYSTEM_NAME MATCHES "^.*BSD$|DragonFly") + message(STATUS "Building for FreeBSD compatible system") + set(IS_FREEBSD 1) + include_directories("/usr/local/include") + add_valkey_server_compiler_options("-DUSE_BACKTRACE") +endif () + +# Helper function for creating symbolic link so that: link -> source +macro (valkey_create_symlink source link) + install( + CODE "execute_process( \ + COMMAND /bin/bash ${CMAKE_BINARY_DIR}/CreateSymlink.sh \ + ${source} \ + ${link} \ + )" + COMPONENT "valkey") +endmacro () + +# Install a binary +macro (valkey_install_bin target) + # Install cli tool and create a redis symbolic link + install( + TARGETS ${target} + DESTINATION ${CMAKE_INSTALL_BINDIR} + PERMISSIONS ${VALKEY_EXE_PERMISSIONS} + COMPONENT "valkey") +endmacro () + +# Helper function that defines, builds and installs `target` In addition, it creates a symbolic link between the target +# and `link_name` 
+macro (valkey_build_and_install_bin target sources ld_flags libs link_name) + add_executable(${target} ${sources}) + + if (USE_JEMALLOC) + # Using jemalloc + target_link_libraries(${target} jemalloc) + endif () + + # Place this line last to ensure that ${ld_flags} is placed last on the linker line + target_link_libraries(${target} ${libs} ${ld_flags}) + target_link_libraries(${target} hiredis) + if (USE_TLS) + # Add required libraries needed for TLS + target_link_libraries(${target} OpenSSL::SSL hiredis_ssl) + endif () + + if (IS_FREEBSD) + target_link_libraries(${target} execinfo) + endif () + + # Install cli tool and create a redis symbolic link + valkey_install_bin(${target}) + valkey_create_symlink(${target} ${link_name}) +endmacro () + +# Helper function that defines, builds and installs `target` module. +macro (valkey_build_and_install_module target sources ld_flags libs) + add_library(${target} SHARED ${sources}) + + if (USE_JEMALLOC) + # Using jemalloc + target_link_libraries(${target} jemalloc) + endif () + + # Place this line last to ensure that ${ld_flags} is placed last on the linker line + target_link_libraries(${target} ${libs} ${ld_flags}) + if (USE_TLS) + # Add required libraries needed for TLS + target_link_libraries(${target} OpenSSL::SSL hiredis_ssl) + endif () + + if (IS_FREEBSD) + target_link_libraries(${target} execinfo) + endif () + + # Install cli tool and create a redis symbolic link + valkey_install_bin(${target}) +endmacro () + +# Determine if we are building in Release or Debug mode +if (CMAKE_BUILD_TYPE MATCHES Debug OR CMAKE_BUILD_TYPE MATCHES DebugFull) + set(VALKEY_DEBUG_BUILD 1) + set(VALKEY_RELEASE_BUILD 0) + message(STATUS "Building in debug mode") +else () + set(VALKEY_DEBUG_BUILD 0) + set(VALKEY_RELEASE_BUILD 1) + message(STATUS "Building in release mode") +endif () + +# ---------------------------------------------------- +# Helper functions - end +# ---------------------------------------------------- + +# 
---------------------------------------------------- +# Build options (allocator, tls, rdma et al) +# ---------------------------------------------------- + +if (NOT BUILD_MALLOC) + if (APPLE) + set(BUILD_MALLOC "libc") + elseif (UNIX) + set(BUILD_MALLOC "jemalloc") + endif () +endif () + +# User may pass different allocator library. Using -DBUILD_MALLOC=, make sure it is a valid value +if (BUILD_MALLOC) + if ("${BUILD_MALLOC}" STREQUAL "jemalloc") + set(MALLOC_LIB "jemalloc") + add_valkey_server_compiler_options("-DUSE_JEMALLOC") + set(USE_JEMALLOC 1) + elseif ("${BUILD_MALLOC}" STREQUAL "libc") + set(MALLOC_LIB "libc") + elseif ("${BUILD_MALLOC}" STREQUAL "tcmalloc") + set(MALLOC_LIB "tcmalloc") + add_valkey_server_compiler_options("-DUSE_TCMALLOC") + elseif ("${BUILD_MALLOC}" STREQUAL "tcmalloc_minimal") + set(MALLOC_LIB "tcmalloc_minimal") + add_valkey_server_compiler_options("-DUSE_TCMALLOC") + else () + message(FATAL_ERROR "BUILD_MALLOC can be one of: jemalloc, libc, tcmalloc or tcmalloc_minimal") + endif () +endif () + +message(STATUS "Using ${MALLOC_LIB}") + +# TLS support +if (BUILD_TLS) + valkey_parse_build_option(${BUILD_TLS} USE_TLS) + if (USE_TLS EQUAL 1) + # Only search for OpenSSL if needed + find_package(OpenSSL REQUIRED) + message(STATUS "OpenSSL include dir: ${OPENSSL_INCLUDE_DIR}") + message(STATUS "OpenSSL libraries: ${OPENSSL_LIBRARIES}") + include_directories(${OPENSSL_INCLUDE_DIR}) + endif () + + if (USE_TLS EQUAL 1) + add_valkey_server_compiler_options("-DUSE_OPENSSL=1") + add_valkey_server_compiler_options("-DBUILD_TLS_MODULE=0") + else () + # Build TLS as a module RDMA can only be built as a module. 
So disable it + message(WARNING "BUILD_TLS can be one of: [ON | OFF | 1 | 0], but '${BUILD_TLS}' was provided") + message(STATUS "TLS support is disabled") + set(USE_TLS 0) + endif () +else () + # By default, TLS is disabled + message(STATUS "TLS is disabled") + set(USE_TLS 0) +endif () + +if (BUILD_RDMA) + set(BUILD_RDMA_MODULE 0) + # RDMA support (Linux only) + if (LINUX AND NOT APPLE) + valkey_parse_build_option(${BUILD_RDMA} USE_RDMA) + if (USE_RDMA EQUAL 2) # Module + message(STATUS "Building RDMA as module") + add_valkey_server_compiler_options("-DUSE_RDMA=2") + find_package(PkgConfig REQUIRED) + + # Locate librdmacm & libibverbs, fail if we can't find them + pkg_check_modules(RDMACM REQUIRED librdmacm) + pkg_check_modules(IBVERBS REQUIRED libibverbs) + + message(STATUS "${RDMACM_LINK_LIBRARIES};${IBVERBS_LINK_LIBRARIES}") + list(APPEND RDMA_LIBS "${RDMACM_LIBRARIES};${IBVERBS_LIBRARIES}") + unset(RDMACM_LINK_LIBRARIES CACHE) + unset(IBVERBS_LINK_LIBRARIES CACHE) + set(BUILD_RDMA_MODULE 1) + elseif (USE_RDMA EQUAL 1) + # RDMA can only be built as a module. 
So disable it + message(WARNING "BUILD_RDMA can be one of: [NO | 0 | MODULE], but '${BUILD_RDMA}' was provided") + message(STATUS "RDMA build is disabled") + set(USE_RDMA 0) + endif () + else () + message(WARNING "RDMA is only supported on Linux platforms") + endif () +endif () + +set(BUILDING_ARM64 0) +set(BUILDING_ARM32 0) + +if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64") + set(BUILDING_ARM64 1) +endif () + +if ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm") + set(BUILDING_ARM32 1) +endif () + +message(STATUS "Building on ${CMAKE_HOST_SYSTEM_NAME}") +if (BUILDING_ARM64) + message(STATUS "Compiling valkey for ARM64") + add_valkey_server_linker_option("-funwind-tables") +endif () + +if (APPLE) + add_valkey_server_linker_option("-rdynamic") + add_valkey_server_linker_option("-ldl") +elseif (UNIX) + add_valkey_server_linker_option("-rdynamic") + add_valkey_server_linker_option("-pthread") + add_valkey_server_linker_option("-ldl") + add_valkey_server_linker_option("-lm") +endif () + +if (VALKEY_DEBUG_BUILD) + # Debug build, use enable "-fno-omit-frame-pointer" + add_valkey_server_compiler_options("-fno-omit-frame-pointer") +endif () + +# Check for Atomic +check_include_files(stdatomic.h HAVE_C11_ATOMIC) +if (HAVE_C11_ATOMIC) + add_valkey_server_compiler_options("-std=gnu11") +else () + add_valkey_server_compiler_options("-std=c99") +endif () + +# Sanitizer +if (BUILD_SANITIZER) + # For best results, force libc + set(MALLOC_LIB, "libc") + if ("${BUILD_SANITIZER}" STREQUAL "address") + add_valkey_server_compiler_options("-fsanitize=address -fno-sanitize-recover=all -fno-omit-frame-pointer") + add_valkey_server_linker_option("-fsanitize=address") + elseif ("${BUILD_SANITIZER}" STREQUAL "thread") + add_valkey_server_compiler_options("-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer") + add_valkey_server_linker_option("-fsanitize=thread") + elseif ("${BUILD_SANITIZER}" STREQUAL "undefined") + add_valkey_server_compiler_options("-fsanitize=undefined 
-fno-sanitize-recover=all -fno-omit-frame-pointer") + add_valkey_server_linker_option("-fsanitize=undefined") + else () + message(FATAL_ERROR "Unknown sanitizer: ${BUILD_SANITIZER}") + endif () +endif () + +include_directories("${CMAKE_SOURCE_DIR}/deps/hiredis") +include_directories("${CMAKE_SOURCE_DIR}/deps/linenoise") +include_directories("${CMAKE_SOURCE_DIR}/deps/lua/src") +include_directories("${CMAKE_SOURCE_DIR}/deps/hdr_histogram") +include_directories("${CMAKE_SOURCE_DIR}/deps/fpconv") + +add_subdirectory("${CMAKE_SOURCE_DIR}/deps") + +# Update linker flags for the allocator +if (USE_JEMALLOC) + include_directories("${CMAKE_SOURCE_DIR}/deps/jemalloc/include") +endif () + +# Common compiler flags +add_valkey_server_compiler_options("-pedantic") + +# ---------------------------------------------------- +# Build options (allocator, tls, rdma et al) - end +# ---------------------------------------------------- + +# ------------------------------------------------- +# Code Generation section +# ------------------------------------------------- +find_program(PYTHON_EXE python3) +if (PYTHON_EXE) + # Python based code generation + message(STATUS "Found python3: ${PYTHON_EXE}") + # Rule for generating commands.def file from json files + message(STATUS "Adding target generate_commands_def") + file(GLOB COMMAND_FILES_JSON "${CMAKE_SOURCE_DIR}/src/commands/*.json") + add_custom_command( + OUTPUT ${CMAKE_BINARY_DIR}/commands_def_generated + DEPENDS ${COMMAND_FILES_JSON} + COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/utils/generate-command-code.py + COMMAND touch ${CMAKE_BINARY_DIR}/commands_def_generated + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/src") + add_custom_target(generate_commands_def DEPENDS ${CMAKE_BINARY_DIR}/commands_def_generated) + + # Rule for generating fmtargs.h + message(STATUS "Adding target generate_fmtargs_h") + add_custom_command( + OUTPUT ${CMAKE_BINARY_DIR}/fmtargs_generated + DEPENDS ${CMAKE_SOURCE_DIR}/utils/generate-fmtargs.py + COMMAND sed 
'/Everything/,$$d' fmtargs.h > fmtargs.h.tmp + COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/utils/generate-fmtargs.py >> fmtargs.h.tmp + COMMAND mv fmtargs.h.tmp fmtargs.h + COMMAND touch ${CMAKE_BINARY_DIR}/fmtargs_generated + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/src") + add_custom_target(generate_fmtargs_h DEPENDS ${CMAKE_BINARY_DIR}/fmtargs_generated) + + # Rule for generating test_files.h + message(STATUS "Adding target generate_test_files_h") + file(GLOB UNIT_TEST_SRCS "${CMAKE_SOURCE_DIR}/src/unit/*.c") + add_custom_command( + OUTPUT ${CMAKE_BINARY_DIR}/test_files_generated + DEPENDS "${UNIT_TEST_SRCS};${CMAKE_SOURCE_DIR}/utils/generate-unit-test-header.py" + COMMAND ${PYTHON_EXE} ${CMAKE_SOURCE_DIR}/utils/generate-unit-test-header.py + COMMAND touch ${CMAKE_BINARY_DIR}/test_files_generated + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/src") + add_custom_target(generate_test_files_h DEPENDS ${CMAKE_BINARY_DIR}/test_files_generated) +else () + # Fake targets + add_custom_target(generate_commands_def) + add_custom_target(generate_fmtargs_h) + add_custom_target(generate_test_files_h) +endif () + +# Generate release.h file (always) +add_custom_target( + release_header + COMMAND sh -c '${CMAKE_SOURCE_DIR}/src/mkreleasehdr.sh' + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/src") + +# ------------------------------------------------- +# Code Generation section - end +# ------------------------------------------------- + +# ---------------------------------------------------------- +# All our source files are defined in SourceFiles.cmake file +# ---------------------------------------------------------- +include(SourceFiles) + +# Clear the below variables from the cache +unset(CMAKE_C_FLAGS CACHE) +unset(BUILD_SANITIZER CACHE) +unset(VALKEY_SERVER_LDFLAGS CACHE) +unset(VALKEY_SERVER_CFLAGS CACHE) +unset(PYTHON_EXE CACHE) +unset(HAVE_C11_ATOMIC CACHE) +unset(USE_TLS CACHE) +unset(USE_RDMA CACHE) +unset(BUILD_TLS CACHE) +unset(BUILD_RDMA CACHE) +unset(BUILD_MALLOC CACHE) 
+unset(USE_JEMALLOC CACHE) +unset(BUILD_TLS_MODULE CACHE) +unset(BUILD_TLS_BUILTIN CACHE) diff --git a/deps/CMakeLists.txt b/deps/CMakeLists.txt new file mode 100644 index 0000000000..c904b94031 --- /dev/null +++ b/deps/CMakeLists.txt @@ -0,0 +1,26 @@ +add_subdirectory(jemalloc) +add_subdirectory(lua) + +# Set hiredis options. We need to disable the defaults set in the OPTION(..) we do this by setting them in the CACHE +set(BUILD_SHARED_LIBS + OFF + CACHE BOOL "Build shared libraries") +set(DISABLE_TESTS + ON + CACHE BOOL "If tests should be compiled or not") +if (USE_TLS) # Module or no module + message(STATUS "Building hiredis_ssl") + set(ENABLE_SSL + ON + CACHE BOOL "Should we test SSL connections") +endif () + +add_subdirectory(hiredis) +add_subdirectory(linenoise) +add_subdirectory(fpconv) +add_subdirectory(hdr_histogram) + +# Clear any cached variables passed to hiredis from the cache +unset(BUILD_SHARED_LIBS CACHE) +unset(DISABLE_TESTS CACHE) +unset(ENABLE_SSL CACHE) diff --git a/deps/fpconv/CMakeLists.txt b/deps/fpconv/CMakeLists.txt new file mode 100644 index 0000000000..c586aa650a --- /dev/null +++ b/deps/fpconv/CMakeLists.txt @@ -0,0 +1,4 @@ +project(fpconv) + +set(SRCS "${CMAKE_CURRENT_LIST_DIR}/fpconv_dtoa.c" "${CMAKE_CURRENT_LIST_DIR}/fpconv_dtoa.h") +add_library(fpconv STATIC ${SRCS}) diff --git a/deps/hdr_histogram/CMakeLists.txt b/deps/hdr_histogram/CMakeLists.txt new file mode 100644 index 0000000000..7b45bd76ba --- /dev/null +++ b/deps/hdr_histogram/CMakeLists.txt @@ -0,0 +1,7 @@ +project(hdr_histogram) + +set(SRCS "${CMAKE_CURRENT_LIST_DIR}/hdr_histogram.c" "${CMAKE_CURRENT_LIST_DIR}/hdr_histogram.h" + "${CMAKE_CURRENT_LIST_DIR}/hdr_atomic.h" "${CMAKE_CURRENT_LIST_DIR}/hdr_redis_malloc.h") + +add_library(hdr_histogram STATIC ${SRCS}) +target_compile_definitions(hdr_histogram PRIVATE HDR_MALLOC_INCLUDE=\"hdr_redis_malloc.h\") diff --git a/deps/jemalloc/CMakeLists.txt b/deps/jemalloc/CMakeLists.txt new file mode 100644 index 0000000000..e79e960ec2 
--- /dev/null +++ b/deps/jemalloc/CMakeLists.txt @@ -0,0 +1,23 @@ +project(jemalloc) + +# Build jemalloc using configure && make install +set(JEMALLOC_INSTALL_DIR ${CMAKE_BINARY_DIR}/jemalloc-build) +set(JEMALLOC_SRC_DIR ${CMAKE_CURRENT_LIST_DIR}) +if (NOT EXISTS ${JEMALLOC_INSTALL_DIR}/lib/libjemalloc.a) + message(STATUS "Building jemalloc (custom build)") + message(STATUS "JEMALLOC_SRC_DIR = ${JEMALLOC_SRC_DIR}") + message(STATUS "JEMALLOC_INSTALL_DIR = ${JEMALLOC_INSTALL_DIR}") + + execute_process( + COMMAND sh -c "${JEMALLOC_SRC_DIR}/configure --disable-cxx \ + --with-version=5.3.0-0-g0 --with-lg-quantum=3 --disable-cache-oblivious --with-jemalloc-prefix=je_ \ + --enable-static --disable-shared --prefix=${JEMALLOC_INSTALL_DIR}" + WORKING_DIRECTORY ${JEMALLOC_SRC_DIR} COMMAND_ERROR_IS_FATAL ANY) + execute_process(COMMAND make -j${VALKEY_PROCESSOR_COUNT} lib/libjemalloc.a install + WORKING_DIRECTORY "${JEMALLOC_SRC_DIR}") +endif () + +# Import the compiled library as a CMake target +add_library(jemalloc STATIC IMPORTED GLOBAL) +set_target_properties(jemalloc PROPERTIES IMPORTED_LOCATION "${JEMALLOC_INSTALL_DIR}/lib/libjemalloc.a" + INCLUDE_DIRECTORIES "${JEMALLOC_INSTALL_DIR}/include") diff --git a/deps/linenoise/CMakeLists.txt b/deps/linenoise/CMakeLists.txt new file mode 100644 index 0000000000..f801e4abf1 --- /dev/null +++ b/deps/linenoise/CMakeLists.txt @@ -0,0 +1,4 @@ +project(linenoise) + +set(SRCS "${CMAKE_CURRENT_LIST_DIR}/linenoise.c" "${CMAKE_CURRENT_LIST_DIR}/linenoise.h") +add_library(linenoise STATIC ${SRCS}) diff --git a/deps/lua/CMakeLists.txt b/deps/lua/CMakeLists.txt new file mode 100644 index 0000000000..e911de9232 --- /dev/null +++ b/deps/lua/CMakeLists.txt @@ -0,0 +1,44 @@ +project(lualib) + +set(LUA_SRC_DIR "${CMAKE_CURRENT_LIST_DIR}/src") +set(LUA_SRCS + ${LUA_SRC_DIR}/fpconv.c + ${LUA_SRC_DIR}/lbaselib.c + ${LUA_SRC_DIR}/lmathlib.c + ${LUA_SRC_DIR}/lstring.c + ${LUA_SRC_DIR}/lparser.c + ${LUA_SRC_DIR}/ldo.c + ${LUA_SRC_DIR}/lzio.c + 
${LUA_SRC_DIR}/lmem.c + ${LUA_SRC_DIR}/strbuf.c + ${LUA_SRC_DIR}/lstrlib.c + ${LUA_SRC_DIR}/lundump.c + ${LUA_SRC_DIR}/lua_cmsgpack.c + ${LUA_SRC_DIR}/loslib.c + ${LUA_SRC_DIR}/lua_struct.c + ${LUA_SRC_DIR}/ldebug.c + ${LUA_SRC_DIR}/lobject.c + ${LUA_SRC_DIR}/ldump.c + ${LUA_SRC_DIR}/lua_cjson.c + ${LUA_SRC_DIR}/ldblib.c + ${LUA_SRC_DIR}/ltm.c + ${LUA_SRC_DIR}/ltable.c + ${LUA_SRC_DIR}/lstate.c + ${LUA_SRC_DIR}/lua_bit.c + ${LUA_SRC_DIR}/lua.c + ${LUA_SRC_DIR}/loadlib.c + ${LUA_SRC_DIR}/lcode.c + ${LUA_SRC_DIR}/lapi.c + ${LUA_SRC_DIR}/lgc.c + ${LUA_SRC_DIR}/lvm.c + ${LUA_SRC_DIR}/lfunc.c + ${LUA_SRC_DIR}/lauxlib.c + ${LUA_SRC_DIR}/ltablib.c + ${LUA_SRC_DIR}/linit.c + ${LUA_SRC_DIR}/lopcodes.c + ${LUA_SRC_DIR}/llex.c + ${LUA_SRC_DIR}/liolib.c) + +add_library(lualib STATIC "${LUA_SRCS}") +target_include_directories(lualib PUBLIC "${LUA_SRC_DIR}") +target_compile_definitions(lualib PRIVATE ENABLE_CJSON_GLOBAL) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000000..b7e328163b --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,77 @@ +project(valkey-server) + +set(INSTALL_BIN_PATH ${CMAKE_INSTALL_PREFIX}/bin) +set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM 1) + +# Target: valkey-server +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${VALKEY_SERVER_CFLAGS}") +message(STATUS "CFLAGS: ${CMAKE_C_FLAGS}") + +get_valkey_server_linker_option(VALKEY_SERVER_LDFLAGS) +list(APPEND SERVER_LIBS "fpconv") +list(APPEND SERVER_LIBS "lualib") +list(APPEND SERVER_LIBS "hdr_histogram") +valkey_build_and_install_bin(valkey-server "${VALKEY_SERVER_SRCS}" "${VALKEY_SERVER_LDFLAGS}" "${SERVER_LIBS}" + "redis-server") +add_dependencies(valkey-server generate_commands_def) +add_dependencies(valkey-server generate_fmtargs_h) +add_dependencies(valkey-server release_header) + +if (VALKEY_RELEASE_BUILD) + # Enable LTO for Release build + set_property(TARGET valkey-server PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) +endif () + +# Target: valkey-cli +list(APPEND CLI_LIBS 
"linenoise") +valkey_build_and_install_bin(valkey-cli "${VALKEY_CLI_SRCS}" "${VALKEY_SERVER_LDFLAGS}" "${CLI_LIBS}" "redis-cli") +add_dependencies(valkey-cli generate_commands_def) +add_dependencies(valkey-cli generate_fmtargs_h) + +# Target: valkey-benchmark +list(APPEND BENCH_LIBS "hdr_histogram") +valkey_build_and_install_bin(valkey-benchmark "${VALKEY_BENCHMARK_SRCS}" "${VALKEY_SERVER_LDFLAGS}" "${BENCH_LIBS}" + "redis-benchmark") +add_dependencies(valkey-benchmark generate_commands_def) +add_dependencies(valkey-benchmark generate_fmtargs_h) + +# Targets: valkey-sentinel, valkey-check-aof and valkey-check-rdb are just symbolic links +valkey_create_symlink("valkey-server" "valkey-sentinel") +valkey_create_symlink("valkey-server" "valkey-check-rdb") +valkey_create_symlink("valkey-server" "valkey-check-aof") + +# Target valkey-rdma +if (BUILD_RDMA_MODULE) + set(MODULE_NAME "valkey-rdma") + message(STATUS "Building RDMA module") + add_library(${MODULE_NAME} SHARED "${VALKEY_RDMA_MODULE_SRCS}") + target_compile_options(${MODULE_NAME} PRIVATE -DBUILD_RDMA_MODULE -DUSE_RDMA=1) + target_link_libraries(${MODULE_NAME} "${RDMA_LIBS}") + # remove the "lib" prefix from the module + set_target_properties(${MODULE_NAME} PROPERTIES PREFIX "") + valkey_install_bin(${MODULE_NAME}) +endif () + +# Target valkey-tls (a module) +if (BUILD_TLS_MODULE) + message(STATUS "Building TLS as a module") + set(MODULE_NAME "valkey-tls") + add_library(${MODULE_NAME} SHARED ${VALKEY_TLS_MODULE_SRCS}) + target_compile_options(${MODULE_NAME} PRIVATE -DUSE_OPENSSL=2 -DBUILD_TLS_MODULE=2) + if (APPLE) + # Some symbols can only be resolved during runtime (they exist in the executable) + target_link_options(${MODULE_NAME} PRIVATE -undefined dynamic_lookup) + endif () + target_link_libraries(${MODULE_NAME} hiredis_ssl OpenSSL::SSL) + set_target_properties(${MODULE_NAME} PROPERTIES PREFIX "") +endif () + +if (BUILD_EXAMPLE_MODULES) + # Include the modules ("hello*") + message(STATUS "Building example 
modules") + add_subdirectory(modules) +endif () + +if (BUILD_UNIT_TESTS) + add_subdirectory(unit) +endif () diff --git a/src/modules/CMakeLists.txt b/src/modules/CMakeLists.txt new file mode 100644 index 0000000000..958796232f --- /dev/null +++ b/src/modules/CMakeLists.txt @@ -0,0 +1,21 @@ +# Build modules +list(APPEND MODULES_LIST "helloacl") +list(APPEND MODULES_LIST "helloblock") +list(APPEND MODULES_LIST "hellocluster") +list(APPEND MODULES_LIST "hellodict") +list(APPEND MODULES_LIST "hellohook") +list(APPEND MODULES_LIST "hellotimer") +list(APPEND MODULES_LIST "hellotype") +list(APPEND MODULES_LIST "helloworld") + +foreach (MODULE_NAME ${MODULES_LIST}) + message(STATUS "Building module: ${MODULE_NAME}") + add_library(${MODULE_NAME} SHARED "${CMAKE_CURRENT_LIST_DIR}/${MODULE_NAME}.c") + target_include_directories(${MODULE_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/src") + set_target_properties(${MODULE_NAME} PROPERTIES PREFIX "") + valkey_install_bin(${MODULE_NAME}) + if (APPLE) + # Some symbols can only be resolved during runtime (they exist in the executable) + target_link_options(${MODULE_NAME} PRIVATE -undefined dynamic_lookup) + endif () +endforeach () diff --git a/src/server.c b/src/server.c index eda9a5b582..e8c13dd763 100644 --- a/src/server.c +++ b/src/server.c @@ -7148,5 +7148,4 @@ __attribute__((weak)) int main(int argc, char **argv) { aeDeleteEventLoop(server.el); return 0; } - /* The End */ diff --git a/src/unit/CMakeLists.txt b/src/unit/CMakeLists.txt new file mode 100644 index 0000000000..7d80c533cf --- /dev/null +++ b/src/unit/CMakeLists.txt @@ -0,0 +1,58 @@ +project(valkey-unit-tests) + +file(GLOB UNIT_TEST_SRCS "${CMAKE_CURRENT_LIST_DIR}/*.c") +set(UNIT_TEST_SRCS "${UNIT_TEST_SRCS}") + +get_valkey_server_linker_option(VALKEY_SERVER_LDFLAGS) + +# Build unit tests only +message(STATUS "Building unit tests") +list(APPEND COMPILE_DEFINITIONS "SERVER_TEST=1") +if (USE_TLS) + if (BUILD_TLS_MODULE) + # TLS as a module + list(APPEND COMPILE_DEFINITIONS 
"USE_OPENSSL=2") + else (BUILD_TLS_MODULE) + # Built-in TLS support + list(APPEND COMPILE_DEFINITIONS "USE_OPENSSL=1") + list(APPEND COMPILE_DEFINITIONS "BUILD_TLS_MODULE=0") + endif () +endif () + +# Build Valkey sources as a static library for the test +add_library(valkeylib STATIC ${VALKEY_SERVER_SRCS}) +target_compile_options(valkeylib PRIVATE "${COMPILE_FLAGS}") +target_compile_definitions(valkeylib PRIVATE "${COMPILE_DEFINITIONS}") + +add_executable(valkey-unit-tests ${UNIT_TEST_SRCS}) +target_compile_options(valkey-unit-tests PRIVATE "${COMPILE_FLAGS}") +target_compile_definitions(valkey-unit-tests PRIVATE "${COMPILE_DEFINITIONS}") +add_dependencies(valkey-unit-tests generate_test_files_h) + +if (UNIX AND NOT APPLE) + # Avoid duplicate symbols on non macOS + target_link_options(valkey-unit-tests PRIVATE "-Wl,--allow-multiple-definition") +endif () + +if (USE_JEMALLOC) + # Using jemalloc + target_link_libraries(valkey-unit-tests jemalloc) +endif () + +if (IS_FREEBSD) + target_link_libraries(valkey-unit-tests execinfo) +endif () + +target_link_libraries( + valkey-unit-tests + valkeylib + fpconv + lualib + hdr_histogram + hiredis + ${VALKEY_SERVER_LDFLAGS}) + +if (USE_TLS) + # Add required libraries needed for TLS + target_link_libraries(valkey-unit-tests OpenSSL::SSL hiredis_ssl) +endif () diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt new file mode 100644 index 0000000000..2a76897bb0 --- /dev/null +++ b/tests/CMakeLists.txt @@ -0,0 +1,5 @@ +add_subdirectory(rdma) + +if (BUILD_TEST_MODULES) + add_subdirectory(modules) +endif () diff --git a/tests/modules/CMakeLists.txt b/tests/modules/CMakeLists.txt new file mode 100644 index 0000000000..0cac0c4cb6 --- /dev/null +++ b/tests/modules/CMakeLists.txt @@ -0,0 +1,58 @@ +# Build test modules +list(APPEND MODULES_LIST "commandfilter") +list(APPEND MODULES_LIST "basics") +list(APPEND MODULES_LIST "testrdb") +list(APPEND MODULES_LIST "fork") +list(APPEND MODULES_LIST "infotest") +list(APPEND MODULES_LIST 
"propagate") +list(APPEND MODULES_LIST "misc") +list(APPEND MODULES_LIST "hooks") +list(APPEND MODULES_LIST "blockonkeys") +list(APPEND MODULES_LIST "blockonbackground") +list(APPEND MODULES_LIST "scan") +list(APPEND MODULES_LIST "datatype") +list(APPEND MODULES_LIST "datatype2") +list(APPEND MODULES_LIST "auth") +list(APPEND MODULES_LIST "keyspace_events") +list(APPEND MODULES_LIST "blockedclient") +list(APPEND MODULES_LIST "getkeys") +list(APPEND MODULES_LIST "getchannels") +list(APPEND MODULES_LIST "test_lazyfree") +list(APPEND MODULES_LIST "timer") +list(APPEND MODULES_LIST "defragtest") +list(APPEND MODULES_LIST "keyspecs") +list(APPEND MODULES_LIST "hash") +list(APPEND MODULES_LIST "zset") +list(APPEND MODULES_LIST "stream") +list(APPEND MODULES_LIST "mallocsize") +list(APPEND MODULES_LIST "aclcheck") +list(APPEND MODULES_LIST "list") +list(APPEND MODULES_LIST "subcommands") +list(APPEND MODULES_LIST "reply") +list(APPEND MODULES_LIST "cmdintrospection") +list(APPEND MODULES_LIST "eventloop") +list(APPEND MODULES_LIST "moduleconfigs") +list(APPEND MODULES_LIST "moduleconfigstwo") +list(APPEND MODULES_LIST "publish") +list(APPEND MODULES_LIST "usercall") +list(APPEND MODULES_LIST "postnotifications") +list(APPEND MODULES_LIST "moduleauthtwo") +list(APPEND MODULES_LIST "rdbloadsave") +list(APPEND MODULES_LIST "crash") +list(APPEND MODULES_LIST "cluster") + +foreach (MODULE_NAME ${MODULES_LIST}) + message(STATUS "Building test module: ${MODULE_NAME}") + add_library(${MODULE_NAME} SHARED "${CMAKE_SOURCE_DIR}/tests/modules/${MODULE_NAME}.c") + target_include_directories(${MODULE_NAME} PRIVATE "${CMAKE_SOURCE_DIR}/src") + if (LINUX AND NOT APPLE) + # set the std to gnu11 here, to allow crash.c to get compiled + target_compile_options(${MODULE_NAME} PRIVATE "-std=gnu11") + endif () + set_target_properties(${MODULE_NAME} PROPERTIES PREFIX "") + valkey_install_bin(${MODULE_NAME}) + if (APPLE) + # Some symbols can only be resolved during runtime (they exist in the 
executable) + target_link_options(${MODULE_NAME} PRIVATE -undefined dynamic_lookup) + endif () +endforeach () diff --git a/tests/rdma/CMakeLists.txt b/tests/rdma/CMakeLists.txt new file mode 100644 index 0000000000..f721e9af52 --- /dev/null +++ b/tests/rdma/CMakeLists.txt @@ -0,0 +1,9 @@ +project(rdma-test) + +# Make sure RDMA build is enabled +if (BUILD_RDMA_MODULE) + add_executable(rdma-test "${CMAKE_SOURCE_DIR}/tests/rdma/rdma-test.c") + target_link_libraries(rdma-test "${RDMA_LIBS}") + target_link_options(rdma-test PRIVATE "-pthread") + valkey_install_bin(rdma-test) +endif () From e972d564609d50e97e19672b19f7590c09b4c086 Mon Sep 17 00:00:00 2001 From: Jacob Murphy Date: Fri, 8 Nov 2024 02:25:43 +0000 Subject: [PATCH 03/60] Make sure to copy null terminator byte in dual channel code (#1272) As @madolson pointed out, these do have proper null terminators. This cleans them up to follow the rest of the code which copies the last byte explicitly, which should help reduce cognitive load and make it more resilient should code refactors occur (e.g. non-static allocation of memory, changes to other functions). 
--------- Signed-off-by: Jacob Murphy --- src/replication.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 6e8faff7a2..48e98ab8e7 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2697,7 +2697,7 @@ static int dualChannelReplHandleEndOffsetResponse(connection *conn, sds *err) { /* Initiate repl_provisional_primary to act as this replica temp primary until RDB is loaded */ server.repl_provisional_primary.conn = server.repl_transfer_s; - memcpy(server.repl_provisional_primary.replid, primary_replid, CONFIG_RUN_ID_SIZE); + memcpy(server.repl_provisional_primary.replid, primary_replid, sizeof(server.repl_provisional_primary.replid)); server.repl_provisional_primary.reploff = reploffset; server.repl_provisional_primary.read_reploff = reploffset; server.repl_provisional_primary.dbid = dbid; @@ -4269,7 +4269,7 @@ void replicationResurrectProvisionalPrimary(void) { /* Create a primary client, but do not initialize the read handler yet, as this replica still has a local buffer to * drain. */ replicationCreatePrimaryClientWithHandler(server.repl_transfer_s, server.repl_provisional_primary.dbid, NULL); - memcpy(server.primary->replid, server.repl_provisional_primary.replid, CONFIG_RUN_ID_SIZE); + memcpy(server.primary->replid, server.repl_provisional_primary.replid, sizeof(server.repl_provisional_primary.replid)); server.primary->reploff = server.repl_provisional_primary.reploff; server.primary->read_reploff = server.repl_provisional_primary.read_reploff; server.primary_repl_offset = server.primary->reploff; From 45d596e1216472e49b9f950a4b9a040b6e87add6 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 8 Nov 2024 16:33:01 +0800 Subject: [PATCH 04/60] RDMA: Use conn ref counter to prevent double close (#1250) RDMA: Use connection reference counter style The reference counter of connection is used to protect re-entry of closenmethod. Use this style instead the unsafe one. 
Signed-off-by: zhenwei pi --- src/rdma.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/src/rdma.c b/src/rdma.c index bb38baa0f1..7cdcb24913 100644 --- a/src/rdma.c +++ b/src/rdma.c @@ -1199,6 +1199,14 @@ static void connRdmaClose(connection *conn) { conn->fd = -1; } + /* If called from within a handler, schedule the close but + * keep the connection until the handler returns. + */ + if (connHasRefs(conn)) { + conn->flags |= CONN_FLAG_CLOSE_SCHEDULED; + return; + } + if (!cm_id) { return; } @@ -1689,7 +1697,6 @@ static int rdmaProcessPendingData(void) { listNode *ln; rdma_connection *rdma_conn; connection *conn; - listNode *node; int processed; processed = listLength(pending_list); @@ -1697,17 +1704,17 @@ static int rdmaProcessPendingData(void) { while ((ln = listNext(&li))) { rdma_conn = listNodeValue(ln); conn = &rdma_conn->c; - node = rdma_conn->pending_list_node; /* a connection can be disconnected by remote peer, CM event mark state as CONN_STATE_CLOSED, kick connection * read/write handler to close connection */ if (conn->state == CONN_STATE_ERROR || conn->state == CONN_STATE_CLOSED) { - listDelNode(pending_list, node); - /* do NOT call callHandler(conn, conn->read_handler) here, conn is freed in handler! */ - if (conn->read_handler) { - conn->read_handler(conn); - } else if (conn->write_handler) { - conn->write_handler(conn); + listDelNode(pending_list, rdma_conn->pending_list_node); + rdma_conn->pending_list_node = NULL; + /* Invoke both read_handler and write_handler, unless read_handler + returns 0, indicating the connection has closed, in which case + write_handler will be skipped. 
*/ + if (callHandler(conn, conn->read_handler)) { + callHandler(conn, conn->write_handler); } continue; From 0b5b2c7484e6d401ce7818571bde09b49f88180e Mon Sep 17 00:00:00 2001 From: zixuan zhao Date: Mon, 11 Nov 2024 04:33:26 -0500 Subject: [PATCH 05/60] Log as primary role (M) instead of child process (C) during startup (#1282) Init server.pid earlier to keep log message role consistent. Closes #1206. Before: ```text 24881:C 21 Oct 2024 21:10:57.165 * oO0OoO0OoO0Oo Valkey is starting oO0OoO0OoO0Oo 24881:C 21 Oct 2024 21:10:57.165 * Valkey version=255.255.255, bits=64, commit=814e0f55, modified=1, pid=24881, just started 24881:C 21 Oct 2024 21:10:57.165 * Configuration loaded 24881:M 21 Oct 2024 21:10:57.167 * Increased maximum number of open files to 10032 (it was originally set to 1024). ``` After: ```text 68560:M 08 Nov 2024 16:10:12.257 * oO0OoO0OoO0Oo Valkey is starting oO0OoO0OoO0Oo 68560:M 08 Nov 2024 16:10:12.257 * Valkey version=255.255.255, bits=64, commit=45d596e1, modified=1, pid=68560, just started 68560:M 08 Nov 2024 16:10:12.257 * Configuration loaded 68560:M 08 Nov 2024 16:10:12.258 * monotonic clock: POSIX clock_gettime ``` Signed-off-by: azuredream --- src/server.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index e8c13dd763..0c4ddbe4b8 100644 --- a/src/server.c +++ b/src/server.c @@ -2670,7 +2670,6 @@ void initServer(void) { server.aof_state = server.aof_enabled ? AOF_ON : AOF_OFF; server.fsynced_reploff = server.aof_enabled ? 
0 : -1; server.hz = server.config_hz; - server.pid = getpid(); server.in_fork_child = CHILD_TYPE_NONE; server.rdb_pipe_read = -1; server.rdb_child_exit_pipe = -1; @@ -6883,6 +6882,7 @@ __attribute__((weak)) int main(int argc, char **argv) { if (exec_name == NULL) exec_name = argv[0]; server.sentinel_mode = checkForSentinelMode(argc, argv, exec_name); initServerConfig(); + server.pid = getpid(); ACLInit(); /* The ACL subsystem must be initialized ASAP because the basic networking code and client creation depends on it. */ moduleInitModulesSystem(); From 9300a7ebc856356f1d55df16ddfb845773b5daca Mon Sep 17 00:00:00 2001 From: Qu Chen Date: Mon, 11 Nov 2024 01:39:48 -0800 Subject: [PATCH 06/60] Set fields to NULL after free in freeClient() (#1279) Null out several references after freeing the object in `freeClient()`. This is just to make the code more safe, to protect against use-after-free for future changes. Signed-off-by: Qu Chen --- src/networking.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/networking.c b/src/networking.c index 96dd05d505..1a008a852d 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1731,6 +1731,7 @@ void freeClient(client *c) { /* UNWATCH all the keys */ unwatchAllKeys(c); listRelease(c->watched_keys); + c->watched_keys = NULL; /* Unsubscribe from all the pubsub channels */ pubsubUnsubscribeAllChannels(c, 0); @@ -1738,16 +1739,22 @@ void freeClient(client *c) { pubsubUnsubscribeAllPatterns(c, 0); unmarkClientAsPubSub(c); dictRelease(c->pubsub_channels); + c->pubsub_channels = NULL; dictRelease(c->pubsub_patterns); + c->pubsub_patterns = NULL; dictRelease(c->pubsubshard_channels); + c->pubsubshard_channels = NULL; /* Free data structures. 
*/ listRelease(c->reply); + c->reply = NULL; zfree(c->buf); + c->buf = NULL; freeReplicaReferencedReplBuffer(c); freeClientArgv(c); freeClientOriginalArgv(c); if (c->deferred_reply_errors) listRelease(c->deferred_reply_errors); + c->deferred_reply_errors = NULL; #ifdef LOG_REQ_RES reqresReset(c, 1); #endif From 4aacffa32da07eb09b271c7c3dfbd58c7a2cb8d1 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 11 Nov 2024 21:42:34 +0800 Subject: [PATCH 07/60] Stabilize dual replication test to avoid getting LOADING error (#1288) When doing `$replica replicaof no one`, we may get a LOADING error, this is because during the test execution, the replica may reconnect very quickly, and the full sync is initiated, and the replica has entered the LOADING state. In this commit, we make sure the primary is pasued after the fork, so the replica won't enter the LOADING state, and with this fix, this test seems more natural and predictable. Signed-off-by: Binbin --- .../integration/dual-channel-replication.tcl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index 5302030db9..05bdc130c1 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -23,14 +23,20 @@ proc get_client_id_by_last_cmd {r cmd} { return $client_id } -# Wait until the process enters a paused state, then resume the process. -proc wait_and_resume_process idx { +# Wait until the process enters a paused state. +proc wait_process_paused idx { set pid [srv $idx pid] wait_for_condition 50 1000 { [string match "T*" [exec ps -o state= -p $pid]] } else { fail "Process $pid didn't stop, current state is [exec ps -o state= -p $pid]" } +} + +# Wait until the process enters a paused state, then resume the process. 
+proc wait_and_resume_process idx { + set pid [srv $idx pid] + wait_process_paused $idx resume_process $pid } @@ -790,11 +796,20 @@ start_server {tags {"dual-channel-replication external:skip"}} { } else { fail "Primary did not free repl buf block after sync failure" } + # Full sync will be triggered after the replica is reconnected, pause primary main process after fork. + # In this way, in the subsequent replicaof no one, we won't get the LOADING error if the replica reconnects + # too quickly and enters the loading state. + $primary debug pause-after-fork 1 resume_process $replica_pid set res [wait_for_log_messages -1 {"*Unable to partial resync with replica * for lack of backlog*"} $loglines 2000 10] set loglines [lindex $res 1] } + # Waiting for the primary to enter the paused state, that is, make sure that bgsave is triggered. + wait_process_paused -1 $replica replicaof no one + # Resume the primary and make sure the sync is dropped. + resume_process [srv -1 pid] + $primary debug pause-after-fork 0 wait_for_condition 500 1000 { [s -1 rdb_bgsave_in_progress] eq 0 } else { From 167e8ab8de4c26a41222d94fcf0ccbd1864a9774 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 11 Nov 2024 21:43:46 +0800 Subject: [PATCH 08/60] Trigger the election immediately when doing a manual failover (#1081) Currently when a manual failover is triggeded, we will set a CLUSTER_TODO_HANDLE_FAILOVER to start the election as soon as possible in the next beforeSleep. But in fact, we won't delay the election in manual failover, waitting for the next beforeSleep to kick in will delay the election a some milliseconds. We can trigger the election immediately in this case in the same function call, without waitting for beforeSleep, which can save us some milliseconds. 
Signed-off-by: Binbin --- src/cluster_legacy.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index f1c9eb1fcf..04a04774fe 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -4519,8 +4519,9 @@ void clusterFailoverReplaceYourPrimary(void) { * 3) Perform the failover informing all the other nodes. */ void clusterHandleReplicaFailover(void) { + mstime_t now = mstime(); mstime_t data_age; - mstime_t auth_age = mstime() - server.cluster->failover_auth_time; + mstime_t auth_age = now - server.cluster->failover_auth_time; int needed_quorum = (server.cluster->size / 2) + 1; int manual_failover = server.cluster->mf_end != 0 && server.cluster->mf_can_start; mstime_t auth_timeout, auth_retry_time; @@ -4582,7 +4583,7 @@ void clusterHandleReplicaFailover(void) { /* If the previous failover attempt timeout and the retry time has * elapsed, we can setup a new one. */ if (auth_age > auth_retry_time) { - server.cluster->failover_auth_time = mstime() + + server.cluster->failover_auth_time = now + 500 + /* Fixed delay of 500 milliseconds, let FAIL msg propagate. */ random() % 500; /* Random delay between 0 and 500 milliseconds. */ server.cluster->failover_auth_count = 0; @@ -4594,20 +4595,26 @@ void clusterHandleReplicaFailover(void) { server.cluster->failover_auth_time += server.cluster->failover_auth_rank * 1000; /* However if this is a manual failover, no delay is needed. */ if (server.cluster->mf_end) { - server.cluster->failover_auth_time = mstime(); + server.cluster->failover_auth_time = now; server.cluster->failover_auth_rank = 0; - clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); + /* Reset auth_age since it is outdated now and we can bypass the auth_timeout + * check in the next state and start the election ASAP. 
*/ + auth_age = 0; } serverLog(LL_NOTICE, "Start of election delayed for %lld milliseconds " "(rank #%d, offset %lld).", - server.cluster->failover_auth_time - mstime(), server.cluster->failover_auth_rank, + server.cluster->failover_auth_time - now, server.cluster->failover_auth_rank, replicationGetReplicaOffset()); /* Now that we have a scheduled election, broadcast our offset * to all the other replicas so that they'll updated their offsets * if our offset is better. */ clusterBroadcastPong(CLUSTER_BROADCAST_LOCAL_REPLICAS); - return; + + /* Return ASAP if we can't start the election now. In a manual failover, + * we can start the election immediately, so in this case we continue to + * the next state without waiting for the next beforeSleep. */ + if (now < server.cluster->failover_auth_time) return; } /* It is possible that we received more updated offsets from other @@ -4627,7 +4634,7 @@ void clusterHandleReplicaFailover(void) { } /* Return ASAP if we can't still start the election. */ - if (mstime() < server.cluster->failover_auth_time) { + if (now < server.cluster->failover_auth_time) { clusterLogCantFailover(CLUSTER_CANT_FAILOVER_WAITING_DELAY); return; } From a2d22c63c007eee1709ca71d9bf1e912fadb4f87 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 11 Nov 2024 22:12:49 +0800 Subject: [PATCH 09/60] Fix replica not able to initate election in time when epoch fails (#1009) If multiple primary nodes go down at the same time, their replica nodes will initiate the elections at the same time. There is a certain probability that the replicas will initate the elections in the same epoch. And obviously, in our current election mechanism, only one replica node can eventually get the enough votes, and the other replica node will fail to win due the the insufficient majority, and then its election will time out and we will wait for the retry, which result in a long failure time. 
If another node has won the election in the failover epoch, we can assume that my election has failed and we can retry as soon as possible.
*/ sender->repl_offset = ntohu64(hdr->offset); diff --git a/tests/unit/cluster/failover2.tcl b/tests/unit/cluster/failover2.tcl index 7bc6a05e95..21c4f4a678 100644 --- a/tests/unit/cluster/failover2.tcl +++ b/tests/unit/cluster/failover2.tcl @@ -64,3 +64,36 @@ start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-ping-interval } } ;# start_cluster + + +start_cluster 7 3 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 5000}} { + test "Primaries will not time out then they are elected in the same epoch" { + # Since we have the delay time, so these node may not initiate the + # election at the same time (same epoch). But if they do, we make + # sure there is no failover timeout. + + # Killing there primary nodes. + pause_process [srv 0 pid] + pause_process [srv -1 pid] + pause_process [srv -2 pid] + + # Wait for the failover + wait_for_condition 1000 50 { + [s -7 role] == "master" && + [s -8 role] == "master" && + [s -9 role] == "master" + } else { + fail "No failover detected" + } + + # Make sure there is no failover timeout. + verify_no_log_message -7 "*Failover attempt expired*" 0 + verify_no_log_message -8 "*Failover attempt expired*" 0 + verify_no_log_message -9 "*Failover attempt expired*" 0 + + # Resuming these primary nodes, speed up the shutdown. + resume_process [srv 0 pid] + resume_process [srv -1 pid] + resume_process [srv -2 pid] + } +} ;# start_cluster From 2df56d87c0ebe802f38e8922bb2ea1e4ca9cfa76 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 11 Nov 2024 22:13:47 +0800 Subject: [PATCH 10/60] Fix empty primary may have dirty slots data due to bad migration (#1285) If we become an empty primary for some reason, we still need to check if we need to delete dirty slots, because we may have dirty slots data left over from a bad migration. Like the target node forcibly executes CLUSTER SETSLOT NODE to take over the slot without performing key migration. 
Signed-off-by: Binbin --- src/cluster_legacy.c | 13 ++++++++++++- tests/unit/cluster/replica-migration.tcl | 20 ++++++++++++++++++++ 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index ee7e4c531e..cfde3fd797 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2451,6 +2451,7 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc * need to delete all the keys in the slots we lost ownership. */ uint16_t dirty_slots[CLUSTER_SLOTS]; int dirty_slots_count = 0; + int delete_dirty_slots = 0; /* We should detect if sender is new primary of our shard. * We will know it if all our slots were migrated to sender, and sender @@ -2677,6 +2678,12 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc serverLog(LL_NOTICE, "My last slot was migrated to node %.40s (%s) in shard %.40s. I am now an empty primary.", sender->name, sender->human_nodename, sender->shard_id); + /* We may still have dirty slots when we became a empty primary due to + * a bad migration. + * + * In order to maintain a consistent state between keys and slots + * we need to remove all the keys from the slots we lost. */ + delete_dirty_slots = 1; } } else if (dirty_slots_count) { /* If we are here, we received an update message which removed @@ -2686,6 +2693,10 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc * * In order to maintain a consistent state between keys and slots * we need to remove all the keys from the slots we lost. */ + delete_dirty_slots = 1; + } + + if (delete_dirty_slots) { for (int j = 0; j < dirty_slots_count; j++) { serverLog(LL_NOTICE, "Deleting keys in dirty slot %d on node %.40s (%s) in shard %.40s", dirty_slots[j], myself->name, myself->human_nodename, myself->shard_id); @@ -6069,7 +6080,7 @@ void removeChannelsInSlot(unsigned int slot) { /* Remove all the keys in the specified hash slot. 
* The number of removed items is returned. */ unsigned int delKeysInSlot(unsigned int hashslot) { - if (!kvstoreDictSize(server.db->keys, hashslot)) return 0; + if (!countKeysInSlot(hashslot)) return 0; unsigned int j = 0; diff --git a/tests/unit/cluster/replica-migration.tcl b/tests/unit/cluster/replica-migration.tcl index 05d6528684..d04069ef16 100644 --- a/tests/unit/cluster/replica-migration.tcl +++ b/tests/unit/cluster/replica-migration.tcl @@ -400,3 +400,23 @@ start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout start_cluster 4 4 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} { test_cluster_setslot "setslot" } my_slot_allocation cluster_allocate_replicas ;# start_cluster + +start_cluster 3 0 {tags {external:skip cluster} overrides {cluster-node-timeout 1000 cluster-migration-barrier 999}} { + test "Empty primary will check and delete the dirty slots" { + R 2 config set cluster-allow-replica-migration no + + # Write a key to slot 0. + R 2 incr key_977613 + + # Move slot 0 from primary 2 to primary 0. + R 0 cluster bumpepoch + R 0 cluster setslot 0 node [R 0 cluster myid] + + # Wait for R 2 to report that it is an empty primary (cluster-allow-replica-migration no) + wait_for_log_messages -2 {"*I am now an empty primary*"} 0 1000 50 + + # Make sure primary 0 will delete the dirty slots. + verify_log_message -2 "*Deleting keys in dirty slot 0*" 0 + assert_equal [R 2 dbsize] 0 + } +} my_slot_allocation cluster_allocate_replicas ;# start_cluster From 6fba747c39bee10e27942afabd2c46be4b4fae39 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 14 Nov 2024 10:26:23 +0800 Subject: [PATCH 11/60] Fix log printing always shows the role as child under daemonize (#1301) In #1282, we init server.pid earlier to keep log message role consistent, but we forgot to consider daemonize. In daemonize mode, we will always print the child role. 
We need to reset server.pid after daemonize(), otherwise the log printing role will always be the child. It also causes a incorrect server.pid value, affecting the concatenation of some pid names. Signed-off-by: Binbin --- src/server.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/server.c b/src/server.c index 0c4ddbe4b8..be3982278f 100644 --- a/src/server.c +++ b/src/server.c @@ -7064,7 +7064,12 @@ __attribute__((weak)) int main(int argc, char **argv) { /* Daemonize if needed */ server.supervised = serverIsSupervised(server.supervised_mode); int background = server.daemonize && !server.supervised; - if (background) daemonize(); + if (background) { + /* We need to reset server.pid after daemonize(), otherwise the + * log printing role will always be the child. */ + daemonize(); + server.pid = getpid(); + } serverLog(LL_NOTICE, "oO0OoO0OoO0Oo Valkey is starting oO0OoO0OoO0Oo"); serverLog(LL_NOTICE, "Valkey version=%s, bits=%d, commit=%s, modified=%d, pid=%d, just started", VALKEY_VERSION, From 4a9864206f8aa1b3b33976c0a96b292d3fa4905a Mon Sep 17 00:00:00 2001 From: skyfirelee <739609084@qq.com> Date: Thu, 14 Nov 2024 10:37:44 +0800 Subject: [PATCH 12/60] Migrate quicklist unit test to new framework (#515) Migrate quicklist unit test to new unit test framework, and cleanup remaining references of SERVER_TEST, parent ticket #428. Closes #428. 
Signed-off-by: artikell <739609084@qq.com> Signed-off-by: Binbin Co-authored-by: Binbin --- .github/workflows/daily.yml | 49 +- src/Makefile | 3 - src/quicklist.c | 1420 --------------------- src/quicklist.h | 4 - src/server.c | 73 -- src/unit/test_files.h | 60 + src/unit/test_quicklist.c | 2300 +++++++++++++++++++++++++++++++++++ 7 files changed, 2377 insertions(+), 1532 deletions(-) create mode 100644 src/unit/test_quicklist.c diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index bcfa35c939..62eecb1fa8 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -60,7 +60,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make all-with-unit-tests SERVER_CFLAGS='-Werror -DSERVER_TEST' + run: make all-with-unit-tests SERVER_CFLAGS='-Werror' - name: testprep run: sudo apt-get install tcl8.6 tclx - name: test @@ -75,10 +75,7 @@ jobs: - name: cluster tests if: true && !contains(github.event.inputs.skiptests, 'cluster') run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}} - - name: legacy unit tests - if: true && !contains(github.event.inputs.skiptests, 'unittest') - run: ./src/valkey-server test all --accurate - - name: new unit tests + - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: ./src/valkey-unit-tests --accurate @@ -109,7 +106,7 @@ jobs: run: | apt-get update && apt-get install -y make gcc-13 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 100 - make all-with-unit-tests CC=gcc OPT=-O3 SERVER_CFLAGS='-Werror -DSERVER_TEST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3' + make all-with-unit-tests CC=gcc OPT=-O3 SERVER_CFLAGS='-Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3' - name: testprep run: apt-get install -y tcl8.6 tclx procps - name: test @@ -124,10 +121,7 @@ jobs: - name: cluster tests if: true && !contains(github.event.inputs.skiptests, 'cluster') run: ./runtest-cluster 
${{github.event.inputs.cluster_test_args}} - - name: legacy unit tests - if: true && !contains(github.event.inputs.skiptests, 'unittest') - run: ./src/valkey-server test all --accurate - - name: new unit tests + - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: ./src/valkey-unit-tests --accurate @@ -234,7 +228,7 @@ jobs: - name: make run: | sudo apt-get update && sudo apt-get install libc6-dev-i386 - make 32bit SERVER_CFLAGS='-Werror -DSERVER_TEST' + make 32bit SERVER_CFLAGS='-Werror' - name: testprep run: sudo apt-get install tcl8.6 tclx - name: test @@ -251,10 +245,7 @@ jobs: - name: cluster tests if: true && !contains(github.event.inputs.skiptests, 'cluster') run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}} - - name: legacy unit tests - if: true && !contains(github.event.inputs.skiptests, 'unittest') - run: ./src/valkey-server test all --accurate - - name: new unit tests + - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: ./src/valkey-unit-tests --accurate @@ -483,7 +474,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make valgrind SERVER_CFLAGS='-Werror -DSERVER_TEST' + run: make valgrind SERVER_CFLAGS='-Werror' - name: testprep run: | sudo apt-get update @@ -515,7 +506,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make valgrind SERVER_CFLAGS='-Werror -DSERVER_TEST' + run: make all-with-unit-tests valgrind SERVER_CFLAGS='-Werror' - name: testprep run: | sudo apt-get update @@ -526,7 +517,7 @@ jobs: - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: | - valgrind --track-origins=yes --suppressions=./src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full --log-file=err.txt ./src/valkey-server test all --valgrind + valgrind --track-origins=yes --suppressions=./src/valgrind.sup --show-reachable=no 
--show-possibly-lost=no --leak-check=full --log-file=err.txt ./src/valkey-unit-tests --valgrind if grep -q 0x err.txt; then cat err.txt; exit 1; fi test-valgrind-no-malloc-usable-size-test: @@ -552,7 +543,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make valgrind CFLAGS="-DNO_MALLOC_USABLE_SIZE -DSERVER_TEST" SERVER_CFLAGS='-Werror' + run: make valgrind CFLAGS="-DNO_MALLOC_USABLE_SIZE" SERVER_CFLAGS='-Werror' - name: testprep run: | sudo apt-get update @@ -584,7 +575,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make valgrind CFLAGS="-DNO_MALLOC_USABLE_SIZE -DSERVER_TEST" SERVER_CFLAGS='-Werror' + run: make all-with-unit-tests valgrind CFLAGS="-DNO_MALLOC_USABLE_SIZE" SERVER_CFLAGS='-Werror' - name: testprep run: | sudo apt-get update @@ -595,7 +586,7 @@ jobs: - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: | - valgrind --track-origins=yes --suppressions=./src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full --log-file=err.txt ./src/valkey-server test all --valgrind + valgrind --track-origins=yes --suppressions=./src/valgrind.sup --show-reachable=no --show-possibly-lost=no --leak-check=full --log-file=err.txt ./src/valkey-unit-tests --valgrind if grep -q 0x err.txt; then cat err.txt; exit 1; fi test-sanitizer-address: @@ -627,7 +618,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make all-with-unit-tests OPT=-O3 SANITIZER=address SERVER_CFLAGS='-DSERVER_TEST -Werror' + run: make all-with-unit-tests OPT=-O3 SANITIZER=address SERVER_CFLAGS='-Werror' - name: testprep run: | sudo apt-get update @@ -644,10 +635,7 @@ jobs: - name: cluster tests if: true && !contains(github.event.inputs.skiptests, 'cluster') run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}} - - name: legacy unit tests - if: true && 
!contains(github.event.inputs.skiptests, 'unittest') - run: ./src/valkey-server test all - - name: new unit tests + - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: ./src/valkey-unit-tests @@ -680,7 +668,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make all-with-unit-tests OPT=-O3 SANITIZER=undefined SERVER_CFLAGS='-DSERVER_TEST -Werror' LUA_DEBUG=yes # we (ab)use this flow to also check Lua C API violations + run: make all-with-unit-tests OPT=-O3 SANITIZER=undefined SERVER_CFLAGS='-Werror' LUA_DEBUG=yes # we (ab)use this flow to also check Lua C API violations - name: testprep run: | sudo apt-get update @@ -697,10 +685,7 @@ jobs: - name: cluster tests if: true && !contains(github.event.inputs.skiptests, 'cluster') run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}} - - name: legacy unit tests - if: true && !contains(github.event.inputs.skiptests, 'unittest') - run: ./src/valkey-server test all --accurate - - name: new unit tests + - name: unittest if: true && !contains(github.event.inputs.skiptests, 'unittest') run: ./src/valkey-unit-tests --accurate @@ -1031,7 +1016,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make SERVER_CFLAGS='-Werror -DSERVER_TEST' + run: make SERVER_CFLAGS='-Werror' test-freebsd: runs-on: macos-12 diff --git a/src/Makefile b/src/Makefile index ae2de1c626..21affe61a3 100644 --- a/src/Makefile +++ b/src/Makefile @@ -131,9 +131,6 @@ ifdef REDIS_LDFLAGS endif FINAL_CFLAGS=$(STD) $(WARN) $(OPT) $(DEBUG) $(CFLAGS) $(SERVER_CFLAGS) -ifeq ($(SERVER_TEST),yes) - FINAL_CFLAGS +=-DSERVER_TEST=1 -endif FINAL_LDFLAGS=$(LDFLAGS) $(OPT) $(SERVER_LDFLAGS) $(DEBUG) FINAL_LIBS=-lm DEBUG=-g -ggdb diff --git a/src/quicklist.c b/src/quicklist.c index 617d21cd8c..225fac6fdf 100644 --- a/src/quicklist.c +++ b/src/quicklist.c @@ -210,9 +210,7 @@ void quicklistRelease(quicklist *quicklist) { * 
Returns 1 if listpack compressed successfully. * Returns 0 if compression failed or if listpack too small to compress. */ static int __quicklistCompressNode(quicklistNode *node) { -#ifdef SERVER_TEST node->attempted_compress = 1; -#endif if (node->dont_compress) return 0; /* validate that the node is neither @@ -250,9 +248,7 @@ static int __quicklistCompressNode(quicklistNode *node) { /* Uncompress the listpack in 'node' and update encoding details. * Returns 1 on successful decode, 0 on failure to decode. */ static int __quicklistDecompressNode(quicklistNode *node) { -#ifdef SERVER_TEST node->attempted_compress = 0; -#endif node->recompress = 0; void *decompressed = zmalloc(node->sz); @@ -1692,1419 +1688,3 @@ void quicklistBookmarksClear(quicklist *ql) { /* NOTE: We do not shrink (realloc) the quick list. main use case for this * function is just before releasing the allocation. */ } - -/* The rest of this file is test cases and test helpers. */ -#ifdef SERVER_TEST -#include -#include -#include "testhelp.h" -#include - -#define yell(str, ...) printf("ERROR! " str "\n\n", __VA_ARGS__) - -#define ERROR \ - do { \ - printf("\tERROR!\n"); \ - err++; \ - } while (0) - -#define ERR(x, ...) \ - do { \ - printf("%s:%s:%d:\t", __FILE__, __func__, __LINE__); \ - printf("ERROR! " x "\n", __VA_ARGS__); \ - err++; \ - } while (0) - -#define TEST(name) printf("test — %s\n", name); -#define TEST_DESC(name, ...) 
printf("test — " name "\n", __VA_ARGS__); - -#define QL_TEST_VERBOSE 0 - -#define UNUSED(x) (void)(x) -static void ql_info(quicklist *ql) { -#if QL_TEST_VERBOSE - printf("Container length: %lu\n", ql->len); - printf("Container size: %lu\n", ql->count); - if (ql->head) printf("\t(zsize head: %lu)\n", lpLength(ql->head->entry)); - if (ql->tail) printf("\t(zsize tail: %lu)\n", lpLength(ql->tail->entry)); - printf("\n"); -#else - UNUSED(ql); -#endif -} - -/* Return the UNIX time in microseconds */ -static long long ustime(void) { - struct timeval tv; - long long ust; - - gettimeofday(&tv, NULL); - ust = ((long long)tv.tv_sec) * 1000000; - ust += tv.tv_usec; - return ust; -} - -/* Return the UNIX time in milliseconds */ -static long long mstime(void) { - return ustime() / 1000; -} - -/* Iterate over an entire quicklist. - * Print the list if 'print' == 1. - * - * Returns physical count of elements found by iterating over the list. */ -static int _itrprintr(quicklist *ql, int print, int forward) { - quicklistIter *iter = quicklistGetIterator(ql, forward ? AL_START_HEAD : AL_START_TAIL); - quicklistEntry entry; - int i = 0; - int p = 0; - quicklistNode *prev = NULL; - while (quicklistNext(iter, &entry)) { - if (entry.node != prev) { - /* Count the number of list nodes too */ - p++; - prev = entry.node; - } - if (print) { - int size = (entry.sz > (1 << 20)) ? 
1 << 20 : entry.sz; - printf("[%3d (%2d)]: [%.*s] (%lld)\n", i, p, size, (char *)entry.value, entry.longval); - } - i++; - } - quicklistReleaseIterator(iter); - return i; -} -static int itrprintr(quicklist *ql, int print) { - return _itrprintr(ql, print, 1); -} - -static int itrprintr_rev(quicklist *ql, int print) { - return _itrprintr(ql, print, 0); -} - -#define ql_verify(a, b, c, d, e) \ - do { \ - err += _ql_verify((a), (b), (c), (d), (e)); \ - } while (0) - -static int _ql_verify_compress(quicklist *ql) { - int errors = 0; - if (quicklistAllowsCompression(ql)) { - quicklistNode *node = ql->head; - unsigned int low_raw = ql->compress; - unsigned int high_raw = ql->len - ql->compress; - - for (unsigned int at = 0; at < ql->len; at++, node = node->next) { - if (node && (at < low_raw || at >= high_raw)) { - if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) { - yell("Incorrect compression: node %d is " - "compressed at depth %d ((%u, %u); total " - "nodes: %lu; size: %zu; recompress: %d)", - at, ql->compress, low_raw, high_raw, ql->len, node->sz, node->recompress); - errors++; - } - } else { - if (node->encoding != QUICKLIST_NODE_ENCODING_LZF && !node->attempted_compress) { - yell("Incorrect non-compression: node %d is NOT " - "compressed at depth %d ((%u, %u); total " - "nodes: %lu; size: %zu; recompress: %d; attempted: %d)", - at, ql->compress, low_raw, high_raw, ql->len, node->sz, node->recompress, - node->attempted_compress); - errors++; - } - } - } - } - return errors; -} - -/* Verify list metadata matches physical list contents. 
*/ -static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count, uint32_t head_count, uint32_t tail_count) { - int errors = 0; - - ql_info(ql); - if (len != ql->len) { - yell("quicklist length wrong: expected %d, got %lu", len, ql->len); - errors++; - } - - if (count != ql->count) { - yell("quicklist count wrong: expected %d, got %lu", count, ql->count); - errors++; - } - - int loopr = itrprintr(ql, 0); - if (loopr != (int)ql->count) { - yell("quicklist cached count not match actual count: expected %lu, got " - "%d", - ql->count, loopr); - errors++; - } - - int rloopr = itrprintr_rev(ql, 0); - if (loopr != rloopr) { - yell("quicklist has different forward count than reverse count! " - "Forward count is %d, reverse count is %d.", - loopr, rloopr); - errors++; - } - - if (ql->len == 0 && !errors) { - return errors; - } - - if (ql->head && head_count != ql->head->count && head_count != lpLength(ql->head->entry)) { - yell("quicklist head count wrong: expected %d, " - "got cached %d vs. actual %lu", - head_count, ql->head->count, lpLength(ql->head->entry)); - errors++; - } - - if (ql->tail && tail_count != ql->tail->count && tail_count != lpLength(ql->tail->entry)) { - yell("quicklist tail count wrong: expected %d, " - "got cached %u vs. actual %lu", - tail_count, ql->tail->count, lpLength(ql->tail->entry)); - errors++; - } - - errors += _ql_verify_compress(ql); - return errors; -} - -/* Release iterator and verify compress correctly. 
*/ -static void ql_release_iterator(quicklistIter *iter) { - quicklist *ql = NULL; - if (iter) ql = iter->quicklist; - quicklistReleaseIterator(iter); - if (ql) assert(!_ql_verify_compress(ql)); -} - -/* Generate new string concatenating integer i against string 'prefix' */ -static char *genstr(char *prefix, int i) { - static char result[64] = {0}; - snprintf(result, sizeof(result), "%s%d", prefix, i); - return result; -} - -static void randstring(unsigned char *target, size_t sz) { - size_t p = 0; - int minval, maxval; - switch (rand() % 3) { - case 0: - minval = 'a'; - maxval = 'z'; - break; - case 1: - minval = '0'; - maxval = '9'; - break; - case 2: - minval = 'A'; - maxval = 'Z'; - break; - default: assert(NULL); - } - - while (p < sz) target[p++] = minval + rand() % (maxval - minval + 1); -} - -/* main test, but callable from other files */ -int quicklistTest(int argc, char *argv[], int flags) { - UNUSED(argc); - UNUSED(argv); - - int accurate = (flags & TEST_ACCURATE); - unsigned int err = 0; - int optimize_start = -(int)(sizeof(optimization_level) / sizeof(*optimization_level)); - - printf("Starting optimization offset at: %d\n", optimize_start); - - int options[] = {0, 1, 2, 3, 4, 5, 6, 10}; - int fills[] = {-5, -4, -3, -2, -1, 0, 1, 2, 32, 66, 128, 999}; - size_t option_count = sizeof(options) / sizeof(*options); - int fill_count = (int)(sizeof(fills) / sizeof(*fills)); - long long runtime[option_count]; - - for (int _i = 0; _i < (int)option_count; _i++) { - printf("Testing Compression option %d\n", options[_i]); - long long start = mstime(); - quicklistIter *iter; - - TEST("create list") { - quicklist *ql = quicklistNew(-2, options[_i]); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("add to tail of empty list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushTail(ql, "hello", 6); - /* 1 for head and 1 for tail because 1 node = head = tail */ - ql_verify(ql, 1, 1, 1, 1); - quicklistRelease(ql); - } - - TEST("add to 
head of empty list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushHead(ql, "hello", 6); - /* 1 for head and 1 for tail because 1 node = head = tail */ - ql_verify(ql, 1, 1, 1, 1); - quicklistRelease(ql); - } - - TEST_DESC("add to tail 5x at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 5; i++) quicklistPushTail(ql, genstr("hello", i), 32); - if (ql->count != 5) ERROR; - if (fills[f] == 32) ql_verify(ql, 1, 5, 5, 5); - quicklistRelease(ql); - } - } - - TEST_DESC("add to head 5x at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 5; i++) quicklistPushHead(ql, genstr("hello", i), 32); - if (ql->count != 5) ERROR; - if (fills[f] == 32) ql_verify(ql, 1, 5, 5, 5); - quicklistRelease(ql); - } - } - - TEST_DESC("add to tail 500x at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i), 64); - if (ql->count != 500) ERROR; - if (fills[f] == 32) ql_verify(ql, 16, 500, 32, 20); - quicklistRelease(ql); - } - } - - TEST_DESC("add to head 500x at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); - if (ql->count != 500) ERROR; - if (fills[f] == 32) ql_verify(ql, 16, 500, 20, 32); - quicklistRelease(ql); - } - } - - TEST("rotate empty") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistRotate(ql); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("Comprassion Plain node") { - for (int f = 0; f < fill_count; f++) { - size_t large_limit = (fills[f] < 0) ? 
quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; - - char buf[large_limit]; - quicklist *ql = quicklistNew(fills[f], 1); - for (int i = 0; i < 500; i++) { - /* Set to 256 to allow the node to be triggered to compress, - * if it is less than 48(nocompress), the test will be successful. */ - snprintf(buf, sizeof(buf), "hello%d", i); - quicklistPushHead(ql, buf, large_limit); - } - - quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); - quicklistEntry entry; - int i = 0; - while (quicklistNext(iter, &entry)) { - assert(QL_NODE_IS_PLAIN(entry.node)); - snprintf(buf, sizeof(buf), "hello%d", i); - if (strcmp((char *)entry.value, buf)) - ERR("value [%s] didn't match [%s] at position %d", entry.value, buf, i); - i++; - } - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST("NEXT plain node") { - for (int f = 0; f < fill_count; f++) { - size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; - quicklist *ql = quicklistNew(fills[f], options[_i]); - - char buf[large_limit]; - memcpy(buf, "plain", 5); - quicklistPushHead(ql, buf, large_limit); - quicklistPushHead(ql, buf, large_limit); - quicklistPushHead(ql, "packed3", 7); - quicklistPushHead(ql, "packed4", 7); - quicklistPushHead(ql, buf, large_limit); - - quicklistEntry entry; - quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); - - while (quicklistNext(iter, &entry) != 0) { - if (QL_NODE_IS_PLAIN(entry.node)) - assert(!memcmp(entry.value, "plain", 5)); - else - assert(!memcmp(entry.value, "packed", 6)); - } - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST("rotate plain node ") { - for (int f = 0; f < fill_count; f++) { - size_t large_limit = (fills[f] < 0) ? 
quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; - - unsigned char *data = NULL; - size_t sz; - long long lv; - int i = 0; - quicklist *ql = quicklistNew(fills[f], options[_i]); - char buf[large_limit]; - memcpy(buf, "hello1", 6); - quicklistPushHead(ql, buf, large_limit); - memcpy(buf, "hello4", 6); - quicklistPushHead(ql, buf, large_limit); - memcpy(buf, "hello3", 6); - quicklistPushHead(ql, buf, large_limit); - memcpy(buf, "hello2", 6); - quicklistPushHead(ql, buf, large_limit); - quicklistRotate(ql); - - for (i = 1; i < 5; i++) { - assert(QL_NODE_IS_PLAIN(ql->tail)); - quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); - int temp_char = data[5]; - zfree(data); - assert(temp_char == ('0' + i)); - } - - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - } - - TEST("rotate one val once") { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - quicklistPushHead(ql, "hello", 6); - quicklistRotate(ql); - /* Ignore compression verify because listpack is - * too small to compress. 
*/ - ql_verify(ql, 1, 1, 1, 1); - quicklistRelease(ql); - } - } - - TEST_DESC("rotate 500 val 5000 times at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - quicklistPushHead(ql, "900", 3); - quicklistPushHead(ql, "7000", 4); - quicklistPushHead(ql, "-1200", 5); - quicklistPushHead(ql, "42", 2); - for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 64); - ql_info(ql); - for (int i = 0; i < 5000; i++) { - ql_info(ql); - quicklistRotate(ql); - } - if (fills[f] == 1) - ql_verify(ql, 504, 504, 1, 1); - else if (fills[f] == 2) - ql_verify(ql, 252, 504, 2, 2); - else if (fills[f] == 32) - ql_verify(ql, 16, 504, 32, 24); - quicklistRelease(ql); - } - } - - TEST("pop empty") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPop(ql, QUICKLIST_HEAD, NULL, NULL, NULL); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("pop 1 string from 1") { - quicklist *ql = quicklistNew(-2, options[_i]); - char *populate = genstr("hello", 331); - quicklistPushHead(ql, populate, 32); - unsigned char *data; - size_t sz; - long long lv; - ql_info(ql); - assert(quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv)); - assert(data != NULL); - assert(sz == 32); - if (strcmp(populate, (char *)data)) { - int size = sz; - ERR("Pop'd value (%.*s) didn't equal original value (%s)", size, data, populate); - } - zfree(data); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("pop head 1 number from 1") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushHead(ql, "55513", 5); - unsigned char *data; - size_t sz; - long long lv; - ql_info(ql); - assert(quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv)); - assert(data == NULL); - assert(lv == 55513); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("pop head 500 from 500") { - quicklist *ql = quicklistNew(-2, options[_i]); - for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", 
i), 32); - ql_info(ql); - for (int i = 0; i < 500; i++) { - unsigned char *data; - size_t sz; - long long lv; - int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); - assert(ret == 1); - assert(data != NULL); - assert(sz == 32); - if (strcmp(genstr("hello", 499 - i), (char *)data)) { - int size = sz; - ERR("Pop'd value (%.*s) didn't equal original value (%s)", size, data, genstr("hello", 499 - i)); - } - zfree(data); - } - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("pop head 5000 from 500") { - quicklist *ql = quicklistNew(-2, options[_i]); - for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); - for (int i = 0; i < 5000; i++) { - unsigned char *data; - size_t sz; - long long lv; - int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); - if (i < 500) { - assert(ret == 1); - assert(data != NULL); - assert(sz == 32); - if (strcmp(genstr("hello", 499 - i), (char *)data)) { - int size = sz; - ERR("Pop'd value (%.*s) didn't equal original value " - "(%s)", - size, data, genstr("hello", 499 - i)); - } - zfree(data); - } else { - assert(ret == 0); - } - } - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("iterate forward over 500 list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); - quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD); - quicklistEntry entry; - int i = 499, count = 0; - while (quicklistNext(iter, &entry)) { - char *h = genstr("hello", i); - if (strcmp((char *)entry.value, h)) - ERR("value [%s] didn't match [%s] at position %d", entry.value, h, i); - i--; - count++; - } - if (count != 500) ERR("Didn't iterate over exactly 500 elements (%d)", i); - ql_verify(ql, 16, 500, 20, 32); - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST("iterate reverse over 500 list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for 
(int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); - quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); - quicklistEntry entry; - int i = 0; - while (quicklistNext(iter, &entry)) { - char *h = genstr("hello", i); - if (strcmp((char *)entry.value, h)) - ERR("value [%s] didn't match [%s] at position %d", entry.value, h, i); - i++; - } - if (i != 500) ERR("Didn't iterate over exactly 500 elements (%d)", i); - ql_verify(ql, 16, 500, 20, 32); - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST("insert after 1 element") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushHead(ql, "hello", 6); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - quicklistInsertAfter(iter, &entry, "abc", 4); - ql_release_iterator(iter); - ql_verify(ql, 1, 2, 2, 2); - - /* verify results */ - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - int sz = entry.sz; - if (strncmp((char *)entry.value, "hello", 5)) { - ERR("Value 0 didn't match, instead got: %.*s", sz, entry.value); - } - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); - sz = entry.sz; - if (strncmp((char *)entry.value, "abc", 3)) { - ERR("Value 1 didn't match, instead got: %.*s", sz, entry.value); - } - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST("insert before 1 element") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushHead(ql, "hello", 6); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - quicklistInsertBefore(iter, &entry, "abc", 4); - ql_release_iterator(iter); - ql_verify(ql, 1, 2, 2, 2); - - /* verify results */ - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - int sz = entry.sz; - if (strncmp((char *)entry.value, "abc", 3)) { - ERR("Value 0 didn't match, instead got: %.*s", sz, entry.value); - } - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); - sz = entry.sz; - if 
(strncmp((char *)entry.value, "hello", 5)) { - ERR("Value 1 didn't match, instead got: %.*s", sz, entry.value); - } - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST("insert head while head node is full") { - quicklist *ql = quicklistNew(4, options[_i]); - for (int i = 0; i < 10; i++) quicklistPushTail(ql, genstr("hello", i), 6); - quicklistSetFill(ql, -1); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, -10, &entry); - char buf[4096] = {0}; - quicklistInsertBefore(iter, &entry, buf, 4096); - ql_release_iterator(iter); - ql_verify(ql, 4, 11, 1, 2); - quicklistRelease(ql); - } - - TEST("insert tail while tail node is full") { - quicklist *ql = quicklistNew(4, options[_i]); - for (int i = 0; i < 10; i++) quicklistPushHead(ql, genstr("hello", i), 6); - quicklistSetFill(ql, -1); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); - char buf[4096] = {0}; - quicklistInsertAfter(iter, &entry, buf, 4096); - ql_release_iterator(iter); - ql_verify(ql, 4, 11, 2, 1); - quicklistRelease(ql); - } - - TEST_DESC("insert once in elements while iterating at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - quicklistPushTail(ql, "abc", 3); - quicklistSetFill(ql, 1); - quicklistPushTail(ql, "def", 3); /* force to unique node */ - quicklistSetFill(ql, f); - quicklistPushTail(ql, "bob", 3); /* force to reset for +3 */ - quicklistPushTail(ql, "foo", 3); - quicklistPushTail(ql, "zoo", 3); - - itrprintr(ql, 0); - /* insert "bar" before "bob" while iterating over list. */ - quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD); - quicklistEntry entry; - while (quicklistNext(iter, &entry)) { - if (!strncmp((char *)entry.value, "bob", 3)) { - /* Insert as fill = 1 so it spills into new node. */ - quicklistInsertBefore(iter, &entry, "bar", 3); - break; /* didn't we fix insert-while-iterating? 
*/ - } - } - ql_release_iterator(iter); - itrprintr(ql, 0); - - /* verify results */ - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - int sz = entry.sz; - - if (strncmp((char *)entry.value, "abc", 3)) - ERR("Value 0 didn't match, instead got: %.*s", sz, entry.value); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); - if (strncmp((char *)entry.value, "def", 3)) - ERR("Value 1 didn't match, instead got: %.*s", sz, entry.value); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 2, &entry); - if (strncmp((char *)entry.value, "bar", 3)) - ERR("Value 2 didn't match, instead got: %.*s", sz, entry.value); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 3, &entry); - if (strncmp((char *)entry.value, "bob", 3)) - ERR("Value 3 didn't match, instead got: %.*s", sz, entry.value); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 4, &entry); - if (strncmp((char *)entry.value, "foo", 3)) - ERR("Value 4 didn't match, instead got: %.*s", sz, entry.value); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 5, &entry); - if (strncmp((char *)entry.value, "zoo", 3)) - ERR("Value 5 didn't match, instead got: %.*s", sz, entry.value); - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST_DESC("insert [before] 250 new in middle of 500 elements at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i), 32); - for (int i = 0; i < 250; i++) { - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, 250, &entry); - quicklistInsertBefore(iter, &entry, genstr("abc", i), 32); - ql_release_iterator(iter); - } - if (fills[f] == 32) ql_verify(ql, 25, 750, 32, 20); - quicklistRelease(ql); - } - } - - TEST_DESC("insert [after] 250 new in middle of 500 elements at compress %d", options[_i]) 
{ - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); - for (int i = 0; i < 250; i++) { - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, 250, &entry); - quicklistInsertAfter(iter, &entry, genstr("abc", i), 32); - ql_release_iterator(iter); - } - - if (ql->count != 750) ERR("List size not 750, but rather %ld", ql->count); - - if (fills[f] == 32) ql_verify(ql, 26, 750, 20, 32); - quicklistRelease(ql); - } - } - - TEST("duplicate empty list") { - quicklist *ql = quicklistNew(-2, options[_i]); - ql_verify(ql, 0, 0, 0, 0); - quicklist *copy = quicklistDup(ql); - ql_verify(copy, 0, 0, 0, 0); - quicklistRelease(ql); - quicklistRelease(copy); - } - - TEST("duplicate list of 1 element") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushHead(ql, genstr("hello", 3), 32); - ql_verify(ql, 1, 1, 1, 1); - quicklist *copy = quicklistDup(ql); - ql_verify(copy, 1, 1, 1, 1); - quicklistRelease(ql); - quicklistRelease(copy); - } - - TEST("duplicate list of 500") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); - ql_verify(ql, 16, 500, 20, 32); - - quicklist *copy = quicklistDup(ql); - ql_verify(copy, 16, 500, 20, 32); - quicklistRelease(ql); - quicklistRelease(copy); - } - - for (int f = 0; f < fill_count; f++) { - TEST_DESC("index 1,200 from 500 list at fill %d at compress %d", f, options[_i]) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); - if (strcmp((char *)entry.value, "hello2") != 0) ERR("Value: %s", entry.value); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 200, &entry); - if (strcmp((char *)entry.value, "hello201") 
!= 0) ERR("Value: %s", entry.value); - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST_DESC("index -1,-2 from 500 list at fill %d at compress %d", fills[f], options[_i]) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); - if (strcmp((char *)entry.value, "hello500") != 0) ERR("Value: %s", entry.value); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, -2, &entry); - if (strcmp((char *)entry.value, "hello499") != 0) ERR("Value: %s", entry.value); - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST_DESC("index -100 from 500 list at fill %d at compress %d", fills[f], options[_i]) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, -100, &entry); - if (strcmp((char *)entry.value, "hello401") != 0) ERR("Value: %s", entry.value); - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST_DESC("index too big +1 from 50 list at fill %d at compress %d", fills[f], options[_i]) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - for (int i = 0; i < 50; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - quicklistEntry entry; - int sz = entry.sz; - iter = quicklistGetIteratorEntryAtIdx(ql, 50, &entry); - if (iter) ERR("Index found at 50 with 50 list: %.*s", sz, entry.value); - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST("delete range empty list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistDelRange(ql, 5, 20); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("delete range of entire node in list of one node") { - quicklist *ql = quicklistNew(-2, options[_i]); - for (int i = 0; i < 32; i++) quicklistPushHead(ql, genstr("hello", 
i), 32); - ql_verify(ql, 1, 32, 32, 32); - quicklistDelRange(ql, 0, 32); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("delete range of entire node with overflow counts") { - quicklist *ql = quicklistNew(-2, options[_i]); - for (int i = 0; i < 32; i++) quicklistPushHead(ql, genstr("hello", i), 32); - ql_verify(ql, 1, 32, 32, 32); - quicklistDelRange(ql, 0, 128); - ql_verify(ql, 0, 0, 0, 0); - quicklistRelease(ql); - } - - TEST("delete middle 100 of 500 list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - ql_verify(ql, 16, 500, 32, 20); - quicklistDelRange(ql, 200, 100); - ql_verify(ql, 14, 400, 32, 20); - quicklistRelease(ql); - } - - TEST("delete less than fill but across nodes") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - ql_verify(ql, 16, 500, 32, 20); - quicklistDelRange(ql, 60, 10); - ql_verify(ql, 16, 490, 32, 20); - quicklistRelease(ql); - } - - TEST("delete negative 1 from 500 list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - ql_verify(ql, 16, 500, 32, 20); - quicklistDelRange(ql, -1, 1); - ql_verify(ql, 16, 499, 32, 19); - quicklistRelease(ql); - } - - TEST("delete negative 1 from 500 list with overflow counts") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - ql_verify(ql, 16, 500, 32, 20); - quicklistDelRange(ql, -1, 128); - ql_verify(ql, 16, 499, 32, 19); - quicklistRelease(ql); - } - - TEST("delete negative 100 from 500 list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 500; i++) quicklistPushTail(ql, 
genstr("hello", i + 1), 32); - quicklistDelRange(ql, -100, 100); - ql_verify(ql, 13, 400, 32, 16); - quicklistRelease(ql); - } - - TEST("delete -10 count 5 from 50 list") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - for (int i = 0; i < 50; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); - ql_verify(ql, 2, 50, 32, 18); - quicklistDelRange(ql, -10, 5); - ql_verify(ql, 2, 45, 32, 13); - quicklistRelease(ql); - } - - TEST("numbers only list read") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushTail(ql, "1111", 4); - quicklistPushTail(ql, "2222", 4); - quicklistPushTail(ql, "3333", 4); - quicklistPushTail(ql, "4444", 4); - ql_verify(ql, 1, 4, 4, 4); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - if (entry.longval != 1111) ERR("Not 1111, %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); - if (entry.longval != 2222) ERR("Not 2222, %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 2, &entry); - if (entry.longval != 3333) ERR("Not 3333, %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 3, &entry); - if (entry.longval != 4444) ERR("Not 4444, %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, 4, &entry); - if (iter) ERR("Index past elements: %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); - if (entry.longval != 4444) ERR("Not 4444 (reverse), %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, -2, &entry); - if (entry.longval != 3333) ERR("Not 3333 (reverse), %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, -3, &entry); - if (entry.longval != 2222) ERR("Not 2222 (reverse), %lld", entry.longval); - ql_release_iterator(iter); - 
- iter = quicklistGetIteratorEntryAtIdx(ql, -4, &entry); - if (entry.longval != 1111) ERR("Not 1111 (reverse), %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, -5, &entry); - if (iter) ERR("Index past elements (reverse), %lld", entry.longval); - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST("numbers larger list read") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistSetFill(ql, 32); - char num[32]; - long long nums[5000]; - for (int i = 0; i < 5000; i++) { - nums[i] = -5157318210846258176 + i; - int sz = ll2string(num, sizeof(num), nums[i]); - quicklistPushTail(ql, num, sz); - } - quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20); - quicklistEntry entry; - for (int i = 0; i < 5000; i++) { - iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry); - if (entry.longval != nums[i]) ERR("[%d] Not longval %lld but rather %lld", i, nums[i], entry.longval); - entry.longval = 0xdeadbeef; - ql_release_iterator(iter); - } - iter = quicklistGetIteratorEntryAtIdx(ql, 5000, &entry); - if (strncmp((char *)entry.value, "xxxxxxxxxxxxxxxxxxxx", 20)) ERR("String val not match: %s", entry.value); - ql_verify(ql, 157, 5001, 32, 9); - ql_release_iterator(iter); - quicklistRelease(ql); - } - - TEST("numbers larger list read B") { - quicklist *ql = quicklistNew(-2, options[_i]); - quicklistPushTail(ql, "99", 2); - quicklistPushTail(ql, "98", 2); - quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20); - quicklistPushTail(ql, "96", 2); - quicklistPushTail(ql, "95", 2); - quicklistReplaceAtIndex(ql, 1, "foo", 3); - quicklistReplaceAtIndex(ql, -1, "bar", 3); - quicklistRelease(ql); - } - - TEST_DESC("lrem test at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - char *words[] = {"abc", "foo", "bar", "foobar", "foobared", "zap", "bar", "test", "foo"}; - char *result[] = {"abc", "foo", "foobar", "foobared", "zap", "test", "foo"}; - char 
*resultB[] = {"abc", "foo", "foobar", "foobared", "zap", "test"}; - for (int i = 0; i < 9; i++) quicklistPushTail(ql, words[i], strlen(words[i])); - - /* lrem 0 bar */ - quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD); - quicklistEntry entry; - int i = 0; - while (quicklistNext(iter, &entry)) { - if (quicklistCompare(&entry, (unsigned char *)"bar", 3)) { - quicklistDelEntry(iter, &entry); - } - i++; - } - ql_release_iterator(iter); - - /* check result of lrem 0 bar */ - iter = quicklistGetIterator(ql, AL_START_HEAD); - i = 0; - while (quicklistNext(iter, &entry)) { - /* Result must be: abc, foo, foobar, foobared, zap, test, - * foo */ - int sz = entry.sz; - if (strncmp((char *)entry.value, result[i], entry.sz)) { - ERR("No match at position %d, got %.*s instead of %s", i, sz, entry.value, result[i]); - } - i++; - } - ql_release_iterator(iter); - - quicklistPushTail(ql, "foo", 3); - - /* lrem -2 foo */ - iter = quicklistGetIterator(ql, AL_START_TAIL); - i = 0; - int del = 2; - while (quicklistNext(iter, &entry)) { - if (quicklistCompare(&entry, (unsigned char *)"foo", 3)) { - quicklistDelEntry(iter, &entry); - del--; - } - if (!del) break; - i++; - } - ql_release_iterator(iter); - - /* check result of lrem -2 foo */ - /* (we're ignoring the '2' part and still deleting all foo - * because - * we only have two foo) */ - iter = quicklistGetIterator(ql, AL_START_TAIL); - i = 0; - size_t resB = sizeof(resultB) / sizeof(*resultB); - while (quicklistNext(iter, &entry)) { - /* Result must be: abc, foo, foobar, foobared, zap, test, - * foo */ - int sz = entry.sz; - if (strncmp((char *)entry.value, resultB[resB - 1 - i], sz)) { - ERR("No match at position %d, got %.*s instead of %s", i, sz, entry.value, - resultB[resB - 1 - i]); - } - i++; - } - - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST_DESC("iterate reverse + delete at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], 
options[_i]); - quicklistPushTail(ql, "abc", 3); - quicklistPushTail(ql, "def", 3); - quicklistPushTail(ql, "hij", 3); - quicklistPushTail(ql, "jkl", 3); - quicklistPushTail(ql, "oop", 3); - - quicklistEntry entry; - quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); - int i = 0; - while (quicklistNext(iter, &entry)) { - if (quicklistCompare(&entry, (unsigned char *)"hij", 3)) { - quicklistDelEntry(iter, &entry); - } - i++; - } - ql_release_iterator(iter); - - if (i != 5) ERR("Didn't iterate 5 times, iterated %d times.", i); - - /* Check results after deletion of "hij" */ - iter = quicklistGetIterator(ql, AL_START_HEAD); - i = 0; - char *vals[] = {"abc", "def", "jkl", "oop"}; - while (quicklistNext(iter, &entry)) { - if (!quicklistCompare(&entry, (unsigned char *)vals[i], 3)) { - ERR("Value at %d didn't match %s\n", i, vals[i]); - } - i++; - } - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST_DESC("iterator at index test at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - char num[32]; - long long nums[5000]; - for (int i = 0; i < 760; i++) { - nums[i] = -5157318210846258176 + i; - int sz = ll2string(num, sizeof(num), nums[i]); - quicklistPushTail(ql, num, sz); - } - - quicklistEntry entry; - quicklistIter *iter = quicklistGetIteratorAtIdx(ql, AL_START_HEAD, 437); - int i = 437; - while (quicklistNext(iter, &entry)) { - if (entry.longval != nums[i]) ERR("Expected %lld, but got %lld", entry.longval, nums[i]); - i++; - } - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST_DESC("ltrim test A at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - char num[32]; - long long nums[5000]; - for (int i = 0; i < 32; i++) { - nums[i] = -5157318210846258176 + i; - int sz = ll2string(num, sizeof(num), nums[i]); - quicklistPushTail(ql, num, sz); - } - if (fills[f] == 32) 
ql_verify(ql, 1, 32, 32, 32); - /* ltrim 25 53 (keep [25,32] inclusive = 7 remaining) */ - quicklistDelRange(ql, 0, 25); - quicklistDelRange(ql, 0, 0); - quicklistEntry entry; - for (int i = 0; i < 7; i++) { - iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry); - if (entry.longval != nums[25 + i]) - ERR("Deleted invalid range! Expected %lld but got " - "%lld", - entry.longval, nums[25 + i]); - ql_release_iterator(iter); - } - if (fills[f] == 32) ql_verify(ql, 1, 7, 7, 7); - quicklistRelease(ql); - } - } - - TEST_DESC("ltrim test B at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - /* Force-disable compression because our 33 sequential - * integers don't compress and the check always fails. */ - quicklist *ql = quicklistNew(fills[f], QUICKLIST_NOCOMPRESS); - char num[32]; - long long nums[5000]; - for (int i = 0; i < 33; i++) { - nums[i] = i; - int sz = ll2string(num, sizeof(num), nums[i]); - quicklistPushTail(ql, num, sz); - } - if (fills[f] == 32) ql_verify(ql, 2, 33, 32, 1); - /* ltrim 5 16 (keep [5,16] inclusive = 12 remaining) */ - quicklistDelRange(ql, 0, 5); - quicklistDelRange(ql, -16, 16); - if (fills[f] == 32) ql_verify(ql, 1, 12, 12, 12); - quicklistEntry entry; - - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - if (entry.longval != 5) ERR("A: longval not 5, but %lld", entry.longval); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); - if (entry.longval != 16) ERR("B! got instead: %lld", entry.longval); - quicklistPushTail(ql, "bobobob", 7); - ql_release_iterator(iter); - - iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); - int sz = entry.sz; - if (strncmp((char *)entry.value, "bobobob", 7)) - ERR("Tail doesn't match bobobob, it's %.*s instead", sz, entry.value); - ql_release_iterator(iter); - - for (int i = 0; i < 12; i++) { - iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry); - if (entry.longval != nums[5 + i]) - ERR("Deleted invalid range! 
Expected %lld but got " - "%lld", - entry.longval, nums[5 + i]); - ql_release_iterator(iter); - } - quicklistRelease(ql); - } - } - - TEST_DESC("ltrim test C at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - char num[32]; - long long nums[5000]; - for (int i = 0; i < 33; i++) { - nums[i] = -5157318210846258176 + i; - int sz = ll2string(num, sizeof(num), nums[i]); - quicklistPushTail(ql, num, sz); - } - if (fills[f] == 32) ql_verify(ql, 2, 33, 32, 1); - /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */ - quicklistDelRange(ql, 0, 3); - quicklistDelRange(ql, -29, 4000); /* make sure not loop forever */ - if (fills[f] == 32) ql_verify(ql, 1, 1, 1, 1); - quicklistEntry entry; - iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); - if (entry.longval != -5157318210846258173) ERROR; - ql_release_iterator(iter); - quicklistRelease(ql); - } - } - - TEST_DESC("ltrim test D at compress %d", options[_i]) { - for (int f = 0; f < fill_count; f++) { - quicklist *ql = quicklistNew(fills[f], options[_i]); - char num[32]; - long long nums[5000]; - for (int i = 0; i < 33; i++) { - nums[i] = -5157318210846258176 + i; - int sz = ll2string(num, sizeof(num), nums[i]); - quicklistPushTail(ql, num, sz); - } - if (fills[f] == 32) ql_verify(ql, 2, 33, 32, 1); - quicklistDelRange(ql, -12, 3); - if (ql->count != 30) ERR("Didn't delete exactly three elements! Count is: %lu", ql->count); - quicklistRelease(ql); - } - } - - long long stop = mstime(); - runtime[_i] = stop - start; - } - - /* Run a longer test of compression depth outside of primary test loop. */ - int list_sizes[] = {250, 251, 500, 999, 1000}; - long long start = mstime(); - int list_count = accurate ? 
(int)(sizeof(list_sizes) / sizeof(*list_sizes)) : 1; - for (int list = 0; list < list_count; list++) { - TEST_DESC("verify specific compression of interior nodes with %d list ", list_sizes[list]) { - for (int f = 0; f < fill_count; f++) { - for (int depth = 1; depth < 40; depth++) { - /* skip over many redundant test cases */ - quicklist *ql = quicklistNew(fills[f], depth); - for (int i = 0; i < list_sizes[list]; i++) { - quicklistPushTail(ql, genstr("hello TAIL", i + 1), 64); - quicklistPushHead(ql, genstr("hello HEAD", i + 1), 64); - } - - for (int step = 0; step < 2; step++) { - /* test remove node */ - if (step == 1) { - for (int i = 0; i < list_sizes[list] / 2; i++) { - unsigned char *data; - assert(quicklistPop(ql, QUICKLIST_HEAD, &data, NULL, NULL)); - zfree(data); - assert(quicklistPop(ql, QUICKLIST_TAIL, &data, NULL, NULL)); - zfree(data); - } - } - quicklistNode *node = ql->head; - unsigned int low_raw = ql->compress; - unsigned int high_raw = ql->len - ql->compress; - - for (unsigned int at = 0; at < ql->len; at++, node = node->next) { - if (at < low_raw || at >= high_raw) { - if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) { - ERR("Incorrect compression: node %d is " - "compressed at depth %d ((%u, %u); total " - "nodes: %lu; size: %zu)", - at, depth, low_raw, high_raw, ql->len, node->sz); - } - } else { - if (node->encoding != QUICKLIST_NODE_ENCODING_LZF) { - ERR("Incorrect non-compression: node %d is NOT " - "compressed at depth %d ((%u, %u); total " - "nodes: %lu; size: %zu; attempted: %d)", - at, depth, low_raw, high_raw, ql->len, node->sz, node->attempted_compress); - } - } - } - } - - quicklistRelease(ql); - } - } - } - } - long long stop = mstime(); - - printf("\n"); - for (size_t i = 0; i < option_count; i++) - printf("Test Loop %02d: %0.2f seconds.\n", options[i], (float)runtime[i] / 1000); - printf("Compressions: %0.2f seconds.\n", (float)(stop - start) / 1000); - printf("\n"); - - TEST("bookmark get updated to next item") { - quicklist *ql 
= quicklistNew(1, 0); - quicklistPushTail(ql, "1", 1); - quicklistPushTail(ql, "2", 1); - quicklistPushTail(ql, "3", 1); - quicklistPushTail(ql, "4", 1); - quicklistPushTail(ql, "5", 1); - assert(ql->len == 5); - /* add two bookmarks, one pointing to the node before the last. */ - assert(quicklistBookmarkCreate(&ql, "_dummy", ql->head->next)); - assert(quicklistBookmarkCreate(&ql, "_test", ql->tail->prev)); - /* test that the bookmark returns the right node, delete it and see that the bookmark points to the last node */ - assert(quicklistBookmarkFind(ql, "_test") == ql->tail->prev); - assert(quicklistDelRange(ql, -2, 1)); - assert(quicklistBookmarkFind(ql, "_test") == ql->tail); - /* delete the last node, and see that the bookmark was deleted. */ - assert(quicklistDelRange(ql, -1, 1)); - assert(quicklistBookmarkFind(ql, "_test") == NULL); - /* test that other bookmarks aren't affected */ - assert(quicklistBookmarkFind(ql, "_dummy") == ql->head->next); - assert(quicklistBookmarkFind(ql, "_missing") == NULL); - assert(ql->len == 3); - quicklistBookmarksClear(ql); /* for coverage */ - assert(quicklistBookmarkFind(ql, "_dummy") == NULL); - quicklistRelease(ql); - } - - TEST("bookmark limit") { - int i; - quicklist *ql = quicklistNew(1, 0); - quicklistPushHead(ql, "1", 1); - for (i = 0; i < QL_MAX_BM; i++) assert(quicklistBookmarkCreate(&ql, genstr("", i), ql->head)); - /* when all bookmarks are used, creation fails */ - assert(!quicklistBookmarkCreate(&ql, "_test", ql->head)); - /* delete one and see that we can now create another */ - assert(quicklistBookmarkDelete(ql, "0")); - assert(quicklistBookmarkCreate(&ql, "_test", ql->head)); - /* delete one and see that the rest survive */ - assert(quicklistBookmarkDelete(ql, "_test")); - for (i = 1; i < QL_MAX_BM; i++) assert(quicklistBookmarkFind(ql, genstr("", i)) == ql->head); - /* make sure the deleted ones are indeed gone */ - assert(!quicklistBookmarkFind(ql, "0")); - assert(!quicklistBookmarkFind(ql, "_test")); - 
quicklistRelease(ql); - } - - if (flags & TEST_LARGE_MEMORY) { - TEST("compress and decompress quicklist listpack node") { - quicklistNode *node = quicklistCreateNode(); - node->entry = lpNew(0); - - /* Just to avoid triggering the assertion in __quicklistCompressNode(), - * it disables the passing of quicklist head or tail node. */ - node->prev = quicklistCreateNode(); - node->next = quicklistCreateNode(); - - /* Create a rand string */ - size_t sz = (1 << 25); /* 32MB per one entry */ - unsigned char *s = zmalloc(sz); - randstring(s, sz); - - /* Keep filling the node, until it reaches 1GB */ - for (int i = 0; i < 32; i++) { - node->entry = lpAppend(node->entry, s, sz); - quicklistNodeUpdateSz(node); - - long long start = mstime(); - assert(__quicklistCompressNode(node)); - assert(__quicklistDecompressNode(node)); - printf("Compress and decompress: %zu MB in %.2f seconds.\n", node->sz / 1024 / 1024, - (float)(mstime() - start) / 1000); - } - - zfree(s); - zfree(node->prev); - zfree(node->next); - zfree(node->entry); - zfree(node); - } - -#if ULONG_MAX >= 0xffffffffffffffff - TEST("compress and decomress quicklist plain node large than UINT32_MAX") { - size_t sz = (1ull << 32); - unsigned char *s = zmalloc(sz); - randstring(s, sz); - memcpy(s, "helloworld", 10); - memcpy(s + sz - 10, "1234567890", 10); - - quicklistNode *node = __quicklistCreateNode(QUICKLIST_NODE_CONTAINER_PLAIN, s, sz); - - /* Just to avoid triggering the assertion in __quicklistCompressNode(), - * it disables the passing of quicklist head or tail node. 
*/ - node->prev = quicklistCreateNode(); - node->next = quicklistCreateNode(); - - long long start = mstime(); - assert(__quicklistCompressNode(node)); - assert(__quicklistDecompressNode(node)); - printf("Compress and decompress: %zu MB in %.2f seconds.\n", node->sz / 1024 / 1024, - (float)(mstime() - start) / 1000); - - assert(memcmp(node->entry, "helloworld", 10) == 0); - assert(memcmp(node->entry + sz - 10, "1234567890", 10) == 0); - zfree(node->prev); - zfree(node->next); - zfree(node->entry); - zfree(node); - } -#endif - } - - if (!err) - printf("ALL TESTS PASSED!\n"); - else - ERR("Sorry, not all tests passed! In fact, %d tests failed.", err); - - return err; -} -#endif diff --git a/src/quicklist.h b/src/quicklist.h index bb94807913..4411f823b0 100644 --- a/src/quicklist.h +++ b/src/quicklist.h @@ -198,10 +198,6 @@ quicklistNode *quicklistBookmarkFind(quicklist *ql, const char *name); void quicklistBookmarksClear(quicklist *ql); int quicklistSetPackedThreshold(size_t sz); -#ifdef SERVER_TEST -int quicklistTest(int argc, char *argv[], int flags); -#endif - /* Directions for iterators */ #define AL_START_HEAD 0 #define AL_START_TAIL 1 diff --git a/src/server.c b/src/server.c index be3982278f..3217351faf 100644 --- a/src/server.c +++ b/src/server.c @@ -6774,85 +6774,12 @@ int iAmPrimary(void) { (server.cluster_enabled && clusterNodeIsPrimary(getMyClusterNode()))); } -#ifdef SERVER_TEST -#include "testhelp.h" -#include "intset.h" /* Compact integer set structure */ - -int __failed_tests = 0; -int __test_num = 0; - -/* The flags are the following: - * --accurate: Runs tests with more iterations. - * --large-memory: Enables tests that consume more than 100mb. 
*/ -typedef int serverTestProc(int argc, char **argv, int flags); -struct serverTest { - char *name; - serverTestProc *proc; - int failed; -} serverTests[] = { - {"quicklist", quicklistTest}, -}; -serverTestProc *getTestProcByName(const char *name) { - int numtests = sizeof(serverTests) / sizeof(struct serverTest); - for (int j = 0; j < numtests; j++) { - if (!strcasecmp(name, serverTests[j].name)) { - return serverTests[j].proc; - } - } - return NULL; -} -#endif - /* Main is marked as weak so that unit tests can use their own main function. */ __attribute__((weak)) int main(int argc, char **argv) { struct timeval tv; int j; char config_from_stdin = 0; -#ifdef SERVER_TEST - monotonicInit(); /* Required for dict tests, that are relying on monotime during dict rehashing. */ - if (argc >= 3 && !strcasecmp(argv[1], "test")) { - int flags = 0; - for (j = 3; j < argc; j++) { - char *arg = argv[j]; - if (!strcasecmp(arg, "--accurate")) - flags |= TEST_ACCURATE; - else if (!strcasecmp(arg, "--large-memory")) - flags |= TEST_LARGE_MEMORY; - else if (!strcasecmp(arg, "--valgrind")) - flags |= TEST_VALGRIND; - } - - if (!strcasecmp(argv[2], "all")) { - int numtests = sizeof(serverTests) / sizeof(struct serverTest); - for (j = 0; j < numtests; j++) { - serverTests[j].failed = (serverTests[j].proc(argc, argv, flags) != 0); - } - - /* Report tests result */ - int failed_num = 0; - for (j = 0; j < numtests; j++) { - if (serverTests[j].failed) { - failed_num++; - printf("[failed] Test - %s\n", serverTests[j].name); - } else { - printf("[ok] Test - %s\n", serverTests[j].name); - } - } - - printf("%d tests, %d passed, %d failed\n", numtests, numtests - failed_num, failed_num); - - return failed_num == 0 ? 0 : 1; - } else { - serverTestProc *proc = getTestProcByName(argv[2]); - if (!proc) return -1; /* test not found */ - return proc(argc, argv, flags); - } - - return 0; - } -#endif - /* We need to initialize our libraries, and the server configuration. 
*/ #ifdef INIT_SETPROCTITLE_REPLACEMENT spt_init(argc, argv); diff --git a/src/unit/test_files.h b/src/unit/test_files.h index c2b062039a..87bc031fb4 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -84,6 +84,64 @@ int test_listpackBenchmarkLpValidateIntegrity(int argc, char **argv, int flags); int test_listpackBenchmarkLpCompareWithString(int argc, char **argv, int flags); int test_listpackBenchmarkLpCompareWithNumber(int argc, char **argv, int flags); int test_listpackBenchmarkFree(int argc, char **argv, int flags); +int test_quicklistCreateList(int argc, char **argv, int flags); +int test_quicklistAddToTailOfEmptyList(int argc, char **argv, int flags); +int test_quicklistAddToHeadOfEmptyList(int argc, char **argv, int flags); +int test_quicklistAddToTail5xAtCompress(int argc, char **argv, int flags); +int test_quicklistAddToHead5xAtCompress(int argc, char **argv, int flags); +int test_quicklistAddToTail500xAtCompress(int argc, char **argv, int flags); +int test_quicklistAddToHead500xAtCompress(int argc, char **argv, int flags); +int test_quicklistRotateEmpty(int argc, char **argv, int flags); +int test_quicklistComprassionPlainNode(int argc, char **argv, int flags); +int test_quicklistNextPlainNode(int argc, char **argv, int flags); +int test_quicklistRotatePlainNode(int argc, char **argv, int flags); +int test_quicklistRotateOneValOnce(int argc, char **argv, int flags); +int test_quicklistRotate500Val5000TimesAtCompress(int argc, char **argv, int flags); +int test_quicklistPopEmpty(int argc, char **argv, int flags); +int test_quicklistPop1StringFrom1(int argc, char **argv, int flags); +int test_quicklistPopHead1NumberFrom1(int argc, char **argv, int flags); +int test_quicklistPopHead500From500(int argc, char **argv, int flags); +int test_quicklistPopHead5000From500(int argc, char **argv, int flags); +int test_quicklistIterateForwardOver500List(int argc, char **argv, int flags); +int test_quicklistIterateReverseOver500List(int argc, char **argv, 
int flags); +int test_quicklistInsertAfter1Element(int argc, char **argv, int flags); +int test_quicklistInsertBefore1Element(int argc, char **argv, int flags); +int test_quicklistInsertHeadWhileHeadNodeIsFull(int argc, char **argv, int flags); +int test_quicklistInsertTailWhileTailNodeIsFull(int argc, char **argv, int flags); +int test_quicklistInsertOnceInElementsWhileIteratingAtCompress(int argc, char **argv, int flags); +int test_quicklistInsertBefore250NewInMiddleOf500ElementsAtCompress(int argc, char **argv, int flags); +int test_quicklistInsertAfter250NewInMiddleOf500ElementsAtCompress(int argc, char **argv, int flags); +int test_quicklistDuplicateEmptyList(int argc, char **argv, int flags); +int test_quicklistDuplicateListOf1Element(int argc, char **argv, int flags); +int test_quicklistDuplicateListOf500(int argc, char **argv, int flags); +int test_quicklistIndex1200From500ListAtFill(int argc, char **argv, int flags); +int test_quicklistIndex12From500ListAtFill(int argc, char **argv, int flags); +int test_quicklistIndex100From500ListAtFill(int argc, char **argv, int flags); +int test_quicklistIndexTooBig1From50ListAtFill(int argc, char **argv, int flags); +int test_quicklistDeleteRangeEmptyList(int argc, char **argv, int flags); +int test_quicklistDeleteRangeOfEntireNodeInListOfOneNode(int argc, char **argv, int flags); +int test_quicklistDeleteRangeOfEntireNodeWithOverflowCounts(int argc, char **argv, int flags); +int test_quicklistDeleteMiddle100Of500List(int argc, char **argv, int flags); +int test_quicklistDeleteLessThanFillButAcrossNodes(int argc, char **argv, int flags); +int test_quicklistDeleteNegative1From500List(int argc, char **argv, int flags); +int test_quicklistDeleteNegative1From500ListWithOverflowCounts(int argc, char **argv, int flags); +int test_quicklistDeleteNegative100From500List(int argc, char **argv, int flags); +int test_quicklistDelete10Count5From50List(int argc, char **argv, int flags); +int test_quicklistNumbersOnlyListRead(int 
argc, char **argv, int flags); +int test_quicklistNumbersLargerListRead(int argc, char **argv, int flags); +int test_quicklistNumbersLargerListReadB(int argc, char **argv, int flags); +int test_quicklistLremTestAtCompress(int argc, char **argv, int flags); +int test_quicklistIterateReverseDeleteAtCompress(int argc, char **argv, int flags); +int test_quicklistIteratorAtIndexTestAtCompress(int argc, char **argv, int flags); +int test_quicklistLtrimTestAAtCompress(int argc, char **argv, int flags); +int test_quicklistLtrimTestBAtCompress(int argc, char **argv, int flags); +int test_quicklistLtrimTestCAtCompress(int argc, char **argv, int flags); +int test_quicklistLtrimTestDAtCompress(int argc, char **argv, int flags); +int test_quicklistVerifySpecificCompressionOfInteriorNodes(int argc, char **argv, int flags); +int test_quicklistBookmarkGetUpdatedToNextItem(int argc, char **argv, int flags); +int test_quicklistBookmarkLimit(int argc, char **argv, int flags); +int test_quicklistCompressAndDecompressQuicklistListpackNode(int argc, char **argv, int flags); +int test_quicklistCompressAndDecomressQuicklistPlainNodeLargeThanUINT32MAX(int argc, char **argv, int flags); int test_raxRandomWalk(int argc, char **argv, int flags); int test_raxIteratorUnitTests(int argc, char **argv, int flags); int test_raxTryInsertUnitTests(int argc, char **argv, int flags); @@ -157,6 +215,7 @@ unitTest __test_endianconv_c[] = {{"test_endianconv", test_endianconv}, {NULL, N unitTest __test_intset_c[] = {{"test_intsetValueEncodings", test_intsetValueEncodings}, {"test_intsetBasicAdding", test_intsetBasicAdding}, {"test_intsetLargeNumberRandomAdd", test_intsetLargeNumberRandomAdd}, {"test_intsetUpgradeFromint16Toint32", test_intsetUpgradeFromint16Toint32}, {"test_intsetUpgradeFromint16Toint64", test_intsetUpgradeFromint16Toint64}, {"test_intsetUpgradeFromint32Toint64", test_intsetUpgradeFromint32Toint64}, {"test_intsetStressLookups", test_intsetStressLookups}, {"test_intsetStressAddDelete", 
test_intsetStressAddDelete}, {NULL, NULL}}; unitTest __test_kvstore_c[] = {{"test_kvstoreAdd16Keys", test_kvstoreAdd16Keys}, {"test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreIteratorRemoveAllKeysDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysNoDeleteEmptyDict}, {"test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict", test_kvstoreDictIteratorRemoveAllKeysDeleteEmptyDict}, {NULL, NULL}}; unitTest __test_listpack_c[] = {{"test_listpackCreateIntList", test_listpackCreateIntList}, {"test_listpackCreateList", test_listpackCreateList}, {"test_listpackLpPrepend", test_listpackLpPrepend}, {"test_listpackLpPrependInteger", test_listpackLpPrependInteger}, {"test_listpackGetELementAtIndex", test_listpackGetELementAtIndex}, {"test_listpackPop", test_listpackPop}, {"test_listpackGetELementAtIndex2", test_listpackGetELementAtIndex2}, {"test_listpackIterate0toEnd", test_listpackIterate0toEnd}, {"test_listpackIterate1toEnd", test_listpackIterate1toEnd}, {"test_listpackIterate2toEnd", test_listpackIterate2toEnd}, {"test_listpackIterateBackToFront", test_listpackIterateBackToFront}, {"test_listpackIterateBackToFrontWithDelete", test_listpackIterateBackToFrontWithDelete}, {"test_listpackDeleteWhenNumIsMinusOne", test_listpackDeleteWhenNumIsMinusOne}, {"test_listpackDeleteWithNegativeIndex", test_listpackDeleteWithNegativeIndex}, {"test_listpackDeleteInclusiveRange0_0", test_listpackDeleteInclusiveRange0_0}, {"test_listpackDeleteInclusiveRange0_1", test_listpackDeleteInclusiveRange0_1}, {"test_listpackDeleteInclusiveRange1_2", test_listpackDeleteInclusiveRange1_2}, {"test_listpackDeleteWitStartIndexOutOfRange", test_listpackDeleteWitStartIndexOutOfRange}, {"test_listpackDeleteWitNumOverflow", test_listpackDeleteWitNumOverflow}, {"test_listpackBatchDelete", test_listpackBatchDelete}, 
{"test_listpackDeleteFooWhileIterating", test_listpackDeleteFooWhileIterating}, {"test_listpackReplaceWithSameSize", test_listpackReplaceWithSameSize}, {"test_listpackReplaceWithDifferentSize", test_listpackReplaceWithDifferentSize}, {"test_listpackRegressionGt255Bytes", test_listpackRegressionGt255Bytes}, {"test_listpackCreateLongListAndCheckIndices", test_listpackCreateLongListAndCheckIndices}, {"test_listpackCompareStrsWithLpEntries", test_listpackCompareStrsWithLpEntries}, {"test_listpackLpMergeEmptyLps", test_listpackLpMergeEmptyLps}, {"test_listpackLpMergeLp1Larger", test_listpackLpMergeLp1Larger}, {"test_listpackLpMergeLp2Larger", test_listpackLpMergeLp2Larger}, {"test_listpackLpNextRandom", test_listpackLpNextRandom}, {"test_listpackLpNextRandomCC", test_listpackLpNextRandomCC}, {"test_listpackRandomPairWithOneElement", test_listpackRandomPairWithOneElement}, {"test_listpackRandomPairWithManyElements", test_listpackRandomPairWithManyElements}, {"test_listpackRandomPairsWithOneElement", test_listpackRandomPairsWithOneElement}, {"test_listpackRandomPairsWithManyElements", test_listpackRandomPairsWithManyElements}, {"test_listpackRandomPairsUniqueWithOneElement", test_listpackRandomPairsUniqueWithOneElement}, {"test_listpackRandomPairsUniqueWithManyElements", test_listpackRandomPairsUniqueWithManyElements}, {"test_listpackPushVariousEncodings", test_listpackPushVariousEncodings}, {"test_listpackLpFind", test_listpackLpFind}, {"test_listpackLpValidateIntegrity", test_listpackLpValidateIntegrity}, {"test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN", test_listpackNumberOfElementsExceedsLP_HDR_NUMELE_UNKNOWN}, {"test_listpackStressWithRandom", test_listpackStressWithRandom}, {"test_listpackSTressWithVariableSize", test_listpackSTressWithVariableSize}, {"test_listpackBenchmarkInit", test_listpackBenchmarkInit}, {"test_listpackBenchmarkLpAppend", test_listpackBenchmarkLpAppend}, {"test_listpackBenchmarkLpFindString", test_listpackBenchmarkLpFindString}, 
{"test_listpackBenchmarkLpFindNumber", test_listpackBenchmarkLpFindNumber}, {"test_listpackBenchmarkLpSeek", test_listpackBenchmarkLpSeek}, {"test_listpackBenchmarkLpValidateIntegrity", test_listpackBenchmarkLpValidateIntegrity}, {"test_listpackBenchmarkLpCompareWithString", test_listpackBenchmarkLpCompareWithString}, {"test_listpackBenchmarkLpCompareWithNumber", test_listpackBenchmarkLpCompareWithNumber}, {"test_listpackBenchmarkFree", test_listpackBenchmarkFree}, {NULL, NULL}}; +unitTest __test_quicklist_c[] = {{"test_quicklistCreateList", test_quicklistCreateList}, {"test_quicklistAddToTailOfEmptyList", test_quicklistAddToTailOfEmptyList}, {"test_quicklistAddToHeadOfEmptyList", test_quicklistAddToHeadOfEmptyList}, {"test_quicklistAddToTail5xAtCompress", test_quicklistAddToTail5xAtCompress}, {"test_quicklistAddToHead5xAtCompress", test_quicklistAddToHead5xAtCompress}, {"test_quicklistAddToTail500xAtCompress", test_quicklistAddToTail500xAtCompress}, {"test_quicklistAddToHead500xAtCompress", test_quicklistAddToHead500xAtCompress}, {"test_quicklistRotateEmpty", test_quicklistRotateEmpty}, {"test_quicklistComprassionPlainNode", test_quicklistComprassionPlainNode}, {"test_quicklistNextPlainNode", test_quicklistNextPlainNode}, {"test_quicklistRotatePlainNode", test_quicklistRotatePlainNode}, {"test_quicklistRotateOneValOnce", test_quicklistRotateOneValOnce}, {"test_quicklistRotate500Val5000TimesAtCompress", test_quicklistRotate500Val5000TimesAtCompress}, {"test_quicklistPopEmpty", test_quicklistPopEmpty}, {"test_quicklistPop1StringFrom1", test_quicklistPop1StringFrom1}, {"test_quicklistPopHead1NumberFrom1", test_quicklistPopHead1NumberFrom1}, {"test_quicklistPopHead500From500", test_quicklistPopHead500From500}, {"test_quicklistPopHead5000From500", test_quicklistPopHead5000From500}, {"test_quicklistIterateForwardOver500List", test_quicklistIterateForwardOver500List}, {"test_quicklistIterateReverseOver500List", test_quicklistIterateReverseOver500List}, 
{"test_quicklistInsertAfter1Element", test_quicklistInsertAfter1Element}, {"test_quicklistInsertBefore1Element", test_quicklistInsertBefore1Element}, {"test_quicklistInsertHeadWhileHeadNodeIsFull", test_quicklistInsertHeadWhileHeadNodeIsFull}, {"test_quicklistInsertTailWhileTailNodeIsFull", test_quicklistInsertTailWhileTailNodeIsFull}, {"test_quicklistInsertOnceInElementsWhileIteratingAtCompress", test_quicklistInsertOnceInElementsWhileIteratingAtCompress}, {"test_quicklistInsertBefore250NewInMiddleOf500ElementsAtCompress", test_quicklistInsertBefore250NewInMiddleOf500ElementsAtCompress}, {"test_quicklistInsertAfter250NewInMiddleOf500ElementsAtCompress", test_quicklistInsertAfter250NewInMiddleOf500ElementsAtCompress}, {"test_quicklistDuplicateEmptyList", test_quicklistDuplicateEmptyList}, {"test_quicklistDuplicateListOf1Element", test_quicklistDuplicateListOf1Element}, {"test_quicklistDuplicateListOf500", test_quicklistDuplicateListOf500}, {"test_quicklistIndex1200From500ListAtFill", test_quicklistIndex1200From500ListAtFill}, {"test_quicklistIndex12From500ListAtFill", test_quicklistIndex12From500ListAtFill}, {"test_quicklistIndex100From500ListAtFill", test_quicklistIndex100From500ListAtFill}, {"test_quicklistIndexTooBig1From50ListAtFill", test_quicklistIndexTooBig1From50ListAtFill}, {"test_quicklistDeleteRangeEmptyList", test_quicklistDeleteRangeEmptyList}, {"test_quicklistDeleteRangeOfEntireNodeInListOfOneNode", test_quicklistDeleteRangeOfEntireNodeInListOfOneNode}, {"test_quicklistDeleteRangeOfEntireNodeWithOverflowCounts", test_quicklistDeleteRangeOfEntireNodeWithOverflowCounts}, {"test_quicklistDeleteMiddle100Of500List", test_quicklistDeleteMiddle100Of500List}, {"test_quicklistDeleteLessThanFillButAcrossNodes", test_quicklistDeleteLessThanFillButAcrossNodes}, {"test_quicklistDeleteNegative1From500List", test_quicklistDeleteNegative1From500List}, {"test_quicklistDeleteNegative1From500ListWithOverflowCounts", 
test_quicklistDeleteNegative1From500ListWithOverflowCounts}, {"test_quicklistDeleteNegative100From500List", test_quicklistDeleteNegative100From500List}, {"test_quicklistDelete10Count5From50List", test_quicklistDelete10Count5From50List}, {"test_quicklistNumbersOnlyListRead", test_quicklistNumbersOnlyListRead}, {"test_quicklistNumbersLargerListRead", test_quicklistNumbersLargerListRead}, {"test_quicklistNumbersLargerListReadB", test_quicklistNumbersLargerListReadB}, {"test_quicklistLremTestAtCompress", test_quicklistLremTestAtCompress}, {"test_quicklistIterateReverseDeleteAtCompress", test_quicklistIterateReverseDeleteAtCompress}, {"test_quicklistIteratorAtIndexTestAtCompress", test_quicklistIteratorAtIndexTestAtCompress}, {"test_quicklistLtrimTestAAtCompress", test_quicklistLtrimTestAAtCompress}, {"test_quicklistLtrimTestBAtCompress", test_quicklistLtrimTestBAtCompress}, {"test_quicklistLtrimTestCAtCompress", test_quicklistLtrimTestCAtCompress}, {"test_quicklistLtrimTestDAtCompress", test_quicklistLtrimTestDAtCompress}, {"test_quicklistVerifySpecificCompressionOfInteriorNodes", test_quicklistVerifySpecificCompressionOfInteriorNodes}, {"test_quicklistBookmarkGetUpdatedToNextItem", test_quicklistBookmarkGetUpdatedToNextItem}, {"test_quicklistBookmarkLimit", test_quicklistBookmarkLimit}, {"test_quicklistCompressAndDecompressQuicklistListpackNode", test_quicklistCompressAndDecompressQuicklistListpackNode}, {"test_quicklistCompressAndDecomressQuicklistPlainNodeLargeThanUINT32MAX", test_quicklistCompressAndDecomressQuicklistPlainNodeLargeThanUINT32MAX}, {NULL, NULL}}; unitTest __test_rax_c[] = {{"test_raxRandomWalk", test_raxRandomWalk}, {"test_raxIteratorUnitTests", test_raxIteratorUnitTests}, {"test_raxTryInsertUnitTests", test_raxTryInsertUnitTests}, {"test_raxRegressionTest1", test_raxRegressionTest1}, {"test_raxRegressionTest2", test_raxRegressionTest2}, {"test_raxRegressionTest3", test_raxRegressionTest3}, {"test_raxRegressionTest4", test_raxRegressionTest4}, 
{"test_raxRegressionTest5", test_raxRegressionTest5}, {"test_raxRegressionTest6", test_raxRegressionTest6}, {"test_raxBenchmark", test_raxBenchmark}, {"test_raxHugeKey", test_raxHugeKey}, {"test_raxFuzz", test_raxFuzz}, {NULL, NULL}}; unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {"test_sdssplitargs", test_sdssplitargs}, {NULL, NULL}}; unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; @@ -176,6 +235,7 @@ struct unitTestSuite { {"test_intset.c", __test_intset_c}, {"test_kvstore.c", __test_kvstore_c}, {"test_listpack.c", __test_listpack_c}, + {"test_quicklist.c", __test_quicklist_c}, {"test_rax.c", __test_rax_c}, {"test_sds.c", __test_sds_c}, {"test_sha1.c", __test_sha1_c}, diff --git a/src/unit/test_quicklist.c b/src/unit/test_quicklist.c new file mode 100644 index 0000000000..6addb33f41 --- /dev/null +++ b/src/unit/test_quicklist.c @@ -0,0 +1,2300 @@ +#include +#include +#include +#include "test_help.h" +#include +#include + +#include "../zmalloc.h" +#include "../listpack.h" +#include "../quicklist.c" + +static int options[] = {0, 1, 2, 3, 4, 5, 6, 10}; +static int option_count = 8; + +static int fills[] = {-5, -4, -3, -2, -1, 0, + 1, 2, 32, 66, 128, 999}; +static int fill_count = 12; +static long long runtime[8]; +static unsigned int err = 0; + +/*----------------------------------------------------------------------------- + * Unit Function + *----------------------------------------------------------------------------*/ +/* Return the UNIX time in microseconds */ +static long long ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec) * 1000000; + ust += tv.tv_usec; + return ust; +} + +/* Return the UNIX time in milliseconds */ +static long long mstime(void) { + return ustime() / 1000; +} + +/* Generate new string concatenating integer i against string 'prefix' */ +static char 
*genstr(char *prefix, int i) { + static char result[64] = {0}; + snprintf(result, sizeof(result), "%s%d", prefix, i); + return result; +} + +__attribute__((unused)) static void randstring(unsigned char *target, size_t sz) { + size_t p = 0; + int minval, maxval; + switch (rand() % 3) { + case 0: + minval = 'a'; + maxval = 'z'; + break; + case 1: + minval = '0'; + maxval = '9'; + break; + case 2: + minval = 'A'; + maxval = 'Z'; + break; + default: + abort(); + } + + while (p < sz) + target[p++] = minval + rand() % (maxval - minval + 1); +} + +#define TEST(name) printf("test — %s\n", name); + +#define QL_TEST_VERBOSE 0 +static void ql_info(quicklist *ql) { +#if QL_TEST_VERBOSE + TEST_PRINT_INFO("Container length: %lu\n", ql->len); + TEST_PRINT_INFO("Container size: %lu\n", ql->count); + if (ql->head) + TEST_PRINT_INFO("\t(zsize head: %lu)\n", lpLength(ql->head->entry)); + if (ql->tail) + TEST_PRINT_INFO("\t(zsize tail: %lu)\n", lpLength(ql->tail->entry)); +#else + UNUSED(ql); +#endif +} + +/* Iterate over an entire quicklist. + * Print the list if 'print' == 1. + * + * Returns physical count of elements found by iterating over the list. */ +static int _itrprintr(quicklist *ql, int print, int forward) { + quicklistIter *iter = + quicklistGetIterator(ql, forward ? AL_START_HEAD : AL_START_TAIL); + quicklistEntry entry; + int i = 0; + int p = 0; + quicklistNode *prev = NULL; + while (quicklistNext(iter, &entry)) { + if (entry.node != prev) { + /* Count the number of list nodes too */ + p++; + prev = entry.node; + } + if (print) { + int size = (entry.sz > (1 << 20)) ? 
1 << 20 : entry.sz; + TEST_PRINT_INFO("[%3d (%2d)]: [%.*s] (%lld)\n", i, p, size, + (char *)entry.value, entry.longval); + } + i++; + } + quicklistReleaseIterator(iter); + return i; +} + +static int itrprintr(quicklist *ql, int print) { + return _itrprintr(ql, print, 1); +} + +static int itrprintr_rev(quicklist *ql, int print) { + return _itrprintr(ql, print, 0); +} + +#define ql_verify(a, b, c, d, e) \ + do { \ + err += _ql_verify((a), (b), (c), (d), (e)); \ + } while (0) + +static int _ql_verify_compress(quicklist *ql) { + int errors = 0; + if (quicklistAllowsCompression(ql)) { + quicklistNode *node = ql->head; + unsigned int low_raw = ql->compress; + unsigned int high_raw = ql->len - ql->compress; + + for (unsigned int at = 0; at < ql->len; at++, node = node->next) { + if (node && (at < low_raw || at >= high_raw)) { + if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) { + TEST_PRINT_INFO("Incorrect compression: node %d is " + "compressed at depth %d ((%u, %u); total " + "nodes: %lu; size: %zu; recompress: %d)", + at, ql->compress, low_raw, high_raw, ql->len, node->sz, + node->recompress); + errors++; + } + } else { + if (node->encoding != QUICKLIST_NODE_ENCODING_LZF && + !node->attempted_compress) { + TEST_PRINT_INFO("Incorrect non-compression: node %d is NOT " + "compressed at depth %d ((%u, %u); total " + "nodes: %lu; size: %zu; recompress: %d; attempted: %d)", + at, ql->compress, low_raw, high_raw, ql->len, node->sz, + node->recompress, node->attempted_compress); + errors++; + } + } + } + } + return errors; +} + +/* Verify list metadata matches physical list contents. 
*/ +static int _ql_verify(quicklist *ql, uint32_t len, uint32_t count, uint32_t head_count, uint32_t tail_count) { + int errors = 0; + + ql_info(ql); + if (len != ql->len) { + TEST_PRINT_INFO("quicklist length wrong: expected %d, got %lu", len, ql->len); + errors++; + } + + if (count != ql->count) { + TEST_PRINT_INFO("quicklist count wrong: expected %d, got %lu", count, ql->count); + errors++; + } + + int loopr = itrprintr(ql, 0); + if (loopr != (int)ql->count) { + TEST_PRINT_INFO("quicklist cached count not match actual count: expected %lu, got " + "%d", + ql->count, loopr); + errors++; + } + + int rloopr = itrprintr_rev(ql, 0); + if (loopr != rloopr) { + TEST_PRINT_INFO("quicklist has different forward count than reverse count! " + "Forward count is %d, reverse count is %d.", + loopr, rloopr); + errors++; + } + + if (ql->len == 0 && !errors) { + return errors; + } + + if (ql->head && head_count != ql->head->count && + head_count != lpLength(ql->head->entry)) { + TEST_PRINT_INFO("quicklist head count wrong: expected %d, " + "got cached %d vs. actual %lu", + head_count, ql->head->count, lpLength(ql->head->entry)); + errors++; + } + + if (ql->tail && tail_count != ql->tail->count && + tail_count != lpLength(ql->tail->entry)) { + TEST_PRINT_INFO("quicklist tail count wrong: expected %d, " + "got cached %u vs. actual %lu", + tail_count, ql->tail->count, lpLength(ql->tail->entry)); + errors++; + } + + errors += _ql_verify_compress(ql); + return errors; +} + +/* Release iterator and verify compress correctly. 
*/ +static void ql_release_iterator(quicklistIter *iter) { + quicklist *ql = NULL; + if (iter) ql = iter->quicklist; + quicklistReleaseIterator(iter); + if (ql && _ql_verify_compress(ql)) { + abort(); + } +} + +/*----------------------------------------------------------------------------- + * Quicklist Unit Test + *----------------------------------------------------------------------------*/ +int test_quicklistCreateList(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + TEST("create list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistAddToTailOfEmptyList(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("add to tail of empty list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushTail(ql, "hello", 6); + /* 1 for head and 1 for tail because 1 node = head = tail */ + ql_verify(ql, 1, 1, 1, 1); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistAddToHeadOfEmptyList(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("add to head of empty list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushHead(ql, "hello", 6); + /* 1 for head and 1 for tail because 1 node = head = tail */ + ql_verify(ql, 1, 1, 1, 1); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int 
test_quicklistAddToTail5xAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("add to tail 5x at compress"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 5; i++) quicklistPushTail(ql, genstr("hello", i), 32); + if (ql->count != 5) { + err++; + }; + if (fills[f] == 32) ql_verify(ql, 1, 5, 5, 5); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistAddToHead5xAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("add to head 5x at compress"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 5; i++) quicklistPushHead(ql, genstr("hello", i), 32); + if (ql->count != 5) { + err++; + }; + if (fills[f] == 32) ql_verify(ql, 1, 5, 5, 5); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistAddToTail500xAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("add to tail 500x at compress"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i), 64); + if (ql->count != 500) { + err++; + }; + if (fills[f] == 32) ql_verify(ql, 16, 500, 32, 20); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + 
TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistAddToHead500xAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("add to head 500x at compress"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); + if (ql->count != 500) { + err++; + }; + if (fills[f] == 32) ql_verify(ql, 16, 500, 20, 32); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistRotateEmpty(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("rotate empty"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistRotate(ql); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistComprassionPlainNode(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("Comprassion Plain node"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + + char buf[large_limit]; + quicklist *ql = quicklistNew(fills[f], 1); + for (int i = 0; i < 500; i++) { + /* Set to 256 to allow the node to be triggered to compress, + * if it is less than 48(nocompress), the test will be successful. 
*/ + snprintf(buf, sizeof(buf), "hello%d", i); + quicklistPushHead(ql, buf, large_limit); + } + + quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); + quicklistEntry entry; + int i = 0; + while (quicklistNext(iter, &entry)) { + TEST_ASSERT(QL_NODE_IS_PLAIN(entry.node)); + snprintf(buf, sizeof(buf), "hello%d", i); + if (strcmp((char *)entry.value, buf)) { + TEST_PRINT_INFO("value [%s] didn't match [%s] at position %d", entry.value, buf, i); + err++; + } + i++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistNextPlainNode(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("NEXT plain node"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + quicklist *ql = quicklistNew(fills[f], options[_i]); + + char buf[large_limit]; + memcpy(buf, "plain", 5); + quicklistPushHead(ql, buf, large_limit); + quicklistPushHead(ql, buf, large_limit); + quicklistPushHead(ql, "packed3", 7); + quicklistPushHead(ql, "packed4", 7); + quicklistPushHead(ql, buf, large_limit); + + quicklistEntry entry; + quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); + + while (quicklistNext(iter, &entry) != 0) { + if (QL_NODE_IS_PLAIN(entry.node)) + TEST_ASSERT(!memcmp(entry.value, "plain", 5)); + else + TEST_ASSERT(!memcmp(entry.value, "packed", 6)); + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistRotatePlainNode(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("rotate plain node"); 
+ for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; + + unsigned char *data = NULL; + size_t sz; + long long lv; + int i = 0; + quicklist *ql = quicklistNew(fills[f], options[_i]); + char buf[large_limit]; + memcpy(buf, "hello1", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello4", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello3", 6); + quicklistPushHead(ql, buf, large_limit); + memcpy(buf, "hello2", 6); + quicklistPushHead(ql, buf, large_limit); + quicklistRotate(ql); + + for (i = 1; i < 5; i++) { + TEST_ASSERT(QL_NODE_IS_PLAIN(ql->tail)); + quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); + int temp_char = data[5]; + zfree(data); + TEST_ASSERT(temp_char == ('0' + i)); + } + + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistRotateOneValOnce(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("rotate one val once"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + quicklistPushHead(ql, "hello", 6); + quicklistRotate(ql); + /* Ignore compression verify because listpack is + * too small to compress. 
*/ + ql_verify(ql, 1, 1, 1, 1); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistRotate500Val5000TimesAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("rotate 500 val 5000 times at compress"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + quicklistPushHead(ql, "900", 3); + quicklistPushHead(ql, "7000", 4); + quicklistPushHead(ql, "-1200", 5); + quicklistPushHead(ql, "42", 2); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 64); + ql_info(ql); + for (int i = 0; i < 5000; i++) { + ql_info(ql); + quicklistRotate(ql); + } + if (fills[f] == 1) + ql_verify(ql, 504, 504, 1, 1); + else if (fills[f] == 2) + ql_verify(ql, 252, 504, 2, 2); + else if (fills[f] == 32) + ql_verify(ql, 16, 504, 32, 24); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistPopEmpty(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("pop empty"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPop(ql, QUICKLIST_HEAD, NULL, NULL, NULL); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistPop1StringFrom1(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("pop 1 string from 1"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, 
options[_i]); + char *populate = genstr("hello", 331); + quicklistPushHead(ql, populate, 32); + unsigned char *data; + size_t sz; + long long lv; + ql_info(ql); + TEST_ASSERT(quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv)); + TEST_ASSERT(data != NULL); + TEST_ASSERT(sz == 32); + if (strcmp(populate, (char *)data)) { + int size = sz; + TEST_PRINT_INFO("Pop'd value (%.*s) didn't equal original value (%s)", size, data, populate); + err++; + } + zfree(data); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistPopHead1NumberFrom1(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("pop head 1 number from 1"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushHead(ql, "55513", 5); + unsigned char *data; + size_t sz; + long long lv; + ql_info(ql); + TEST_ASSERT(quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv)); + TEST_ASSERT(data == NULL); + TEST_ASSERT(lv == 55513); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistPopHead500From500(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("pop head 500 from 500"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); + ql_info(ql); + for (int i = 0; i < 500; i++) { + unsigned char *data; + size_t sz; + long long lv; + int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); + TEST_ASSERT(ret == 1); + TEST_ASSERT(data != NULL); + TEST_ASSERT(sz == 32); + if 
(strcmp(genstr("hello", 499 - i), (char *)data)) { + int size = sz; + TEST_PRINT_INFO("Pop'd value (%.*s) didn't equal original value (%s)", size, data, genstr("hello", 499 - i)); + err++; + } + zfree(data); + } + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistPopHead5000From500(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("pop head 5000 from 500"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); + for (int i = 0; i < 5000; i++) { + unsigned char *data; + size_t sz; + long long lv; + int ret = quicklistPop(ql, QUICKLIST_HEAD, &data, &sz, &lv); + if (i < 500) { + TEST_ASSERT(ret == 1); + TEST_ASSERT(data != NULL); + TEST_ASSERT(sz == 32); + if (strcmp(genstr("hello", 499 - i), (char *)data)) { + int size = sz; + TEST_PRINT_INFO("Pop'd value (%.*s) didn't equal original value " + "(%s)", + size, data, genstr("hello", 499 - i)); + err++; + } + zfree(data); + } else { + TEST_ASSERT(ret == 0); + } + } + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIterateForwardOver500List(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("iterate forward over 500 list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); + quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD); + quicklistEntry entry; + int i = 499, count = 0; + 
while (quicklistNext(iter, &entry)) { + char *h = genstr("hello", i); + if (strcmp((char *)entry.value, h)) { + TEST_PRINT_INFO("value [%s] didn't match [%s] at position %d", entry.value, h, i); + err++; + } + i--; + count++; + } + if (count != 500) { + TEST_PRINT_INFO("Didn't iterate over exactly 500 elements (%d)", i); + err++; + } + ql_verify(ql, 16, 500, 20, 32); + ql_release_iterator(iter); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIterateReverseOver500List(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("iterate reverse over 500 list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); + quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); + quicklistEntry entry; + int i = 0; + while (quicklistNext(iter, &entry)) { + char *h = genstr("hello", i); + if (strcmp((char *)entry.value, h)) { + TEST_PRINT_INFO("value [%s] didn't match [%s] at position %d", entry.value, h, i); + err++; + } + i++; + } + if (i != 500) { + TEST_PRINT_INFO("Didn't iterate over exactly 500 elements (%d)", i); + err++; + } + ql_verify(ql, 16, 500, 20, 32); + ql_release_iterator(iter); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistInsertAfter1Element(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("insert after 1 element"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushHead(ql, "hello", 6); + quicklistEntry entry; + iter = 
quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + quicklistInsertAfter(iter, &entry, "abc", 4); + ql_release_iterator(iter); + ql_verify(ql, 1, 2, 2, 2); + + /* verify results */ + iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + int sz = entry.sz; + if (strncmp((char *)entry.value, "hello", 5)) { + TEST_PRINT_INFO("Value 0 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); + sz = entry.sz; + if (strncmp((char *)entry.value, "abc", 3)) { + TEST_PRINT_INFO("Value 1 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistInsertBefore1Element(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("insert before 1 element"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushHead(ql, "hello", 6); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + quicklistInsertBefore(iter, &entry, "abc", 4); + ql_release_iterator(iter); + ql_verify(ql, 1, 2, 2, 2); + + /* verify results */ + iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + int sz = entry.sz; + if (strncmp((char *)entry.value, "abc", 3)) { + TEST_PRINT_INFO("Value 0 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); + sz = entry.sz; + if (strncmp((char *)entry.value, "hello", 5)) { + TEST_PRINT_INFO("Value 1 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + 
return 0; +} + +int test_quicklistInsertHeadWhileHeadNodeIsFull(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("insert head while head node is full"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(4, options[_i]); + for (int i = 0; i < 10; i++) quicklistPushTail(ql, genstr("hello", i), 6); + quicklistSetFill(ql, -1); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, -10, &entry); + char buf[4096] = {0}; + quicklistInsertBefore(iter, &entry, buf, 4096); + ql_release_iterator(iter); + ql_verify(ql, 4, 11, 1, 2); + quicklistRelease(ql); + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistInsertTailWhileTailNodeIsFull(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("insert tail while tail node is full"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(4, options[_i]); + for (int i = 0; i < 10; i++) quicklistPushHead(ql, genstr("hello", i), 6); + quicklistSetFill(ql, -1); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); + char buf[4096] = {0}; + quicklistInsertAfter(iter, &entry, buf, 4096); + ql_release_iterator(iter); + ql_verify(ql, 4, 11, 2, 1); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistInsertOnceInElementsWhileIteratingAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("insert once in elements while iterating at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], 
options[_i]); + quicklistPushTail(ql, "abc", 3); + quicklistSetFill(ql, 1); + quicklistPushTail(ql, "def", 3); /* force to unique node */ + quicklistSetFill(ql, f); + quicklistPushTail(ql, "bob", 3); /* force to reset for +3 */ + quicklistPushTail(ql, "foo", 3); + quicklistPushTail(ql, "zoo", 3); + + itrprintr(ql, 0); + /* insert "bar" before "bob" while iterating over list. */ + quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD); + quicklistEntry entry; + while (quicklistNext(iter, &entry)) { + if (!strncmp((char *)entry.value, "bob", 3)) { + /* Insert as fill = 1 so it spills into new node. */ + quicklistInsertBefore(iter, &entry, "bar", 3); + break; /* didn't we fix insert-while-iterating? */ + } + } + ql_release_iterator(iter); + itrprintr(ql, 0); + + /* verify results */ + iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + int sz = entry.sz; + + if (strncmp((char *)entry.value, "abc", 3)) { + TEST_PRINT_INFO("Value 0 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); + if (strncmp((char *)entry.value, "def", 3)) { + TEST_PRINT_INFO("Value 1 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 2, &entry); + if (strncmp((char *)entry.value, "bar", 3)) { + TEST_PRINT_INFO("Value 2 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 3, &entry); + if (strncmp((char *)entry.value, "bob", 3)) { + TEST_PRINT_INFO("Value 3 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 4, &entry); + if (strncmp((char *)entry.value, "foo", 3)) { + TEST_PRINT_INFO("Value 4 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 
5, &entry); + if (strncmp((char *)entry.value, "zoo", 3)) { + TEST_PRINT_INFO("Value 5 didn't match, instead got: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + long long stop = mstime(); + runtime[_i] += stop - start; + } + + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistInsertBefore250NewInMiddleOf500ElementsAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("insert [before] 250 new in middle of 500 elements at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i), 32); + for (int i = 0; i < 250; i++) { + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, 250, &entry); + quicklistInsertBefore(iter, &entry, genstr("abc", i), 32); + ql_release_iterator(iter); + } + if (fills[f] == 32) ql_verify(ql, 25, 750, 32, 20); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistInsertAfter250NewInMiddleOf500ElementsAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("insert [after] 250 new in middle of 500 elements at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); + for (int i = 0; i < 250; i++) { + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, 250, &entry); + quicklistInsertAfter(iter, &entry, genstr("abc", i), 32); + ql_release_iterator(iter); + } + + if (ql->count != 750) { + 
TEST_PRINT_INFO("List size not 750, but rather %ld", ql->count); + err++; + } + + if (fills[f] == 32) ql_verify(ql, 26, 750, 20, 32); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDuplicateEmptyList(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("duplicate empty list"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + ql_verify(ql, 0, 0, 0, 0); + quicklist *copy = quicklistDup(ql); + ql_verify(copy, 0, 0, 0, 0); + quicklistRelease(ql); + quicklistRelease(copy); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDuplicateListOf1Element(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("duplicate list of 1 element"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushHead(ql, genstr("hello", 3), 32); + ql_verify(ql, 1, 1, 1, 1); + quicklist *copy = quicklistDup(ql); + ql_verify(copy, 1, 1, 1, 1); + quicklistRelease(ql); + quicklistRelease(copy); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDuplicateListOf500(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("duplicate list of 500"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushHead(ql, genstr("hello", i), 32); + ql_verify(ql, 16, 500, 20, 32); + + quicklist *copy = quicklistDup(ql); + 
ql_verify(copy, 16, 500, 20, 32); + quicklistRelease(ql); + quicklistRelease(copy); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIndex1200From500ListAtFill(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("index 1,200 from 500 list at fill at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); + if (strcmp((char *)entry.value, "hello2") != 0) { + TEST_PRINT_INFO("Value: %s", entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 200, &entry); + if (strcmp((char *)entry.value, "hello201") != 0) { + TEST_PRINT_INFO("Value: %s", entry.value); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIndex12From500ListAtFill(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("index -1,-2 from 500 list at fill at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); + if (strcmp((char *)entry.value, "hello500") != 0) { + TEST_PRINT_INFO("Value: %s", entry.value); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, -2, &entry); + 
if (strcmp((char *)entry.value, "hello499") != 0) { + TEST_PRINT_INFO("Value: %s", entry.value); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIndex100From500ListAtFill(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("index -100 from 500 list at fill at compress"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, -100, &entry); + if (strcmp((char *)entry.value, "hello401") != 0) { + TEST_PRINT_INFO("Value: %s", entry.value); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIndexTooBig1From50ListAtFill(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("index too big +1 from 50 list at fill at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + for (int i = 0; i < 50; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + quicklistEntry entry; + int sz = entry.sz; + iter = quicklistGetIteratorEntryAtIdx(ql, 50, &entry); + if (iter) { + TEST_PRINT_INFO("Index found at 50 with 50 list: %.*s", sz, entry.value); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int 
test_quicklistDeleteRangeEmptyList(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete range empty list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistDelRange(ql, 5, 20); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDeleteRangeOfEntireNodeInListOfOneNode(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete range of entire node in list of one node"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + for (int i = 0; i < 32; i++) quicklistPushHead(ql, genstr("hello", i), 32); + ql_verify(ql, 1, 32, 32, 32); + quicklistDelRange(ql, 0, 32); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDeleteRangeOfEntireNodeWithOverflowCounts(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete range of entire node with overflow counts"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + for (int i = 0; i < 32; i++) quicklistPushHead(ql, genstr("hello", i), 32); + ql_verify(ql, 1, 32, 32, 32); + quicklistDelRange(ql, 0, 128); + ql_verify(ql, 0, 0, 0, 0); + quicklistRelease(ql); + + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDeleteMiddle100Of500List(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + 
quicklistIter *iter; + TEST("delete middle 100 of 500 list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + ql_verify(ql, 16, 500, 32, 20); + quicklistDelRange(ql, 200, 100); + ql_verify(ql, 14, 400, 32, 20); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDeleteLessThanFillButAcrossNodes(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete less than fill but across nodes"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + ql_verify(ql, 16, 500, 32, 20); + quicklistDelRange(ql, 60, 10); + ql_verify(ql, 16, 490, 32, 20); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDeleteNegative1From500List(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete negative 1 from 500 list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + ql_verify(ql, 16, 500, 32, 20); + quicklistDelRange(ql, -1, 1); + ql_verify(ql, 16, 499, 32, 19); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDeleteNegative1From500ListWithOverflowCounts(int argc, 
char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete negative 1 from 500 list with overflow counts"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + ql_verify(ql, 16, 500, 32, 20); + quicklistDelRange(ql, -1, 128); + ql_verify(ql, 16, 499, 32, 19); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDeleteNegative100From500List(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete negative 100 from 500 list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 500; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + quicklistDelRange(ql, -100, 100); + ql_verify(ql, 13, 400, 32, 16); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistDelete10Count5From50List(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("delete -10 count 5 from 50 list"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + for (int i = 0; i < 50; i++) quicklistPushTail(ql, genstr("hello", i + 1), 32); + ql_verify(ql, 2, 50, 32, 18); + quicklistDelRange(ql, -10, 5); + ql_verify(ql, 2, 45, 32, 13); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int 
test_quicklistNumbersOnlyListRead(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("numbers only list read"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushTail(ql, "1111", 4); + quicklistPushTail(ql, "2222", 4); + quicklistPushTail(ql, "3333", 4); + quicklistPushTail(ql, "4444", 4); + ql_verify(ql, 1, 4, 4, 4); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + if (entry.longval != 1111) { + TEST_PRINT_INFO("Not 1111, %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 1, &entry); + if (entry.longval != 2222) { + TEST_PRINT_INFO("Not 2222, %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 2, &entry); + if (entry.longval != 3333) { + TEST_PRINT_INFO("Not 3333, %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 3, &entry); + if (entry.longval != 4444) { + TEST_PRINT_INFO("Not 4444, %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, 4, &entry); + if (iter) { + TEST_PRINT_INFO("Index past elements: %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); + if (entry.longval != 4444) { + TEST_PRINT_INFO("Not 4444 (reverse), %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, -2, &entry); + if (entry.longval != 3333) { + TEST_PRINT_INFO("Not 3333 (reverse), %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, -3, &entry); + if (entry.longval != 2222) { + TEST_PRINT_INFO("Not 2222 (reverse), %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = 
quicklistGetIteratorEntryAtIdx(ql, -4, &entry); + if (entry.longval != 1111) { + TEST_PRINT_INFO("Not 1111 (reverse), %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, -5, &entry); + if (iter) { + TEST_PRINT_INFO("Index past elements (reverse), %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistNumbersLargerListRead(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("numbers larger list read"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistSetFill(ql, 32); + char num[32]; + long long nums[5000]; + for (int i = 0; i < 5000; i++) { + nums[i] = -5157318210846258176 + i; + int sz = ll2string(num, sizeof(num), nums[i]); + quicklistPushTail(ql, num, sz); + } + quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20); + quicklistEntry entry; + for (int i = 0; i < 5000; i++) { + iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry); + if (entry.longval != nums[i]) { + TEST_PRINT_INFO("[%d] Not longval %lld but rather %lld", i, nums[i], entry.longval); + err++; + } + entry.longval = 0xdeadbeef; + ql_release_iterator(iter); + } + iter = quicklistGetIteratorEntryAtIdx(ql, 5000, &entry); + if (strncmp((char *)entry.value, "xxxxxxxxxxxxxxxxxxxx", 20)) { + TEST_PRINT_INFO("String val not match: %s", entry.value); + err++; + } + ql_verify(ql, 157, 5001, 32, 9); + ql_release_iterator(iter); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistNumbersLargerListReadB(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("numbers 
larger list read B"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + quicklist *ql = quicklistNew(-2, options[_i]); + quicklistPushTail(ql, "99", 2); + quicklistPushTail(ql, "98", 2); + quicklistPushTail(ql, "xxxxxxxxxxxxxxxxxxxx", 20); + quicklistPushTail(ql, "96", 2); + quicklistPushTail(ql, "95", 2); + quicklistReplaceAtIndex(ql, 1, "foo", 3); + quicklistReplaceAtIndex(ql, -1, "bar", 3); + quicklistRelease(ql); + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistLremTestAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("lrem test at compress"); + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + char *words[] = {"abc", "foo", "bar", "foobar", "foobared", "zap", "bar", "test", "foo"}; + char *result[] = {"abc", "foo", "foobar", "foobared", "zap", "test", "foo"}; + char *resultB[] = {"abc", "foo", "foobar", "foobared", "zap", "test"}; + for (int i = 0; i < 9; i++) quicklistPushTail(ql, words[i], strlen(words[i])); + + /* lrem 0 bar */ + quicklistIter *iter = quicklistGetIterator(ql, AL_START_HEAD); + quicklistEntry entry; + int i = 0; + while (quicklistNext(iter, &entry)) { + if (quicklistCompare(&entry, (unsigned char *)"bar", 3)) { + quicklistDelEntry(iter, &entry); + } + i++; + } + ql_release_iterator(iter); + + /* check result of lrem 0 bar */ + iter = quicklistGetIterator(ql, AL_START_HEAD); + i = 0; + while (quicklistNext(iter, &entry)) { + /* Result must be: abc, foo, foobar, foobared, zap, test, + * foo */ + int sz = entry.sz; + if (strncmp((char *)entry.value, result[i], entry.sz)) { + TEST_PRINT_INFO("No match at position %d, got %.*s instead of %s", i, sz, entry.value, result[i]); + err++; + } + i++; + } + ql_release_iterator(iter); + + 
quicklistPushTail(ql, "foo", 3); + + /* lrem -2 foo */ + iter = quicklistGetIterator(ql, AL_START_TAIL); + i = 0; + int del = 2; + while (quicklistNext(iter, &entry)) { + if (quicklistCompare(&entry, (unsigned char *)"foo", 3)) { + quicklistDelEntry(iter, &entry); + del--; + } + if (!del) break; + i++; + } + ql_release_iterator(iter); + + /* check result of lrem -2 foo */ + /* (we're ignoring the '2' part and still deleting all foo + * because + * we only have two foo) */ + iter = quicklistGetIterator(ql, AL_START_TAIL); + i = 0; + size_t resB = sizeof(resultB) / sizeof(*resultB); + while (quicklistNext(iter, &entry)) { + /* Result must be: abc, foo, foobar, foobared, zap, test, + * foo */ + int sz = entry.sz; + if (strncmp((char *)entry.value, resultB[resB - 1 - i], sz)) { + TEST_PRINT_INFO("No match at position %d, got %.*s instead of %s", i, sz, entry.value, + resultB[resB - 1 - i]); + err++; + } + i++; + } + + ql_release_iterator(iter); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIterateReverseDeleteAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("iterate reverse + delete at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + quicklistPushTail(ql, "abc", 3); + quicklistPushTail(ql, "def", 3); + quicklistPushTail(ql, "hij", 3); + quicklistPushTail(ql, "jkl", 3); + quicklistPushTail(ql, "oop", 3); + + quicklistEntry entry; + quicklistIter *iter = quicklistGetIterator(ql, AL_START_TAIL); + int i = 0; + while (quicklistNext(iter, &entry)) { + if (quicklistCompare(&entry, (unsigned char *)"hij", 3)) { + quicklistDelEntry(iter, &entry); + } + i++; + } + ql_release_iterator(iter); + + if (i != 5) { + TEST_PRINT_INFO("Didn't iterate 5 
times, iterated %d times.", i); + err++; + } + + /* Check results after deletion of "hij" */ + iter = quicklistGetIterator(ql, AL_START_HEAD); + i = 0; + char *vals[] = {"abc", "def", "jkl", "oop"}; + while (quicklistNext(iter, &entry)) { + if (!quicklistCompare(&entry, (unsigned char *)vals[i], 3)) { + TEST_PRINT_INFO("Value at %d didn't match %s\n", i, vals[i]); + err++; + } + i++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistIteratorAtIndexTestAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("iterator at index test at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + char num[32]; + long long nums[5000]; + for (int i = 0; i < 760; i++) { + nums[i] = -5157318210846258176 + i; + int sz = ll2string(num, sizeof(num), nums[i]); + quicklistPushTail(ql, num, sz); + } + + quicklistEntry entry; + quicklistIter *iter = quicklistGetIteratorAtIdx(ql, AL_START_HEAD, 437); + int i = 437; + while (quicklistNext(iter, &entry)) { + if (entry.longval != nums[i]) { + TEST_PRINT_INFO("Expected %lld, but got %lld", entry.longval, nums[i]); + err++; + } + i++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistLtrimTestAAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("ltrim test A at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + char num[32]; + long 
long nums[5000]; + for (int i = 0; i < 32; i++) { + nums[i] = -5157318210846258176 + i; + int sz = ll2string(num, sizeof(num), nums[i]); + quicklistPushTail(ql, num, sz); + } + if (fills[f] == 32) ql_verify(ql, 1, 32, 32, 32); + /* ltrim 25 53 (keep [25,32] inclusive = 7 remaining) */ + quicklistDelRange(ql, 0, 25); + quicklistDelRange(ql, 0, 0); + quicklistEntry entry; + for (int i = 0; i < 7; i++) { + iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry); + if (entry.longval != nums[25 + i]) { + TEST_PRINT_INFO("Deleted invalid range! Expected %lld but got " + "%lld", + entry.longval, nums[25 + i]); + err++; + } + ql_release_iterator(iter); + } + if (fills[f] == 32) ql_verify(ql, 1, 7, 7, 7); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistLtrimTestBAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("ltrim test B at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + /* Force-disable compression because our 33 sequential + * integers don't compress and the check always fails. 
*/ + quicklist *ql = quicklistNew(fills[f], QUICKLIST_NOCOMPRESS); + char num[32]; + long long nums[5000]; + for (int i = 0; i < 33; i++) { + nums[i] = i; + int sz = ll2string(num, sizeof(num), nums[i]); + quicklistPushTail(ql, num, sz); + } + if (fills[f] == 32) ql_verify(ql, 2, 33, 32, 1); + /* ltrim 5 16 (keep [5,16] inclusive = 12 remaining) */ + quicklistDelRange(ql, 0, 5); + quicklistDelRange(ql, -16, 16); + if (fills[f] == 32) ql_verify(ql, 1, 12, 12, 12); + quicklistEntry entry; + + iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + if (entry.longval != 5) { + TEST_PRINT_INFO("A: longval not 5, but %lld", entry.longval); + err++; + } + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); + if (entry.longval != 16) { + TEST_PRINT_INFO("B! got instead: %lld", entry.longval); + err++; + } + quicklistPushTail(ql, "bobobob", 7); + ql_release_iterator(iter); + + iter = quicklistGetIteratorEntryAtIdx(ql, -1, &entry); + int sz = entry.sz; + if (strncmp((char *)entry.value, "bobobob", 7)) { + TEST_PRINT_INFO("Tail doesn't match bobobob, it's %.*s instead", sz, entry.value); + err++; + } + ql_release_iterator(iter); + + for (int i = 0; i < 12; i++) { + iter = quicklistGetIteratorEntryAtIdx(ql, i, &entry); + if (entry.longval != nums[5 + i]) { + TEST_PRINT_INFO("Deleted invalid range! 
Expected %lld but got " + "%lld", + entry.longval, nums[5 + i]); + err++; + } + + ql_release_iterator(iter); + } + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistLtrimTestCAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("ltrim test C at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + char num[32]; + long long nums[5000]; + for (int i = 0; i < 33; i++) { + nums[i] = -5157318210846258176 + i; + int sz = ll2string(num, sizeof(num), nums[i]); + quicklistPushTail(ql, num, sz); + } + if (fills[f] == 32) ql_verify(ql, 2, 33, 32, 1); + /* ltrim 3 3 (keep [3,3] inclusive = 1 remaining) */ + quicklistDelRange(ql, 0, 3); + quicklistDelRange(ql, -29, 4000); /* make sure not loop forever */ + if (fills[f] == 32) ql_verify(ql, 1, 1, 1, 1); + quicklistEntry entry; + iter = quicklistGetIteratorEntryAtIdx(ql, 0, &entry); + if (entry.longval != -5157318210846258173) { + err++; + } + ql_release_iterator(iter); + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistLtrimTestDAtCompress(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + quicklistIter *iter; + TEST("ltrim test D at compress"); + + for (int _i = 0; _i < option_count; _i++) { + long long start = mstime(); + for (int f = 0; f < fill_count; f++) { + quicklist *ql = quicklistNew(fills[f], options[_i]); + char num[32]; + long long nums[5000]; + for (int i = 0; i < 33; i++) { + nums[i] = -5157318210846258176 + i; + int sz = ll2string(num, sizeof(num), nums[i]); + quicklistPushTail(ql, num, sz); + } + if (fills[f] == 32) ql_verify(ql, 2, 33, 32, 
1); + quicklistDelRange(ql, -12, 3); + if (ql->count != 30) { + TEST_PRINT_INFO("Didn't delete exactly three elements! Count is: %lu", ql->count); + err++; + } + quicklistRelease(ql); + } + + long long stop = mstime(); + runtime[_i] += stop - start; + } + UNUSED(iter); + TEST_ASSERT(err == 0); + return 0; +} + +int test_quicklistVerifySpecificCompressionOfInteriorNodes(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + int accurate = flags & UNIT_TEST_ACCURATE; + + TEST("verify specific compression of interior nodes"); + + /* Run a longer test of compression depth outside of primary test loop. */ + int list_sizes[] = {250, 251, 500, 999, 1000}; + int list_count = accurate ? (int)(sizeof(list_sizes) / sizeof(*list_sizes)) : 1; + for (int list = 0; list < list_count; list++) { + for (int f = 0; f < fill_count; f++) { + for (int depth = 1; depth < 40; depth++) { + /* skip over many redundant test cases */ + quicklist *ql = quicklistNew(fills[f], depth); + for (int i = 0; i < list_sizes[list]; i++) { + quicklistPushTail(ql, genstr("hello TAIL", i + 1), 64); + quicklistPushHead(ql, genstr("hello HEAD", i + 1), 64); + } + + for (int step = 0; step < 2; step++) { + /* test remove node */ + if (step == 1) { + for (int i = 0; i < list_sizes[list] / 2; i++) { + unsigned char *data; + TEST_ASSERT(quicklistPop(ql, QUICKLIST_HEAD, &data, + NULL, NULL)); + zfree(data); + TEST_ASSERT(quicklistPop(ql, QUICKLIST_TAIL, &data, + NULL, NULL)); + zfree(data); + } + } + quicklistNode *node = ql->head; + unsigned int low_raw = ql->compress; + unsigned int high_raw = ql->len - ql->compress; + + for (unsigned int at = 0; at < ql->len; + at++, node = node->next) { + if (at < low_raw || at >= high_raw) { + if (node->encoding != QUICKLIST_NODE_ENCODING_RAW) { + TEST_PRINT_INFO("Incorrect compression: node %d is " + "compressed at depth %d ((%u, %u); total " + "nodes: %lu; size: %zu)", + at, depth, low_raw, high_raw, ql->len, + node->sz); + err++; + } + } 
else { + if (node->encoding != QUICKLIST_NODE_ENCODING_LZF) { + TEST_PRINT_INFO("Incorrect non-compression: node %d is NOT " + "compressed at depth %d ((%u, %u); total " + "nodes: %lu; size: %zu; attempted: %d)", + at, depth, low_raw, high_raw, ql->len, + node->sz, node->attempted_compress); + err++; + } + } + } + } + + quicklistRelease(ql); + } + } + } + TEST_ASSERT(err == 0); + return 0; +} + +/*----------------------------------------------------------------------------- + * Quicklist Bookmark Unit Test + *----------------------------------------------------------------------------*/ + +int test_quicklistBookmarkGetUpdatedToNextItem(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + TEST("bookmark get updated to next item"); + + quicklist *ql = quicklistNew(1, 0); + quicklistPushTail(ql, "1", 1); + quicklistPushTail(ql, "2", 1); + quicklistPushTail(ql, "3", 1); + quicklistPushTail(ql, "4", 1); + quicklistPushTail(ql, "5", 1); + TEST_ASSERT(ql->len == 5); + /* add two bookmarks, one pointing to the node before the last. */ + TEST_ASSERT(quicklistBookmarkCreate(&ql, "_dummy", ql->head->next)); + TEST_ASSERT(quicklistBookmarkCreate(&ql, "_test", ql->tail->prev)); + /* test that the bookmark returns the right node, delete it and see that the bookmark points to the last node */ + TEST_ASSERT(quicklistBookmarkFind(ql, "_test") == ql->tail->prev); + TEST_ASSERT(quicklistDelRange(ql, -2, 1)); + TEST_ASSERT(quicklistBookmarkFind(ql, "_test") == ql->tail); + /* delete the last node, and see that the bookmark was deleted. 
*/ + TEST_ASSERT(quicklistDelRange(ql, -1, 1)); + TEST_ASSERT(quicklistBookmarkFind(ql, "_test") == NULL); + /* test that other bookmarks aren't affected */ + TEST_ASSERT(quicklistBookmarkFind(ql, "_dummy") == ql->head->next); + TEST_ASSERT(quicklistBookmarkFind(ql, "_missing") == NULL); + TEST_ASSERT(ql->len == 3); + quicklistBookmarksClear(ql); /* for coverage */ + TEST_ASSERT(quicklistBookmarkFind(ql, "_dummy") == NULL); + quicklistRelease(ql); + return 0; +} + +int test_quicklistBookmarkLimit(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + TEST("bookmark limit"); + + int i; + quicklist *ql = quicklistNew(1, 0); + quicklistPushHead(ql, "1", 1); + for (i = 0; i < QL_MAX_BM; i++) + TEST_ASSERT(quicklistBookmarkCreate(&ql, genstr("", i), ql->head)); + /* when all bookmarks are used, creation fails */ + TEST_ASSERT(!quicklistBookmarkCreate(&ql, "_test", ql->head)); + /* delete one and see that we can now create another */ + TEST_ASSERT(quicklistBookmarkDelete(ql, "0")); + TEST_ASSERT(quicklistBookmarkCreate(&ql, "_test", ql->head)); + /* delete one and see that the rest survive */ + TEST_ASSERT(quicklistBookmarkDelete(ql, "_test")); + for (i = 1; i < QL_MAX_BM; i++) + TEST_ASSERT(quicklistBookmarkFind(ql, genstr("", i)) == ql->head); + /* make sure the deleted ones are indeed gone */ + TEST_ASSERT(!quicklistBookmarkFind(ql, "0")); + TEST_ASSERT(!quicklistBookmarkFind(ql, "_test")); + quicklistRelease(ql); + return 0; +} + +int test_quicklistCompressAndDecompressQuicklistListpackNode(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + TEST("compress and decompress quicklist listpack node"); + + if (!(flags & UNIT_TEST_LARGE_MEMORY)) return 0; + + quicklistNode *node = quicklistCreateNode(); + node->entry = lpNew(0); + + /* Just to avoid triggering the assertion in __quicklistCompressNode(), + * it disables the passing of quicklist head or tail node. 
*/ + node->prev = quicklistCreateNode(); + node->next = quicklistCreateNode(); + + /* Create a rand string */ + size_t sz = (1 << 25); /* 32MB per one entry */ + unsigned char *s = zmalloc(sz); + randstring(s, sz); + + /* Keep filling the node, until it reaches 1GB */ + for (int i = 0; i < 32; i++) { + node->entry = lpAppend(node->entry, s, sz); + node->sz = lpBytes((node)->entry); + + long long start = mstime(); + TEST_ASSERT(__quicklistCompressNode(node)); + TEST_ASSERT(__quicklistDecompressNode(node)); + TEST_PRINT_INFO("Compress and decompress: %zu MB in %.2f seconds.\n", + node->sz / 1024 / 1024, (float)(mstime() - start) / 1000); + } + + zfree(s); + zfree(node->prev); + zfree(node->next); + zfree(node->entry); + zfree(node); + return 0; +} + +int test_quicklistCompressAndDecomressQuicklistPlainNodeLargeThanUINT32MAX(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + TEST("compress and decomress quicklist plain node large than UINT32_MAX"); + + if (!(flags & UNIT_TEST_LARGE_MEMORY)) return 0; + +#if ULONG_MAX >= 0xffffffffffffffff + + size_t sz = (1ull << 32); + unsigned char *s = zmalloc(sz); + randstring(s, sz); + memcpy(s, "helloworld", 10); + memcpy(s + sz - 10, "1234567890", 10); + + quicklistNode *node = __quicklistCreateNode(QUICKLIST_NODE_CONTAINER_PLAIN, s, sz); + + /* Just to avoid triggering the assertion in __quicklistCompressNode(), + * it disables the passing of quicklist head or tail node. 
*/ + node->prev = quicklistCreateNode(); + node->next = quicklistCreateNode(); + + long long start = mstime(); + TEST_ASSERT(__quicklistCompressNode(node)); + TEST_ASSERT(__quicklistDecompressNode(node)); + TEST_PRINT_INFO("Compress and decompress: %zu MB in %.2f seconds.\n", + node->sz / 1024 / 1024, (float)(mstime() - start) / 1000); + + TEST_ASSERT(memcmp(node->entry, "helloworld", 10) == 0); + TEST_ASSERT(memcmp(node->entry + sz - 10, "1234567890", 10) == 0); + zfree(node->prev); + zfree(node->next); + zfree(node->entry); + zfree(node); + +#endif + return 0; +} From 863d31280369a290c5b51f446a2c018ce3e98da0 Mon Sep 17 00:00:00 2001 From: Parth <661497+parthpatel@users.noreply.github.com> Date: Wed, 13 Nov 2024 21:50:55 -0800 Subject: [PATCH 13/60] Fix link-time optimization to work correctly for unit tests (i.e. -flto flag) (#1290) (#1296) * We compile various c files into object and package them into library (.a file) using ar to feed to unit tests. With new GCC versions, the objects inside such library don't participate in LTO process without additional flags. * Here is a direct quote from gcc documentation explaining this issue: "If you are not using a linker with plugin support and/or do not enable the linker plugin, then the objects inside libfoo.a are extracted and linked as usual, but they do not participate in the LTO optimization process. In order to make a static library suitable for both LTO optimization and usual linkage, compile its object files with -flto-ffat-lto-objects." * Read full documentation about -flto at https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html * Without this additional flag, I get following errors while executing "make test-unit". With this change, those errors go away. ``` ARCHIVE libvalkey.a ar: threads_mngr.o: plugin needed to handle lto object ... .. . 
/tmp/ccDYbMXL.ltrans0.ltrans.o: In function `dictClear': /local/workplace/elasticache/valkey/src/unit/../dict.c:776: undefined reference to `valkey_free' /local/workplace/elasticache/valkey/src/unit/../dict.c:770: undefined reference to `valkey_free' /tmp/ccDYbMXL.ltrans0.ltrans.o: In function `dictGetVal': ``` Fixes #1290 --------- Signed-off-by: Parth Patel <661497+parthpatel@users.noreply.github.com> --- src/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile b/src/Makefile index 21affe61a3..a76356e9d5 100644 --- a/src/Makefile +++ b/src/Makefile @@ -25,7 +25,7 @@ ifeq ($(OPTIMIZATION),-O3) ifeq (clang,$(CLANG)) OPTIMIZATION+=-flto else - OPTIMIZATION+=-flto=auto + OPTIMIZATION+=-flto=auto -ffat-lto-objects endif endif ifneq ($(OPTIMIZATION),-O0) From 32f7541fe34e5e0520a5917d09661756d330bd11 Mon Sep 17 00:00:00 2001 From: Qu Chen Date: Thu, 14 Nov 2024 00:45:47 -0800 Subject: [PATCH 14/60] Simplify dictType callbacks and move some macros from dict.h to dict.c (#1281) Remove the dict pointer argument to the `dictType` callbacks `keyDup`, `keyCompare`, `keyDestructor` and `valDestructor`. This argument was unused in all of the callback implementations. The macros `dictFreeKey()` and `dictFreeVal()` are made internal to dict and moved from dict.h to dict.c. They're also changed from macros to static inline functions. 
Signed-off-by: Qu Chen --- src/config.c | 9 ++++----- src/dict.c | 18 ++++++++++++++--- src/dict.h | 25 +++++++++++------------- src/eval.c | 3 +-- src/functions.c | 15 ++++++-------- src/latency.c | 3 +-- src/module.c | 3 +-- src/sentinel.c | 5 ++--- src/server.c | 43 ++++++++++++++--------------------------- src/server.h | 12 ++++++------ src/unit/test_dict.c | 8 ++------ src/unit/test_kvstore.c | 3 +-- src/valkey-benchmark.c | 6 ++---- src/valkey-cli.c | 19 +++++++----------- 14 files changed, 74 insertions(+), 98 deletions(-) diff --git a/src/config.c b/src/config.c index f718543c39..15fec15276 100644 --- a/src/config.c +++ b/src/config.c @@ -1013,15 +1013,14 @@ void configGetCommand(client *c) { #define CONFIG_REWRITE_SIGNATURE "# Generated by CONFIG REWRITE" -/* We use the following dictionary type to store where a configuration - * option is mentioned in the old configuration file, so it's - * like "maxmemory" -> list of line numbers (first line is zero). */ -void dictListDestructor(dict *d, void *val); - /* Sentinel config rewriting is implemented inside sentinel.c by * rewriteConfigSentinelOption(). */ void rewriteConfigSentinelOption(struct rewriteConfigState *state); +/* We use the following dictionary type to store where a configuration + * option is mentioned in the old configuration file, so it's + * like "maxmemory" -> list of line numbers (first line is zero). + */ dictType optionToLineDictType = { dictSdsCaseHash, /* hash function */ NULL, /* key dup */ diff --git a/src/dict.c b/src/dict.c index f164820584..48c0f815bb 100644 --- a/src/dict.c +++ b/src/dict.c @@ -576,7 +576,7 @@ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) { if (!position) return NULL; /* Dup the key if necessary. */ - if (d->type->keyDup) key = d->type->keyDup(d, key); + if (d->type->keyDup) key = d->type->keyDup(key); return dictInsertAtPosition(d, key, position); } @@ -640,7 +640,7 @@ int dictReplace(dict *d, void *key, void *val) { * reverse. 
*/ void *oldval = dictGetVal(existing); dictSetVal(d, existing, val); - if (d->type->valDestructor) d->type->valDestructor(d, oldval); + if (d->type->valDestructor) d->type->valDestructor(oldval); return 0; } @@ -742,6 +742,18 @@ dictEntry *dictUnlink(dict *d, const void *key) { return dictGenericDelete(d, key, 1); } +inline static void dictFreeKey(dict *d, dictEntry *entry) { + if (d->type->keyDestructor) { + d->type->keyDestructor(dictGetKey(entry)); + } +} + +inline static void dictFreeVal(dict *d, dictEntry *entry) { + if (d->type->valDestructor) { + d->type->valDestructor(dictGetVal(entry)); + } +} + /* You need to call this function to really free the entry after a call * to dictUnlink(). It's safe to call this function with 'he' = NULL. */ void dictFreeUnlinkedEntry(dict *d, dictEntry *he) { @@ -919,7 +931,7 @@ void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table : (entryIsEmbedded(de) ? &decodeEntryEmbedded(de)->field : (panic("Entry type not supported"), NULL))) void dictSetKey(dict *d, dictEntry *de, void *key) { - void *k = d->type->keyDup ? d->type->keyDup(d, key) : key; + void *k = d->type->keyDup ? 
d->type->keyDup(key) : key; if (entryIsNormal(de)) { dictEntryNormal *_de = decodeEntryNormal(de); _de->key = k; diff --git a/src/dict.h b/src/dict.h index 1c9e059baa..88ebd7bf99 100644 --- a/src/dict.h +++ b/src/dict.h @@ -53,10 +53,10 @@ typedef struct dict dict; typedef struct dictType { /* Callbacks */ uint64_t (*hashFunction)(const void *key); - void *(*keyDup)(dict *d, const void *key); - int (*keyCompare)(dict *d, const void *key1, const void *key2); - void (*keyDestructor)(dict *d, void *key); - void (*valDestructor)(dict *d, void *obj); + void *(*keyDup)(const void *key); + int (*keyCompare)(const void *key1, const void *key2); + void (*keyDestructor)(void *key); + void (*valDestructor)(void *obj); int (*resizeAllowed)(size_t moreMem, double usedRatio); /* Invoked at the start of dict initialization/rehashing (old and new ht are already created) */ void (*rehashingStarted)(dict *d); @@ -144,16 +144,13 @@ typedef struct { #define DICT_HT_INITIAL_SIZE (1 << (DICT_HT_INITIAL_EXP)) /* ------------------------------- Macros ------------------------------------*/ -#define dictFreeVal(d, entry) \ - do { \ - if ((d)->type->valDestructor) (d)->type->valDestructor((d), dictGetVal(entry)); \ - } while (0) - -#define dictFreeKey(d, entry) \ - if ((d)->type->keyDestructor) (d)->type->keyDestructor((d), dictGetKey(entry)) - -#define dictCompareKeys(d, key1, key2) \ - (((d)->type->keyCompare) ? (d)->type->keyCompare((d), key1, key2) : (key1) == (key2)) +static inline int dictCompareKeys(dict *d, const void *key1, const void *key2) { + if (d->type->keyCompare) { + return d->type->keyCompare(key1, key2); + } else { + return (key1 == key2); + } +} #define dictMetadata(d) (&(d)->metadata) #define dictMetadataSize(d) ((d)->type->dictMetadataBytes ? 
(d)->type->dictMetadataBytes(d) : 0) diff --git a/src/eval.c b/src/eval.c index fd12e40ad2..e5d7d56aa2 100644 --- a/src/eval.c +++ b/src/eval.c @@ -57,8 +57,7 @@ void evalGenericCommandWithDebugging(client *c, int evalsha); sds ldbCatStackValue(sds s, lua_State *lua, int idx); listNode *luaScriptsLRUAdd(client *c, sds sha, int evalsha); -static void dictLuaScriptDestructor(dict *d, void *val) { - UNUSED(d); +static void dictLuaScriptDestructor(void *val) { if (val == NULL) return; /* Lazy freeing will set value to NULL. */ decrRefCount(((luaScript *)val)->body); zfree(val); diff --git a/src/functions.c b/src/functions.c index e950024bad..c9ec42b322 100644 --- a/src/functions.c +++ b/src/functions.c @@ -43,9 +43,9 @@ typedef enum { static size_t engine_cache_memory = 0; /* Forward declaration */ -static void engineFunctionDispose(dict *d, void *obj); -static void engineStatsDispose(dict *d, void *obj); -static void engineLibraryDispose(dict *d, void *obj); +static void engineFunctionDispose(void *obj); +static void engineStatsDispose(void *obj); +static void engineLibraryDispose(void *obj); static int functionsVerifyName(sds name); typedef struct functionsLibEngineStats { @@ -126,15 +126,13 @@ static size_t libraryMallocSize(functionLibInfo *li) { return zmalloc_size(li) + sdsAllocSize(li->name) + sdsAllocSize(li->code); } -static void engineStatsDispose(dict *d, void *obj) { - UNUSED(d); +static void engineStatsDispose(void *obj) { functionsLibEngineStats *stats = obj; zfree(stats); } /* Dispose function memory */ -static void engineFunctionDispose(dict *d, void *obj) { - UNUSED(d); +static void engineFunctionDispose(void *obj) { if (!obj) { return; } @@ -158,8 +156,7 @@ static void engineLibraryFree(functionLibInfo *li) { zfree(li); } -static void engineLibraryDispose(dict *d, void *obj) { - UNUSED(d); +static void engineLibraryDispose(void *obj) { engineLibraryFree(obj); } diff --git a/src/latency.c b/src/latency.c index eef1532d03..783f04b197 100644 --- 
a/src/latency.c +++ b/src/latency.c @@ -37,8 +37,7 @@ #include "hdr_histogram.h" /* Dictionary type for latency events. */ -int dictStringKeyCompare(dict *d, const void *key1, const void *key2) { - UNUSED(d); +int dictStringKeyCompare(const void *key1, const void *key2) { return strcmp(key1, key2) == 0; } diff --git a/src/module.c b/src/module.c index 2884239200..1e98b36f30 100644 --- a/src/module.c +++ b/src/module.c @@ -11814,8 +11814,7 @@ uint64_t dictCStringKeyHash(const void *key) { return dictGenHashFunction((unsigned char *)key, strlen((char *)key)); } -int dictCStringKeyCompare(dict *d, const void *key1, const void *key2) { - UNUSED(d); +int dictCStringKeyCompare(const void *key1, const void *key2) { return strcmp(key1, key2) == 0; } diff --git a/src/sentinel.c b/src/sentinel.c index 711c4aea3e..ccd3ccbdca 100644 --- a/src/sentinel.c +++ b/src/sentinel.c @@ -416,8 +416,7 @@ void sentinelSimFailureCrash(void); void releaseSentinelValkeyInstance(sentinelValkeyInstance *ri); -void dictInstancesValDestructor(dict *d, void *obj) { - UNUSED(d); +void dictInstancesValDestructor(void *obj) { releaseSentinelValkeyInstance(obj); } @@ -4259,7 +4258,7 @@ void sentinelSetCommand(client *c) { /* If the target name is the same as the source name there * is no need to add an entry mapping to itself. */ - if (!dictSdsKeyCaseCompare(ri->renamed_commands, oldname, newname)) { + if (!dictSdsKeyCaseCompare(oldname, newname)) { oldname = sdsdup(oldname); newname = sdsdup(newname); dictAdd(ri->renamed_commands, oldname, newname); diff --git a/src/server.c b/src/server.c index 3217351faf..8841219697 100644 --- a/src/server.c +++ b/src/server.c @@ -360,25 +360,20 @@ void exitFromChild(int retcode) { * keys and Objects as values (Objects can hold SDS strings, * lists, sets). 
*/ -void dictVanillaFree(dict *d, void *val) { - UNUSED(d); +void dictVanillaFree(void *val) { zfree(val); } -void dictListDestructor(dict *d, void *val) { - UNUSED(d); +void dictListDestructor(void *val) { listRelease((list *)val); } -void dictDictDestructor(dict *d, void *val) { - UNUSED(d); +void dictDictDestructor(void *val) { dictRelease((dict *)val); } -int dictSdsKeyCompare(dict *d, const void *key1, const void *key2) { +int dictSdsKeyCompare(const void *key1, const void *key2) { int l1, l2; - UNUSED(d); - l1 = sdslen((sds)key1); l2 = sdslen((sds)key2); if (l1 != l2) return 0; @@ -391,30 +386,26 @@ size_t dictSdsEmbedKey(unsigned char *buf, size_t buf_len, const void *key, uint /* A case insensitive version used for the command lookup table and other * places where case insensitive non binary-safe comparison is needed. */ -int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2) { - UNUSED(d); +int dictSdsKeyCaseCompare(const void *key1, const void *key2) { return strcasecmp(key1, key2) == 0; } -void dictObjectDestructor(dict *d, void *val) { - UNUSED(d); +void dictObjectDestructor(void *val) { if (val == NULL) return; /* Lazy freeing will set value to NULL. 
*/ decrRefCount(val); } -void dictSdsDestructor(dict *d, void *val) { - UNUSED(d); +void dictSdsDestructor(void *val) { sdsfree(val); } -void *dictSdsDup(dict *d, const void *key) { - UNUSED(d); +void *dictSdsDup(const void *key) { return sdsdup((const sds)key); } -int dictObjKeyCompare(dict *d, const void *key1, const void *key2) { +int dictObjKeyCompare(const void *key1, const void *key2) { const robj *o1 = key1, *o2 = key2; - return dictSdsKeyCompare(d, o1->ptr, o2->ptr); + return dictSdsKeyCompare(o1->ptr, o2->ptr); } uint64_t dictObjHash(const void *key) { @@ -446,16 +437,13 @@ uint64_t dictClientHash(const void *key) { } /* Dict compare function for client */ -int dictClientKeyCompare(dict *d, const void *key1, const void *key2) { - UNUSED(d); +int dictClientKeyCompare(const void *key1, const void *key2) { return ((client *)key1)->id == ((client *)key2)->id; } /* Dict compare function for null terminated string */ -int dictCStrKeyCompare(dict *d, const void *key1, const void *key2) { +int dictCStrKeyCompare(const void *key1, const void *key2) { int l1, l2; - UNUSED(d); - l1 = strlen((char *)key1); l2 = strlen((char *)key2); if (l1 != l2) return 0; @@ -463,12 +451,11 @@ int dictCStrKeyCompare(dict *d, const void *key1, const void *key2) { } /* Dict case insensitive compare function for null terminated string */ -int dictCStrKeyCaseCompare(dict *d, const void *key1, const void *key2) { - UNUSED(d); +int dictCStrKeyCaseCompare(const void *key1, const void *key2) { return strcasecmp(key1, key2) == 0; } -int dictEncObjKeyCompare(dict *d, const void *key1, const void *key2) { +int dictEncObjKeyCompare(const void *key1, const void *key2) { robj *o1 = (robj *)key1, *o2 = (robj *)key2; int cmp; @@ -480,7 +467,7 @@ int dictEncObjKeyCompare(dict *d, const void *key1, const void *key2) { * objects as well. 
*/ if (o1->refcount != OBJ_STATIC_REFCOUNT) o1 = getDecodedObject(o1); if (o2->refcount != OBJ_STATIC_REFCOUNT) o2 = getDecodedObject(o2); - cmp = dictSdsKeyCompare(d, o1->ptr, o2->ptr); + cmp = dictSdsKeyCompare(o1->ptr, o2->ptr); if (o1->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o1); if (o2->refcount != OBJ_STATIC_REFCOUNT) decrRefCount(o2); return cmp; diff --git a/src/server.h b/src/server.h index 5cf56e9c86..c7a9806cac 100644 --- a/src/server.h +++ b/src/server.h @@ -2730,7 +2730,7 @@ int serverSetProcTitle(char *title); int validateProcTitleTemplate(const char *template); int serverCommunicateSystemd(const char *sd_notify_msg); void serverSetCpuAffinity(const char *cpulist); -void dictVanillaFree(dict *d, void *val); +void dictVanillaFree(void *val); /* ERROR STATS constants */ @@ -3717,11 +3717,11 @@ void startEvictionTimeProc(void); /* Keys hashing / comparison functions for dict.c hash tables. */ uint64_t dictSdsHash(const void *key); uint64_t dictSdsCaseHash(const void *key); -int dictSdsKeyCompare(dict *d, const void *key1, const void *key2); -int dictSdsKeyCaseCompare(dict *d, const void *key1, const void *key2); -void dictSdsDestructor(dict *d, void *val); -void dictListDestructor(dict *d, void *val); -void *dictSdsDup(dict *d, const void *key); +int dictSdsKeyCompare(const void *key1, const void *key2); +int dictSdsKeyCaseCompare(const void *key1, const void *key2); +void dictSdsDestructor(void *val); +void dictListDestructor(void *val); +void *dictSdsDup(const void *key); /* Git SHA1 */ char *serverGitSHA1(void); diff --git a/src/unit/test_dict.c b/src/unit/test_dict.c index a5af4eef79..b03d252c74 100644 --- a/src/unit/test_dict.c +++ b/src/unit/test_dict.c @@ -5,19 +5,15 @@ uint64_t hashCallback(const void *key) { return dictGenHashFunction((unsigned char *)key, strlen((char *)key)); } -int compareCallback(dict *d, const void *key1, const void *key2) { +int compareCallback(const void *key1, const void *key2) { int l1, l2; - UNUSED(d); - l1 = 
strlen((char *)key1); l2 = strlen((char *)key2); if (l1 != l2) return 0; return memcmp(key1, key2, l1) == 0; } -void freeCallback(dict *d, void *val) { - UNUSED(d); - +void freeCallback(void *val) { zfree(val); } diff --git a/src/unit/test_kvstore.c b/src/unit/test_kvstore.c index b3eff7d132..062b9f32fc 100644 --- a/src/unit/test_kvstore.c +++ b/src/unit/test_kvstore.c @@ -5,8 +5,7 @@ uint64_t hashTestCallback(const void *key) { return dictGenHashFunction((unsigned char *)key, strlen((char *)key)); } -void freeTestCallback(dict *d, void *val) { - UNUSED(d); +void freeTestCallback(void *val) { zfree(val); } diff --git a/src/valkey-benchmark.c b/src/valkey-benchmark.c index b22ee8cbed..57cdd6fc16 100644 --- a/src/valkey-benchmark.c +++ b/src/valkey-benchmark.c @@ -199,7 +199,7 @@ static long long showThroughput(struct aeEventLoop *eventLoop, long long id, voi /* Dict callbacks */ static uint64_t dictSdsHash(const void *key); -static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2); +static int dictSdsKeyCompare(const void *key1, const void *key2); /* Implementation */ static long long ustime(void) { @@ -220,10 +220,8 @@ static uint64_t dictSdsHash(const void *key) { return dictGenHashFunction((unsigned char *)key, sdslen((char *)key)); } -static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2) { +static int dictSdsKeyCompare(const void *key1, const void *key2) { int l1, l2; - UNUSED(d); - l1 = sdslen((sds)key1); l2 = sdslen((sds)key2); if (l1 != l2) return 0; diff --git a/src/valkey-cli.c b/src/valkey-cli.c index b4a7fcaf91..0ba03dc6ba 100644 --- a/src/valkey-cli.c +++ b/src/valkey-cli.c @@ -172,9 +172,9 @@ static struct termios orig_termios; /* To restore terminal at exit.*/ /* Dict Helpers */ static uint64_t dictSdsHash(const void *key); -static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2); -static void dictSdsDestructor(dict *d, void *val); -static void dictListDestructor(dict *d, void *val); +static int 
dictSdsKeyCompare(const void *key1, const void *key2); +static void dictSdsDestructor(void *val); +static void dictListDestructor(void *val); /* Cluster Manager Command Info */ typedef struct clusterManagerCommand { @@ -371,23 +371,19 @@ static uint64_t dictSdsHash(const void *key) { return dictGenHashFunction((unsigned char *)key, sdslen((char *)key)); } -static int dictSdsKeyCompare(dict *d, const void *key1, const void *key2) { +static int dictSdsKeyCompare(const void *key1, const void *key2) { int l1, l2; - UNUSED(d); - l1 = sdslen((sds)key1); l2 = sdslen((sds)key2); if (l1 != l2) return 0; return memcmp(key1, key2, l1) == 0; } -static void dictSdsDestructor(dict *d, void *val) { - UNUSED(d); +static void dictSdsDestructor(void *val) { sdsfree(val); } -void dictListDestructor(dict *d, void *val) { - UNUSED(d); +void dictListDestructor(void *val) { listRelease((list *)val); } @@ -8663,9 +8659,8 @@ static typeinfo *typeinfo_add(dict *types, char *name, typeinfo *type_template) return info; } -void type_free(dict *d, void *val) { +void type_free(void *val) { typeinfo *info = val; - UNUSED(d); if (info->biggest_key) sdsfree(info->biggest_key); sdsfree(info->name); zfree(info); From b9994030e952788c8f736bcd02387dddf2c8b1cb Mon Sep 17 00:00:00 2001 From: bentotten <59932872+bentotten@users.noreply.github.com> Date: Thu, 14 Nov 2024 20:48:48 -0800 Subject: [PATCH 15/60] Log clusterbus handshake timeout failures (#1247) This adds a log when a handshake fails for a timeout. 
This can help troubleshoot cluster asymmetry issues caused by failed MEETs --------- Signed-off-by: Ben Totten Signed-off-by: bentotten <59932872+bentotten@users.noreply.github.com> Co-authored-by: Ben Totten Co-authored-by: Madelyn Olson --- src/cluster_legacy.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index cfde3fd797..f1d3b878c2 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -4909,6 +4909,8 @@ static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t handshake_ /* A Node in HANDSHAKE state has a limited lifespan equal to the * configured node timeout. */ if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) { + serverLog(LL_WARNING, "Clusterbus handshake timeout %s:%d after %lldms", node->ip, + node->cport, handshake_timeout); clusterDelNode(node); return 1; } From d3f3b9cc3a452b6d18e9e862dcae5a923952c8da Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 15 Nov 2024 14:27:28 +0800 Subject: [PATCH 16/60] Fix daily valgrind build with unit tests (#1309) This was introduced in #515. 
Signed-off-by: Binbin --- .github/workflows/daily.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 62eecb1fa8..8e9045fe4b 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -506,7 +506,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make all-with-unit-tests valgrind SERVER_CFLAGS='-Werror' + run: make valgrind all-with-unit-tests SERVER_CFLAGS='-Werror' - name: testprep run: | sudo apt-get update @@ -575,7 +575,7 @@ jobs: repository: ${{ env.GITHUB_REPOSITORY }} ref: ${{ env.GITHUB_HEAD_REF }} - name: make - run: make all-with-unit-tests valgrind CFLAGS="-DNO_MALLOC_USABLE_SIZE" SERVER_CFLAGS='-Werror' + run: make valgrind all-with-unit-tests CFLAGS="-DNO_MALLOC_USABLE_SIZE" SERVER_CFLAGS='-Werror' - name: testprep run: | sudo apt-get update From 4e2493e5c961b36e6832e8d6ea259939b0cf0fde Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 15 Nov 2024 16:34:32 +0800 Subject: [PATCH 17/60] Kill diskless fork child asap when the last replica drop (#1227) We originally checked the replica connection to whether to kill the diskless child only when rdbPipeReadHandler is triggered. Actually we can check it when the replica is disconnected, so that we don't have to wait for rdbPipeReadHandler to be triggered and can kill the fork child as soon as possible. In this way, when the child or rdbPipeReadHandler is stuck for some reason, we can kill the child faster and release the fork resources. 
Signed-off-by: Binbin --- src/networking.c | 8 +++++++- src/replication.c | 12 ++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/networking.c b/src/networking.c index 1a008a852d..0db1fda8d7 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1555,12 +1555,17 @@ void unlinkClient(client *c) { * in which case it needs to be cleaned from that list */ if (c->flag.replica && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.rdb_pipe_conns) { int i; + int still_alive = 0; for (i = 0; i < server.rdb_pipe_numconns; i++) { if (server.rdb_pipe_conns[i] == c->conn) { rdbPipeWriteHandlerConnRemoved(c->conn); server.rdb_pipe_conns[i] = NULL; - break; } + if (server.rdb_pipe_conns[i]) still_alive++; + } + if (still_alive == 0) { + serverLog(LL_NOTICE, "Diskless rdb transfer, last replica dropped, killing fork child."); + killRDBChild(); } } /* Only use shutdown when the fork is active and we are the parent. */ @@ -1781,6 +1786,7 @@ void freeClient(client *c) { if (server.saveparamslen == 0 && c->repl_state == REPLICA_STATE_WAIT_BGSAVE_END && server.child_type == CHILD_TYPE_RDB && server.rdb_child_type == RDB_CHILD_TYPE_DISK && anyOtherReplicaWaitRdb(c) == 0) { + serverLog(LL_NOTICE, "Background saving, persistence disabled, last replica dropped, killing fork child."); killRDBChild(); } if (c->repl_state == REPLICA_STATE_SEND_BULK) { diff --git a/src/replication.c b/src/replication.c index 48e98ab8e7..ce2f5d7983 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1669,7 +1669,9 @@ void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, if (!conn) continue; stillUp++; } - serverLog(LL_NOTICE, "Diskless rdb transfer, done reading from pipe, %d replicas still up.", stillUp); + if (stillUp) { + serverLog(LL_NOTICE, "Diskless rdb transfer, done reading from pipe, %d replicas still up.", stillUp); + } /* Now that the replicas have finished reading, notify the child that it's safe to exit. 
 * When the server detects the child has exited, it can mark the replica as online, and * start streaming the replication buffers. */ @@ -1678,7 +1680,6 @@ void rdbPipeReadHandler(struct aeEventLoop *eventLoop, int fd, void *clientData, return; } - int stillAlive = 0; for (i = 0; i < server.rdb_pipe_numconns; i++) { ssize_t nwritten; connection *conn = server.rdb_pipe_conns[i]; @@ -1708,15 +1709,10 @@ server.rdb_pipe_numconns_writing++; connSetWriteHandler(conn, rdbPipeWriteHandler); } - stillAlive++; } - if (stillAlive == 0) { - serverLog(LL_WARNING, "Diskless rdb transfer, last replica dropped, killing fork child."); - killRDBChild(); - } /* Remove the pipe read handler if at least one write handler was set. */ - if (server.rdb_pipe_numconns_writing || stillAlive == 0) { + if (server.rdb_pipe_numconns_writing) { aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE); break; } From 92181b67970efad6df82ea2319ccd4a266dfec5e Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 15 Nov 2024 16:47:15 +0800 Subject: [PATCH 18/60] Fix primary crash when processing dirty slots during shutdown wait / failover wait / client pause (#1131) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have an assert in propagateNow. If the primary node receives a CLUSTER UPDATE such as dirty slots during SIGTERM waiting or during a manual failover pausing or during a client pause, the delKeysInSlot call will trigger this assert and cause primary crash. In this case, we added a new server_del_keys_in_slot state just like client_pause_in_transaction to track the state to avoid the assert in propagateNow, the dirty slots will be deleted in the end without affecting the data consistency. 
Signed-off-by: Binbin Co-authored-by: Viktor Söderqvist --- src/cluster_legacy.c | 5 ++ src/networking.c | 2 +- src/server.c | 24 +++++++- src/server.h | 3 +- tests/unit/cluster/slot-ownership.tcl | 85 +++++++++++++++++++++++++++ tests/unit/pause.tcl | 27 +++++++++ 6 files changed, 142 insertions(+), 4 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index f1d3b878c2..69af65f1e8 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -6084,6 +6084,9 @@ void removeChannelsInSlot(unsigned int slot) { unsigned int delKeysInSlot(unsigned int hashslot) { if (!countKeysInSlot(hashslot)) return 0; + /* We may lose a slot during the pause. We need to track this + * state so that we don't assert in propagateNow(). */ + server.server_del_keys_in_slot = 1; unsigned int j = 0; kvstoreDictIterator *kvs_di = NULL; @@ -6108,6 +6111,8 @@ unsigned int delKeysInSlot(unsigned int hashslot) { } kvstoreReleaseDictIterator(kvs_di); + server.server_del_keys_in_slot = 0; + serverAssert(server.execution_nesting == 0); return j; } diff --git a/src/networking.c b/src/networking.c index 0db1fda8d7..4791055b5a 100644 --- a/src/networking.c +++ b/src/networking.c @@ -4571,7 +4571,7 @@ static void pauseClientsByClient(mstime_t endTime, int isPauseClientAll) { } /* Pause actions up to the specified unixtime (in ms) for a given type of - * commands. + * purpose. * * A main use case of this function is to allow pausing replication traffic * so that a failover without data loss to occur. 
Replicas will continue to receive diff --git a/src/server.c b/src/server.c index 8841219697..12691df8ee 100644 --- a/src/server.c +++ b/src/server.c @@ -3315,8 +3315,28 @@ static void propagateNow(int dbid, robj **argv, int argc, int target) { if (!shouldPropagate(target)) return; /* This needs to be unreachable since the dataset should be fixed during - * replica pause (otherwise data may be lost during a failover) */ - serverAssert(!(isPausedActions(PAUSE_ACTION_REPLICA) && (!server.client_pause_in_transaction))); + * replica pause (otherwise data may be lost during a failover). + * + * Though, there are exceptions: + * + * 1. We allow write commands that were queued up before and after to + * execute, if a CLIENT PAUSE executed during a transaction, we will + * track the state, the CLIENT PAUSE takes effect only after a transaction + * has finished. + * 2. Primary loses a slot during the pause, deletes all keys and replicates + * DEL to its replicas. In this case, we will track the state, the dirty + * slots will be deleted in the end without affecting the data consistency. + * + * Note that case 2 can happen in one of the following scenarios: + * 1) The primary waits for the replica to replicate before exiting, see + * shutdown-timeout in conf for more details. In this case, primary lost + * a slot during the SIGTERM waiting. + * 2) The primary waits for the replica to replicate during a manual failover. + * In this case, primary lost a slot during the pausing. + * 3) The primary was paused by CLIENT PAUSE, and lost a slot during the + * pausing. 
*/ + serverAssert(!isPausedActions(PAUSE_ACTION_REPLICA) || server.client_pause_in_transaction || + server.server_del_keys_in_slot); if (server.aof_state != AOF_OFF && target & PROPAGATE_AOF) feedAppendOnlyFile(dbid, argv, argc); if (target & PROPAGATE_REPL) replicationFeedReplicas(dbid, argv, argc); diff --git a/src/server.h b/src/server.h index c7a9806cac..5ef04a9080 100644 --- a/src/server.h +++ b/src/server.h @@ -1701,6 +1701,7 @@ struct valkeyServer { const char *busy_module_yield_reply; /* When non-null, we are inside RM_Yield. */ char *ignore_warnings; /* Config: warnings that should be ignored. */ int client_pause_in_transaction; /* Was a client pause executed during this Exec? */ + int server_del_keys_in_slot; /* The server is deleting the keys in the dirty slot. */ int thp_enabled; /* If true, THP is enabled. */ size_t page_size; /* The page size of OS. */ /* Modules */ @@ -2863,7 +2864,7 @@ void flushReplicasOutputBuffers(void); void disconnectReplicas(void); void evictClients(void); int listenToPort(connListener *fds); -void pauseActions(pause_purpose purpose, mstime_t end, uint32_t actions_bitmask); +void pauseActions(pause_purpose purpose, mstime_t end, uint32_t actions); void unpauseActions(pause_purpose purpose); uint32_t isPausedActions(uint32_t action_bitmask); uint32_t isPausedActionsWithUpdate(uint32_t action_bitmask); diff --git a/tests/unit/cluster/slot-ownership.tcl b/tests/unit/cluster/slot-ownership.tcl index 0f3e3cc4f7..0073c2904f 100644 --- a/tests/unit/cluster/slot-ownership.tcl +++ b/tests/unit/cluster/slot-ownership.tcl @@ -59,3 +59,88 @@ start_cluster 2 2 {tags {external:skip cluster}} { } } } + +start_cluster 3 1 {tags {external:skip cluster} overrides {shutdown-timeout 100}} { + test "Primary lost a slot during the shutdown waiting" { + R 0 set FOO 0 + + # Pause the replica. + pause_process [srv -3 pid] + + # Incr the key and immediately shutdown the primary. + # The primary waits for the replica to replicate before exiting. 
+ R 0 incr FOO + exec kill -SIGTERM [srv 0 pid] + wait_for_condition 50 100 { + [s 0 shutdown_in_milliseconds] > 0 + } else { + fail "Primary not indicating ongoing shutdown." + } + + # Move the slot to other primary + R 1 cluster bumpepoch + R 1 cluster setslot [R 1 cluster keyslot FOO] node [R 1 cluster myid] + + # Waiting for dirty slot update. + wait_for_log_messages 0 {"*Deleting keys in dirty slot*"} 0 1000 10 + + # Resume the replica and make sure primary exits normally instead of crashing. + resume_process [srv -3 pid] + wait_for_log_messages 0 {"*Valkey is now ready to exit, bye bye*"} 0 1000 10 + + # Make sure that the replica will become the new primary and does not own the key. + wait_for_condition 1000 50 { + [s -3 role] eq {master} + } else { + fail "The replica was not converted into primary" + } + assert_error {ERR no such key} {R 3 debug object foo} + } +} + +start_cluster 3 1 {tags {external:skip cluster}} { + test "Primary lost a slot during the manual failover pausing" { + R 0 set FOO 0 + + # Set primaries to drop the FAILOVER_AUTH_REQUEST packets, so that + # primary 0 will pause until the failover times out. + R 1 debug drop-cluster-packet-filter 5 + R 2 debug drop-cluster-packet-filter 5 + + # Replica doing the manual failover. + R 3 cluster failover + + # Move the slot to other primary + R 1 cluster bumpepoch + R 1 cluster setslot [R 1 cluster keyslot FOO] node [R 1 cluster myid] + + # Waiting for dirty slot update. + wait_for_log_messages 0 {"*Deleting keys in dirty slot*"} 0 1000 10 + + # Make sure primary doesn't crash when deleting the keys. 
+ R 0 ping + + R 1 debug drop-cluster-packet-filter -1 + R 2 debug drop-cluster-packet-filter -1 + } +} + +start_cluster 3 1 {tags {external:skip cluster}} { + test "Primary lost a slot during the client pause command" { + R 0 set FOO 0 + + R 0 client pause 1000000000 write + + # Move the slot to other primary + R 1 cluster bumpepoch + R 1 cluster setslot [R 1 cluster keyslot FOO] node [R 1 cluster myid] + + # Waiting for dirty slot update. + wait_for_log_messages 0 {"*Deleting keys in dirty slot*"} 0 1000 10 + + # Make sure primary doesn't crash when deleting the keys. + R 0 ping + + R 0 client unpause + } +} diff --git a/tests/unit/pause.tcl b/tests/unit/pause.tcl index 38c13afc46..b18a32d48f 100644 --- a/tests/unit/pause.tcl +++ b/tests/unit/pause.tcl @@ -260,6 +260,33 @@ start_server {tags {"pause network"}} { r client unpause } + test "Test eviction is skipped during client pause" { + r flushall + set evicted_keys [s 0 evicted_keys] + + r multi + r set foo{t} bar + r config set maxmemory-policy allkeys-random + r config set maxmemory 1 + r client PAUSE 50000 WRITE + r exec + + # No keys should actually have been evicted. + assert_match $evicted_keys [s 0 evicted_keys] + + # The previous config set triggers a time event, but due to the pause, + # no eviction has been made. After the unpause, a eviction will happen. + r client unpause + wait_for_condition 1000 10 { + [expr $evicted_keys + 1] eq [s 0 evicted_keys] + } else { + fail "Key is not evicted" + } + + r config set maxmemory 0 + r config set maxmemory-policy noeviction + } + test "Test both active and passive expires are skipped during client pause" { set expired_keys [s 0 expired_keys] r multi From 86f33ea2b05e0f14391942c635a87974eb103937 Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 15 Nov 2024 16:48:13 +0800 Subject: [PATCH 19/60] Unprotect rdb channel when bgsave child fails in dual channel replication (#1297) If bgsaveerr is error, there is no need to protect the rdb channel. 
The impact of this may be that when bgsave fails, we will protect the rdb channel for 60s. It may occupy the reference of the repl buf block, making it impossible to recycle it until we free the client due to COB or free the client after 60s. We kept the RDB channel open as long as the replica hadn't established a main connection, even if the snapshot process failed. There is no value in keeping the RDB client in this case. Signed-off-by: Binbin --- src/replication.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/replication.c b/src/replication.c index ce2f5d7983..48f02cf658 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1741,6 +1741,8 @@ void updateReplicasWaitingBgsave(int bgsaveerr, int type) { struct valkey_stat buf; if (bgsaveerr != C_OK) { + /* If bgsaveerr is error, there is no need to protect the rdb channel. */ + replica->flag.protected_rdb_channel = 0; freeClientAsync(replica); serverLog(LL_WARNING, "SYNC failed. BGSAVE child returned an error"); continue; From aa2dd3ecb82bce5d76f7796c5e6df3e5c6e55203 Mon Sep 17 00:00:00 2001 From: Binbin Date: Sat, 16 Nov 2024 18:58:25 +0800 Subject: [PATCH 20/60] Stabilize replica migration test to make sure cluster config is consistent (#1311) CI reports this failure: ``` [exception]: Executing test client: MOVED 1 127.0.0.1:22128. MOVED 1 127.0.0.1:22128 while executing "wait_for_condition 1000 50 { [R 3 get key_991803] == 1024 && [R 3 get key_977613] == 10240 && [R 4 get key_991803] == 1024 && ..." ``` This may be because, even though the cluster state becomes OK, the cluster still has inconsistent configuration for a short period of time. We make sure to wait for the config to be consistent. 
Signed-off-by: Binbin --- tests/unit/cluster/replica-migration.tcl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit/cluster/replica-migration.tcl b/tests/unit/cluster/replica-migration.tcl index d04069ef16..591d732fce 100644 --- a/tests/unit/cluster/replica-migration.tcl +++ b/tests/unit/cluster/replica-migration.tcl @@ -90,6 +90,8 @@ proc test_migrated_replica {type} { # Wait for the cluster to be ok. wait_for_condition 1000 50 { + [R 3 cluster slots] eq [R 4 cluster slots] && + [R 4 cluster slots] eq [R 7 cluster slots] && [CI 3 cluster_state] eq "ok" && [CI 4 cluster_state] eq "ok" && [CI 7 cluster_state] eq "ok" @@ -187,6 +189,7 @@ proc test_nonempty_replica {type} { # Wait for the cluster to be ok. wait_for_condition 1000 50 { + [R 4 cluster slots] eq [R 7 cluster slots] && [CI 4 cluster_state] eq "ok" && [CI 7 cluster_state] eq "ok" } else { @@ -306,6 +309,8 @@ proc test_sub_replica {type} { # Wait for the cluster to be ok. wait_for_condition 1000 50 { + [R 3 cluster slots] eq [R 4 cluster slots] && + [R 4 cluster slots] eq [R 7 cluster slots] && [CI 3 cluster_state] eq "ok" && [CI 4 cluster_state] eq "ok" && [CI 7 cluster_state] eq "ok" From 94113fde7fb251e24911e51ab8cf2a696864ebb6 Mon Sep 17 00:00:00 2001 From: uriyage <78144248+uriyage@users.noreply.github.com> Date: Mon, 18 Nov 2024 07:52:35 +0200 Subject: [PATCH 21/60] Improvements for TLS with I/O threads (#1271) Main thread profiling revealed significant overhead in TLS operations, even with read/write offloaded to I/O threads: Perf results: **10.82%** 8.82% `valkey-server libssl.so.3 [.] SSL_pending` # Called by main thread after I/O completion **10.16%** 5.06% `valkey-server libcrypto.so.3 [.] ERR_clear_error` # Called for every event regardless of thread handling This commit further optimizes TLS operations by moving more work from the main thread to I/O threads: Improve TLS offloading to I/O threads with two main changes: 1. 
Move `ERR_clear_error()` calls closer to SSL operations - Currently, error queue is cleared for every TLS event - Now only clear before actual SSL function calls - This prevents unnecessary clearing in main thread when operations are handled by I/O threads 2. Optimize `SSL_pending()` checks - Add `TLS_CONN_FLAG_HAS_PENDING` flag to track pending data - Move pending check to follow read operations immediately - I/O thread sets flag when pending data exists - Main thread uses flag to update pending list Performance improvements: Testing setup based on https://valkey.io/blog/unlock-one-million-rps-part2/ Before: - SET: 896,047 ops/sec - GET: 875,794 ops/sec After: - SET: 985,784 ops/sec (+10% improvement) - GET: 1,066,171 ops/sec (+22% improvement) Signed-off-by: Uri Yagelnik --- src/tls.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/tls.c b/src/tls.c index f1c82d35e4..a1fda2a7ae 100644 --- a/src/tls.c +++ b/src/tls.c @@ -446,6 +446,7 @@ typedef enum { #define TLS_CONN_FLAG_WRITE_WANT_READ (1 << 1) #define TLS_CONN_FLAG_FD_SET (1 << 2) #define TLS_CONN_FLAG_POSTPONE_UPDATE_STATE (1 << 3) +#define TLS_CONN_FLAG_HAS_PENDING (1 << 4) typedef struct tls_connection { connection c; @@ -614,7 +615,7 @@ static void updatePendingData(tls_connection *conn) { /* If SSL has pending data, already read from the socket, we're at risk of not calling the read handler again, make * sure to add it to a list of pending connection that should be handled anyway. 
*/ - if (SSL_pending(conn->ssl) > 0) { + if (conn->flags & TLS_CONN_FLAG_HAS_PENDING) { if (!conn->pending_list_node) { listAddNodeTail(pending_list, conn); conn->pending_list_node = listLast(pending_list); @@ -625,6 +626,14 @@ static void updatePendingData(tls_connection *conn) { } } +void updateSSLPendingFlag(tls_connection *conn) { + if (SSL_pending(conn->ssl) > 0) { + conn->flags |= TLS_CONN_FLAG_HAS_PENDING; + } else { + conn->flags &= ~TLS_CONN_FLAG_HAS_PENDING; + } +} + static void updateSSLEvent(tls_connection *conn) { if (conn->flags & TLS_CONN_FLAG_POSTPONE_UPDATE_STATE) return; @@ -653,8 +662,6 @@ static void tlsHandleEvent(tls_connection *conn, int mask) { TLSCONN_DEBUG("tlsEventHandler(): fd=%d, state=%d, mask=%d, r=%d, w=%d, flags=%d", fd, conn->c.state, mask, conn->c.read_handler != NULL, conn->c.write_handler != NULL, conn->flags); - ERR_clear_error(); - switch (conn->c.state) { case CONN_STATE_CONNECTING: conn_error = anetGetError(conn->c.fd); @@ -662,6 +669,7 @@ static void tlsHandleEvent(tls_connection *conn, int mask) { conn->c.last_errno = conn_error; conn->c.state = CONN_STATE_ERROR; } else { + ERR_clear_error(); if (!(conn->flags & TLS_CONN_FLAG_FD_SET)) { SSL_set_fd(conn->ssl, conn->c.fd); conn->flags |= TLS_CONN_FLAG_FD_SET; @@ -690,6 +698,7 @@ static void tlsHandleEvent(tls_connection *conn, int mask) { conn->c.conn_handler = NULL; break; case CONN_STATE_ACCEPTING: + ERR_clear_error(); ret = SSL_accept(conn->ssl); if (ret <= 0) { WantIOType want = 0; @@ -747,10 +756,7 @@ static void tlsHandleEvent(tls_connection *conn, int mask) { conn->flags &= ~TLS_CONN_FLAG_READ_WANT_WRITE; if (!callHandler((connection *)conn, conn->c.read_handler)) return; } - - if (mask & AE_READABLE) { - updatePendingData(conn); - } + updatePendingData(conn); break; } @@ -941,6 +947,7 @@ static int connTLSRead(connection *conn_, void *buf, size_t buf_len) { if (conn->c.state != CONN_STATE_CONNECTED) return -1; ERR_clear_error(); ret = SSL_read(conn->ssl, buf, 
buf_len); + updateSSLPendingFlag(conn); + return updateStateAfterSSLIO(conn, ret, 1); } @@ -992,7 +999,7 @@ static int connTLSBlockingConnect(connection *conn_, const char *addr, int port, * which means the specified timeout will not be enforced accurately. */ SSL_set_fd(conn->ssl, conn->c.fd); setBlockingTimeout(conn, timeout); - + ERR_clear_error(); if ((ret = SSL_connect(conn->ssl)) <= 0) { conn->c.state = CONN_STATE_ERROR; return C_ERR; } @@ -1023,6 +1030,7 @@ static ssize_t connTLSSyncRead(connection *conn_, char *ptr, ssize_t size, long setBlockingTimeout(conn, timeout); ERR_clear_error(); int ret = SSL_read(conn->ssl, ptr, size); + updateSSLPendingFlag(conn); ret = updateStateAfterSSLIO(conn, ret, 0); unsetBlockingTimeout(conn); @@ -1041,6 +1049,7 @@ static ssize_t connTLSSyncReadLine(connection *conn_, char *ptr, ssize_t size, l ERR_clear_error(); int ret = SSL_read(conn->ssl, &c, 1); + updateSSLPendingFlag(conn); ret = updateStateAfterSSLIO(conn, ret, 0); if (ret <= 0) { nread = -1; From d07674fc01fd9b3b4fdd8c13de74d3d28697ddc5 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 18 Nov 2024 14:55:26 +0800 Subject: [PATCH 22/60] Fix sds unittest tests to check for zmalloc_usable_size (#1314) s_malloc_size == zmalloc_size, currently sdsAllocSize does not calculate PREFIX_SIZE when no malloc_size available, this causes test_typesAndAllocSize to fail in the new unittest, what we want to check is actually zmalloc_usable_size. 
Signed-off-by: Binbin --- src/unit/test_sds.c | 15 ++++++++------- src/unit/test_zmalloc.c | 2 ++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/unit/test_sds.c b/src/unit/test_sds.c index b97d0d9d32..30f25e4f6f 100644 --- a/src/unit/test_sds.c +++ b/src/unit/test_sds.c @@ -259,43 +259,44 @@ int test_typesAndAllocSize(int argc, char **argv, int flags) { sds x = sdsnewlen(NULL, 31); TEST_ASSERT_MESSAGE("len 31 type", (x[-1] & SDS_TYPE_MASK) == SDS_TYPE_5); + TEST_ASSERT_MESSAGE("len 31 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); x = sdsnewlen(NULL, 32); TEST_ASSERT_MESSAGE("len 32 type", (x[-1] & SDS_TYPE_MASK) >= SDS_TYPE_8); - TEST_ASSERT_MESSAGE("len 32 sdsAllocSize", sdsAllocSize(x) == s_malloc_size(sdsAllocPtr(x))); + TEST_ASSERT_MESSAGE("len 32 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); x = sdsnewlen(NULL, 252); TEST_ASSERT_MESSAGE("len 252 type", (x[-1] & SDS_TYPE_MASK) >= SDS_TYPE_8); - TEST_ASSERT_MESSAGE("len 252 sdsAllocSize", sdsAllocSize(x) == s_malloc_size(sdsAllocPtr(x))); + TEST_ASSERT_MESSAGE("len 252 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); x = sdsnewlen(NULL, 253); TEST_ASSERT_MESSAGE("len 253 type", (x[-1] & SDS_TYPE_MASK) == SDS_TYPE_16); - TEST_ASSERT_MESSAGE("len 253 sdsAllocSize", sdsAllocSize(x) == s_malloc_size(sdsAllocPtr(x))); + TEST_ASSERT_MESSAGE("len 253 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); x = sdsnewlen(NULL, 65530); TEST_ASSERT_MESSAGE("len 65530 type", (x[-1] & SDS_TYPE_MASK) >= SDS_TYPE_16); - TEST_ASSERT_MESSAGE("len 65530 sdsAllocSize", sdsAllocSize(x) == s_malloc_size(sdsAllocPtr(x))); + TEST_ASSERT_MESSAGE("len 65530 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); x = sdsnewlen(NULL, 65531); TEST_ASSERT_MESSAGE("len 65531 type", (x[-1] & SDS_TYPE_MASK) >= SDS_TYPE_32); - TEST_ASSERT_MESSAGE("len 
65531 sdsAllocSize", sdsAllocSize(x) == s_malloc_size(sdsAllocPtr(x))); + TEST_ASSERT_MESSAGE("len 65531 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); #if (LONG_MAX == LLONG_MAX) if (flags & UNIT_TEST_LARGE_MEMORY) { x = sdsnewlen(NULL, 4294967286); TEST_ASSERT_MESSAGE("len 4294967286 type", (x[-1] & SDS_TYPE_MASK) >= SDS_TYPE_32); - TEST_ASSERT_MESSAGE("len 4294967286 sdsAllocSize", sdsAllocSize(x) == s_malloc_size(sdsAllocPtr(x))); + TEST_ASSERT_MESSAGE("len 4294967286 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); x = sdsnewlen(NULL, 4294967287); TEST_ASSERT_MESSAGE("len 4294967287 type", (x[-1] & SDS_TYPE_MASK) == SDS_TYPE_64); - TEST_ASSERT_MESSAGE("len 4294967287 sdsAllocSize", sdsAllocSize(x) == s_malloc_size(sdsAllocPtr(x))); + TEST_ASSERT_MESSAGE("len 4294967287 sdsAllocSize", sdsAllocSize(x) == s_malloc_usable_size(sdsAllocPtr(x))); sdsfree(x); } #endif diff --git a/src/unit/test_zmalloc.c b/src/unit/test_zmalloc.c index 6c1d03e8e1..08444a157e 100644 --- a/src/unit/test_zmalloc.c +++ b/src/unit/test_zmalloc.c @@ -6,6 +6,8 @@ int test_zmallocInitialUsedMemory(int argc, char **argv, int flags) { UNUSED(argv); UNUSED(flags); + /* If this fails, it may be that other tests have failed and the memory has not been released. */ + TEST_PRINT_INFO("test_zmallocInitialUsedMemory; used: %zu\n", zmalloc_used_memory()); TEST_ASSERT(zmalloc_used_memory() == 0); return 0; From c5012cc630bb65c07a17ea870630edd8825cde52 Mon Sep 17 00:00:00 2001 From: Amit Nagler <58042354+naglera@users.noreply.github.com> Date: Mon, 18 Nov 2024 13:09:35 +0200 Subject: [PATCH 23/60] Optimize RDB load performance and fix cluster mode resizing on replica side (#1199) This PR addresses two issues: 1. Performance Degradation Fix - Resolves a significant performance issue during RDB load on replica nodes. - The problem was causing replicas to rehash multiple times during the load process. 
Local testing demonstrated up to 50% degradation in BGSAVE time. - The problem occurs when the replica tries to expand pre-created slot dictionaries. This operation fails quietly, resulting in undetected performance issues. - This fix aims to optimize the RDB load process and restore expected performance levels. 2. Bug fix when reading `RDB_OPCODE_RESIZEDB` in Valkey 8.0 cluster mode- - Use the shard's master slots count when processing this opcode, as `clusterNodeCoversSlot` is not initialized for the currently syncing replica. - Previously, this problem went unnoticed because `RDB_OPCODE_RESIZEDB` had no practical impact (due to 1). These improvements will enhance overall system performance and ensure smoother upgrades to Valkey 8.0 in the future. Testing: - Conducted local tests to verify the performance improvement during RDB load. - Verified that ignoring `RDB_OPCODE_RESIZEDB` does not negatively impact functionality in the current version. Signed-off-by: naglera Co-authored-by: Binbin --- src/db.c | 2 +- src/kvstore.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/db.c b/src/db.c index 3e0e5a2e63..b59c7727b2 100644 --- a/src/db.c +++ b/src/db.c @@ -1884,7 +1884,7 @@ keyStatus expireIfNeeded(serverDb *db, robj *key, int flags) { * The purpose is to skip expansion of unused dicts in cluster mode (all * dicts not mapped to *my* slots) */ static int dbExpandSkipSlot(int slot) { - return !clusterNodeCoversSlot(getMyClusterNode(), slot); + return !clusterNodeCoversSlot(clusterNodeGetPrimary(getMyClusterNode()), slot); } /* diff --git a/src/kvstore.c b/src/kvstore.c index 7142fa0f61..49662f330a 100644 --- a/src/kvstore.c +++ b/src/kvstore.c @@ -423,9 +423,11 @@ unsigned long long kvstoreScan(kvstore *kvs, * `dictTryExpand` call and in case of `dictExpand` call it signifies no expansion was performed. 
*/ int kvstoreExpand(kvstore *kvs, uint64_t newsize, int try_expand, kvstoreExpandShouldSkipDictIndex *skip_cb) { + if (newsize == 0) return 1; for (int i = 0; i < kvs->num_dicts; i++) { - dict *d = kvstoreGetDict(kvs, i); - if (!d || (skip_cb && skip_cb(i))) continue; + if (skip_cb && skip_cb(i)) continue; + /* If the dictionary doesn't exist, create it */ + dict *d = createDictIfNeeded(kvs, i); int result = try_expand ? dictTryExpand(d, newsize) : dictExpand(d, newsize); if (try_expand && result == DICT_ERR) return 0; } From f9d0b876224beecc8386cce5e11d43e649b82189 Mon Sep 17 00:00:00 2001 From: Seungmin Lee <155032684+sungming2@users.noreply.github.com> Date: Mon, 18 Nov 2024 18:00:30 -0800 Subject: [PATCH 24/60] Upgrade macos-12 to macos-13 in workflows (#1318) ### Problem GitHub Actions is starting the deprecation process for macOS 12. Deprecation will begin on 10/7/24 and the image will be fully unsupported by 12/3/24. For more details, see https://github.com/actions/runner-images/issues/10721 Signed-off-by: Seungmin Lee Co-authored-by: Seungmin Lee --- .github/workflows/daily.yml | 4 ++-- deps/hiredis/.github/workflows/build.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 8e9045fe4b..8bdbc8d4c2 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -990,7 +990,7 @@ jobs: strategy: fail-fast: false matrix: - os: [macos-12, macos-14] + os: [macos-13, macos-14] runs-on: ${{ matrix.os }} if: | (github.event_name == 'workflow_dispatch' || @@ -1019,7 +1019,7 @@ jobs: run: make SERVER_CFLAGS='-Werror' test-freebsd: - runs-on: macos-12 + runs-on: macos-13 if: | (github.event_name == 'workflow_dispatch' || (github.event_name == 'schedule' && github.repository == 'valkey-io/valkey') || diff --git a/deps/hiredis/.github/workflows/build.yml b/deps/hiredis/.github/workflows/build.yml index 581800b4f7..048ee51cd4 100644 --- 
a/deps/hiredis/.github/workflows/build.yml +++ b/deps/hiredis/.github/workflows/build.yml @@ -112,7 +112,7 @@ jobs: run: $GITHUB_WORKSPACE/test.sh freebsd: - runs-on: macos-12 + runs-on: macos-13 name: FreeBSD steps: - uses: actions/checkout@v3 From 3d0c8342030654bdfaf74d08d2d5645ff616c7a7 Mon Sep 17 00:00:00 2001 From: Seungmin Lee <155032684+sungming2@users.noreply.github.com> Date: Mon, 18 Nov 2024 18:06:35 -0800 Subject: [PATCH 25/60] Fix LRU crash when getting too many random lua scripts (#1310) ### Problem Valkey stores scripts in a dictionary (lua_scripts) keyed by their SHA1 hashes, but it needs a way to know which scripts are least recently used. It uses an LRU list (lua_scripts_lru_list) to keep track of scripts in usage order. When the list reaches a maximum length, Valkey evicts the oldest scripts to free memory in both the list and dictionary. The problem here is that the sds from the LRU list can be pointing to already freed/moved memory by active defrag that the sds in the dictionary used to point to. It results in assertion error at [this line](https://github.com/valkey-io/valkey/blob/unstable/src/eval.c#L519) ### Solution If we duplicate the sds when adding it to the LRU list, we can create an independent copy of the script identifier (sha). This duplication ensures that the sha string in the LRU list remains stable and unaffected by any defragmentation that could alter or free the original sds. In addition, dictUnlink doesn't require exact pointer match([ref](https://github.com/valkey-io/valkey/blob/unstable/src/eval.c#L71-L78)) so this change makes sense to unlink the right dictEntry with the copy of the sds. ### Reproduce To reproduce it with tcl test: 1. Disable je_get_defrag_hint in defrag.c to trigger defrag often 2. 
Execute test script ``` start_server {tags {"auth external:skip"}} { test {Regression for script LRU crash} { r config set activedefrag yes r config set active-defrag-ignore-bytes 1 r config set active-defrag-threshold-lower 0 r config set active-defrag-threshold-upper 1 r config set active-defrag-cycle-min 99 r config set active-defrag-cycle-max 99 for {set i 0} {$i < 100000} {incr i} { r eval "return $i" 0 } after 5000; } } ``` ### Crash info Crash report: ``` === REDIS BUG REPORT START: Cut & paste starting from here === 14044:M 12 Nov 2024 14:51:27.054 # === ASSERTION FAILED === 14044:M 12 Nov 2024 14:51:27.054 # ==> eval.c:556 'de' is not true ------ STACK TRACE ------ Backtrace: /usr/bin/redis-server 127.0.0.1:6379 [cluster](luaDeleteFunction+0x148)[0x723708] /usr/bin/redis-server 127.0.0.1:6379 [cluster](luaCreateFunction+0x26c)[0x724450] /usr/bin/redis-server 127.0.0.1:6379 [cluster](evalCommand+0x2bc)[0x7254dc] /usr/bin/redis-server 127.0.0.1:6379 [cluster](call+0x574)[0x5b8d14] /usr/bin/redis-server 127.0.0.1:6379 [cluster](processCommand+0xc84)[0x5b9b10] /usr/bin/redis-server 127.0.0.1:6379 [cluster](processCommandAndResetClient+0x11c)[0x6db63c] /usr/bin/redis-server 127.0.0.1:6379 [cluster](processInputBuffer+0x1b0)[0x6dffd4] /usr/bin/redis-server 127.0.0.1:6379 [cluster][0x6bd968] /usr/bin/redis-server 127.0.0.1:6379 [cluster][0x659634] /usr/bin/redis-server 127.0.0.1:6379 [cluster](amzTLSEventHandler+0x194)[0x6588d8] /usr/bin/redis-server 127.0.0.1:6379 [cluster][0x750c88] /usr/bin/redis-server 127.0.0.1:6379 [cluster](aeProcessEvents+0x228)[0x757fa8] /usr/bin/redis-server 127.0.0.1:6379 [cluster](redisMain+0x478)[0x7786b8] /lib64/libc.so.6(__libc_start_main+0xe4)[0xffffa7763da4] /usr/bin/redis-server 127.0.0.1:6379 [cluster][0x5ad3b0] ``` Defrag info: ``` mem_fragmentation_ratio:1.18 mem_fragmentation_bytes:47229992 active_defrag_hits:20561 active_defrag_misses:5878518 active_defrag_key_hits:77 active_defrag_key_misses:212 
total_active_defrag_time:29009 ``` ### Test: Run the test script to push 100,000 scripts to ensure the LRU list keeps 500 maximum length without any crash. ``` 27489:M 14 Nov 2024 20:56:41.583 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.583 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 27489:M 14 Nov 2024 20:56:41.584 * LRU List length: 500 [ok]: Regression for script LRU crash (6811 ms) [1/1 done]: unit/test (7 seconds) ``` --------- Signed-off-by: Seungmin Lee Signed-off-by: Seungmin Lee <155032684+sungming2@users.noreply.github.com> Co-authored-by: Seungmin Lee Co-authored-by: Binbin --- src/eval.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/eval.c b/src/eval.c index e5d7d56aa2..a9c50cdf90 100644 --- a/src/eval.c +++ b/src/eval.c @@ -199,10 +199,12 @@ void scriptingInit(int setup) { } /* Initialize a dictionary we use to map SHAs to scripts. - * Initialize a list we use for lua script evictions, it shares the - * sha with the dictionary, so free fn is not set. */ + * Initialize a list we use for lua script evictions. + * Note that we duplicate the sha when adding to the lru list due to defrag, + * and we need to free them respectively. 
*/ lctx.lua_scripts = dictCreate(&shaScriptObjectDictType); lctx.lua_scripts_lru_list = listCreate(); + listSetFreeMethod(lctx.lua_scripts_lru_list, (void (*)(void *))sdsfree); lctx.lua_scripts_mem = 0; luaRegisterServerAPI(lua); @@ -518,9 +520,6 @@ void luaDeleteFunction(client *c, sds sha) { dictEntry *de = dictUnlink(lctx.lua_scripts, sha); serverAssertWithInfo(c ? c : lctx.lua_client, NULL, de); luaScript *l = dictGetVal(de); - /* We only delete `EVAL` scripts, which must exist in the LRU list. */ - serverAssert(l->node); - listDelNode(lctx.lua_scripts_lru_list, l->node); lctx.lua_scripts_mem -= sdsAllocSize(sha) + getStringObjectSdsUsedMemory(l->body); dictFreeUnlinkedEntry(lctx.lua_scripts, de); } @@ -549,11 +548,12 @@ listNode *luaScriptsLRUAdd(client *c, sds sha, int evalsha) { listNode *ln = listFirst(lctx.lua_scripts_lru_list); sds oldest = listNodeValue(ln); luaDeleteFunction(c, oldest); + listDelNode(lctx.lua_scripts_lru_list, ln); server.stat_evictedscripts++; } /* Add current. */ - listAddNodeTail(lctx.lua_scripts_lru_list, sha); + listAddNodeTail(lctx.lua_scripts_lru_list, sdsdup(sha)); return listLast(lctx.lua_scripts_lru_list); } From 132798b57d7f95ad5901495d566578bf8ba71390 Mon Sep 17 00:00:00 2001 From: Binbin Date: Tue, 19 Nov 2024 23:42:50 +0800 Subject: [PATCH 26/60] Receipt of REPLCONF VERSION reply should be triggered by event (#1320) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds the missing return when repl_state changes to RECEIVE_VERSION_REPLY, this way we won’t be blocked if the primary doesn’t reply with REPLCONF VERSION. In practice I guess this is not likely to block in this context, reading small responses are likely to be received in one packet, so this is just a cleanup (consistent with the previous state machine processing). Also update the state machine diagram to mention the VERSION reply. 
Signed-off-by: Binbin --- src/replication.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/replication.c b/src/replication.c index 48f02cf658..a809c4c166 100644 --- a/src/replication.c +++ b/src/replication.c @@ -3379,15 +3379,15 @@ void dualChannelSetupMainConnForPsync(connection *conn) { * ┌────────▼──────────┐ │ │ │DUAL_CHANNEL_RECEIVE_ENDOFF│ │ │by the primary │ * │RECEIVE_IP_REPLY │ │ │ └───────┬───────────────────┘ │ ┌──▼────────────────┐ │ * └────────┬──────────┘ │ │ │$ENDOFF │ │RECEIVE_PSYNC_REPLY│ │ - * │ │ │ ├─────────────────────────┘ └──┬────────────────┘ │ - * │ │ │ │ │+CONTINUE │ - * │ │ │ ┌───────▼───────────────┐ ┌──▼────────────────┐ │ - * │ │ │ │DUAL_CHANNEL_RDB_LOAD │ │TRANSFER │ │ + * │+OK │ │ ├─────────────────────────┘ └──┬────────────────┘ │ + * ┌────────▼──────────┐ │ │ │ │+CONTINUE │ + * │RECEIVE_CAPA_REPLY │ │ │ ┌───────▼───────────────┐ ┌──▼────────────────┐ │ + * └────────┬──────────┘ │ │ │DUAL_CHANNEL_RDB_LOAD │ │TRANSFER │ │ * │+OK │ │ └───────┬───────────────┘ └─────┬─────────────┘ │ - * ┌────────▼──────────┐ │ │ │Done loading │ │ - * │RECEIVE_CAPA_REPLY │ │ │ ┌───────▼───────────────┐ │ │ - * └────────┬──────────┘ │ │ │DUAL_CHANNEL_RDB_LOADED│ │ │ - * │ │ │ └───────┬───────────────┘ │ │ + * ┌────────▼─────────────┐ │ │ │Done loading │ │ + * │RECEIVE_VERSION_REPLY │ │ │ ┌───────▼───────────────┐ │ │ + * └────────┬─────────────┘ │ │ │DUAL_CHANNEL_RDB_LOADED│ │ │ + * │+OK │ │ └───────┬───────────────┘ │ │ * ┌────────▼───┐ │ │ │ │ │ * │SEND_PSYNC │ │ │ │Replica loads local replication │ │ * └─┬──────────┘ │ │ │buffer into memory │ │ @@ -3589,6 +3589,7 @@ void syncWithPrimary(connection *conn) { sdsfree(err); err = NULL; server.repl_state = REPL_STATE_RECEIVE_VERSION_REPLY; + return; } /* Receive VERSION reply. 
*/ From ee386c92ffa9724771e4980064fa279655e46f90 Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 20 Nov 2024 00:17:20 +0800 Subject: [PATCH 27/60] Manual failover vote is not limited by two times the node timeout (#1305) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This limit should not restrict manual failover, otherwise in some scenarios, manual failover will time out. For example, if some FAILOVER_AUTH_REQUESTs or some FAILOVER_AUTH_ACKs are lost during a manual failover, it cannot vote in the second manual failover. Or in a mixed scenario of plain failover and manual failover, it cannot vote for the subsequent manual failover. The problem with the manual failover retry is that the mf will pause the client 5s in the primary side. So every retry every manual failover timed out is a bad move. --------- Signed-off-by: Binbin Co-authored-by: Viktor Söderqvist --- src/cluster_legacy.c | 15 +++-- src/cluster_legacy.h | 3 +- tests/support/cluster_util.tcl | 1 + tests/unit/cluster/manual-failover.tcl | 88 ++++++++++++++++++++++++++ 4 files changed, 101 insertions(+), 6 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 69af65f1e8..7b3384ee9f 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -4363,12 +4363,17 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { /* We did not voted for a replica about this primary for two * times the node timeout. This is not strictly needed for correctness - * of the algorithm but makes the base case more linear. */ - if (mstime() - node->replicaof->voted_time < server.cluster_node_timeout * 2) { + * of the algorithm but makes the base case more linear. + * + * This limitation does not restrict manual failover. If a user initiates + * a manual failover, we need to allow it to vote, otherwise the manual + * failover may time out. 
*/ + if (!force_ack && mstime() - node->replicaof->voted_time < server.cluster_node_timeout * 2) { serverLog(LL_WARNING, - "Failover auth denied to %.40s %s: " - "can't vote about this primary before %lld milliseconds", + "Failover auth denied to %.40s (%s): " + "can't vote for any replica of %.40s (%s) within %lld milliseconds", node->name, node->human_nodename, + node->replicaof->name, node->replicaof->human_nodename, (long long)((server.cluster_node_timeout * 2) - (mstime() - node->replicaof->voted_time))); return; } @@ -4394,7 +4399,7 @@ void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { /* We can vote for this replica. */ server.cluster->lastVoteEpoch = server.cluster->currentEpoch; - node->replicaof->voted_time = mstime(); + if (!force_ack) node->replicaof->voted_time = mstime(); clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_FSYNC_CONFIG); clusterSendFailoverAuth(node); serverLog(LL_NOTICE, "Failover auth granted to %.40s (%s) for epoch %llu", node->name, node->human_nodename, diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 5280644e6e..2c3e1d83c8 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -338,7 +338,8 @@ struct _clusterNode { mstime_t pong_received; /* Unix time we received the pong */ mstime_t data_received; /* Unix time we received any data */ mstime_t fail_time; /* Unix time when FAIL flag was set */ - mstime_t voted_time; /* Last time we voted for a replica of this primary */ + mstime_t voted_time; /* Last time we voted for a replica of this primary in non manual + * failover scenarios. */ mstime_t repl_offset_time; /* Unix time we received offset for this node */ mstime_t orphaned_time; /* Starting time of orphaned primary condition */ long long repl_offset; /* Last known repl offset for this node. 
*/ diff --git a/tests/support/cluster_util.tcl b/tests/support/cluster_util.tcl index 4b399214b9..686f00071b 100644 --- a/tests/support/cluster_util.tcl +++ b/tests/support/cluster_util.tcl @@ -145,6 +145,7 @@ proc wait_for_cluster_size {cluster_size} { # Check that cluster nodes agree about "state", or raise an error. proc wait_for_cluster_state {state} { for {set j 0} {$j < [llength $::servers]} {incr j} { + if {[process_is_paused [srv -$j pid]]} continue wait_for_condition 1000 50 { [CI $j cluster_state] eq $state } else { diff --git a/tests/unit/cluster/manual-failover.tcl b/tests/unit/cluster/manual-failover.tcl index 2a9dff934b..78842068fa 100644 --- a/tests/unit/cluster/manual-failover.tcl +++ b/tests/unit/cluster/manual-failover.tcl @@ -183,3 +183,91 @@ test "Wait for instance #0 to return back alive" { } } ;# start_cluster + +start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 2000}} { + test "Manual failover vote is not limited by two times the node timeout - drop the auth ack" { + set CLUSTER_PACKET_TYPE_FAILOVER_AUTH_ACK 6 + set CLUSTER_PACKET_TYPE_NONE -1 + + # Setting a large timeout to make sure we hit the voted_time limit. + R 0 config set cluster-node-timeout 150000 + R 1 config set cluster-node-timeout 150000 + R 2 config set cluster-node-timeout 150000 + + # Let replica drop FAILOVER_AUTH_ACK so that the election won't + # get the enough votes and the election will time out. + R 3 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_FAILOVER_AUTH_ACK + + # The first manual failover will time out. + R 3 cluster failover + wait_for_log_messages 0 {"*Manual failover timed out*"} 0 1000 50 + wait_for_log_messages -3 {"*Manual failover timed out*"} 0 1000 50 + + # Undo packet drop, so that replica can win the next election. + R 3 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_NONE + + # Make sure the second manual failover will work. 
+ R 3 cluster failover + wait_for_condition 1000 50 { + [s 0 role] eq {slave} && + [s -3 role] eq {master} + } else { + fail "The second failover does not happen" + } + wait_for_cluster_propagation + } +} ;# start_cluster + +start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 2000}} { + test "Manual failover vote is not limited by two times the node timeout - mixed failover" { + # Make sure the failover is triggered by us. + R 1 config set cluster-replica-validity-factor 0 + R 3 config set cluster-replica-no-failover yes + R 3 config set cluster-replica-validity-factor 0 + + # Pause the primary. + pause_process [srv 0 pid] + wait_for_cluster_state fail + + # Setting a large timeout to make sure we hit the voted_time limit. + R 1 config set cluster-node-timeout 150000 + R 2 config set cluster-node-timeout 150000 + + # R 3 performs an automatic failover and it will work. + R 3 config set cluster-replica-no-failover no + wait_for_condition 1000 50 { + [s -3 role] eq {master} + } else { + fail "The first failover does not happen" + } + + # Resume the primary and wait for it to become a replica. + resume_process [srv 0 pid] + wait_for_condition 1000 50 { + [s 0 role] eq {slave} + } else { + fail "Old primary not converted into replica" + } + wait_for_cluster_propagation + + # The old primary doing a manual failover and wait for it. + R 0 cluster failover + wait_for_condition 1000 50 { + [s 0 role] eq {master} && + [s -3 role] eq {slave} + } else { + fail "The second failover does not happen" + } + wait_for_cluster_propagation + + # R 3 performs a manual failover and it will work. 
+ R 3 cluster failover + wait_for_condition 1000 50 { + [s 0 role] eq {slave} && + [s -3 role] eq {master} + } else { + fail "The third failover does not happen" + } + wait_for_cluster_propagation + } +} ;# start_cluster From 49863109453faa907ce2c8b1158e60a6777d28ab Mon Sep 17 00:00:00 2001 From: Yanqi Lv Date: Wed, 20 Nov 2024 04:53:19 +0800 Subject: [PATCH 28/60] Import-mode: Avoid expiration and eviction during data syncing (#1185) New config: `import-mode (yes|no)` New command: `CLIENT IMPORT-SOURCE (ON|OFF)` The config, when set to `yes`, disables eviction and deletion of expired keys, except for commands coming from a client which has marked itself as an import-source, the data source when importing data from another node, using the CLIENT IMPORT-SOURCE command. When we sync data from the source Valkey to the destination Valkey using some sync tools like [redis-shake](https://github.com/tair-opensource/RedisShake), the destination Valkey can perform expiration and eviction, which may cause data corruption. This problem has been discussed in https://github.com/redis/redis/discussions/9760#discussioncomment-1681041 and Redis already has a solution. But in Valkey we haven't fixed it by now. E.g. we call `set key 1 ex 1` on the source server and transfer this command to the destination server. Then we call `incr key` on the source server before the key expired, we will have a key on the source server with a value of 2. But when the command arrived at the destination server, the key may have expired and been deleted. So we will have a key on the destination server with a value of 1, which is inconsistent with the source server. In standalone mode, we can use writable replica to simplify the sync process. However, in cluster mode, we still need a sync tool to help us transfer the source data to the destination. The sync tool usually works as a normal client and the destination works as a primary which keeps expiration and eviction. 
In this PR, we add a new mode named 'import-mode'. In this mode, server stop expiration and eviction just like a replica. Notice that this mode exists only in sync state to avoid data inconsistency caused by expiration and eviction. Import mode only takes effect on the primary. Sync tools can mark their clients as an import source by `CLIENT IMPORT-SOURCE`, which work like a client from primary and can visit expired keys in `lookupkey`. **Notice: during the migration, other clients, apart from the import source, should not access the data imported by import source.** --------- Signed-off-by: lvyanqi.lyq Signed-off-by: Yanqi Lv Co-authored-by: Madelyn Olson --- src/commands.def | 29 ++++++++++ src/commands/client-import-source.json | 40 ++++++++++++++ src/config.c | 1 + src/db.c | 21 +++++++- src/evict.c | 4 +- src/expire.c | 7 ++- src/networking.c | 20 +++++++ src/server.c | 9 ++-- src/server.h | 5 +- tests/unit/expire.tcl | 74 ++++++++++++++++++++++++++ tests/unit/maxmemory.tcl | 18 +++++++ valkey.conf | 7 +++ 12 files changed, 225 insertions(+), 10 deletions(-) create mode 100644 src/commands/client-import-source.json diff --git a/src/commands.def b/src/commands.def index 791b30d540..ecc77126af 100644 --- a/src/commands.def +++ b/src/commands.def @@ -1230,6 +1230,34 @@ struct COMMAND_ARG CLIENT_CAPA_Args[] = { #define CLIENT_ID_Keyspecs NULL #endif +/********** CLIENT IMPORT_SOURCE ********************/ + +#ifndef SKIP_CMD_HISTORY_TABLE +/* CLIENT IMPORT_SOURCE history */ +#define CLIENT_IMPORT_SOURCE_History NULL +#endif + +#ifndef SKIP_CMD_TIPS_TABLE +/* CLIENT IMPORT_SOURCE tips */ +#define CLIENT_IMPORT_SOURCE_Tips NULL +#endif + +#ifndef SKIP_CMD_KEY_SPECS_TABLE +/* CLIENT IMPORT_SOURCE key specs */ +#define CLIENT_IMPORT_SOURCE_Keyspecs NULL +#endif + +/* CLIENT IMPORT_SOURCE enabled argument table */ +struct COMMAND_ARG CLIENT_IMPORT_SOURCE_enabled_Subargs[] = { +{MAKE_ARG("on",ARG_TYPE_PURE_TOKEN,-1,"ON",NULL,NULL,CMD_ARG_NONE,0,NULL)}, 
+{MAKE_ARG("off",ARG_TYPE_PURE_TOKEN,-1,"OFF",NULL,NULL,CMD_ARG_NONE,0,NULL)}, +}; + +/* CLIENT IMPORT_SOURCE argument table */ +struct COMMAND_ARG CLIENT_IMPORT_SOURCE_Args[] = { +{MAKE_ARG("enabled",ARG_TYPE_ONEOF,-1,NULL,NULL,NULL,CMD_ARG_NONE,2,NULL),.subargs=CLIENT_IMPORT_SOURCE_enabled_Subargs}, +}; + /********** CLIENT INFO ********************/ #ifndef SKIP_CMD_HISTORY_TABLE @@ -1630,6 +1658,7 @@ struct COMMAND_STRUCT CLIENT_Subcommands[] = { {MAKE_CMD("getredir","Returns the client ID to which the connection's tracking notifications are redirected.","O(1)","6.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_GETREDIR_History,0,CLIENT_GETREDIR_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_GETREDIR_Keyspecs,0,NULL,0)}, {MAKE_CMD("help","Returns helpful text about the different subcommands.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_HELP_History,0,CLIENT_HELP_Tips,0,clientCommand,2,CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_HELP_Keyspecs,0,NULL,0)}, {MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)}, +{MAKE_CMD("import-source","Mark this client as an import source when server is in import mode.","O(1)","8.1.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_IMPORT_SOURCE_History,0,CLIENT_IMPORT_SOURCE_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_IMPORT_SOURCE_Keyspecs,0,NULL,1),.args=CLIENT_IMPORT_SOURCE_Args}, {MAKE_CMD("info","Returns information about the 
connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)}, {MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,7,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args}, {MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,7,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args}, diff --git a/src/commands/client-import-source.json b/src/commands/client-import-source.json new file mode 100644 index 0000000000..113c07d70a --- /dev/null +++ b/src/commands/client-import-source.json @@ -0,0 +1,40 @@ +{ + "IMPORT-SOURCE": { + "summary": "Mark this client as an import source when server is in import mode.", + "complexity": "O(1)", + "group": "connection", + "since": "8.1.0", + "arity": 3, + "container": "CLIENT", + "function": "clientCommand", + "command_flags": [ + "NOSCRIPT", + "LOADING", + "STALE" + ], + "acl_categories": [ + "CONNECTION" + ], + "reply_schema": { + "const": "OK" + }, + "arguments": [ + { + "name": "enabled", + "type": "oneof", + "arguments": [ + { + "name": "on", + "type": "pure-token", + "token": "ON" + }, + { + "name": "off", + "type": "pure-token", + "token": "OFF" + } + ] + } + ] + } +} \ No newline at end of file diff --git a/src/config.c b/src/config.c index 15fec15276..c4009adefa 100644 --- a/src/config.c +++ b/src/config.c @@ -3139,6 +3139,7 @@ standardConfig 
static_configs[] = { createBoolConfig("enable-debug-assert", NULL, IMMUTABLE_CONFIG | HIDDEN_CONFIG, server.enable_debug_assert, 0, NULL, NULL), createBoolConfig("cluster-slot-stats-enabled", NULL, MODIFIABLE_CONFIG, server.cluster_slot_stats_enabled, 0, NULL, NULL), createBoolConfig("hide-user-data-from-log", NULL, MODIFIABLE_CONFIG, server.hide_user_data_from_log, 1, NULL, NULL), + createBoolConfig("import-mode", NULL, MODIFIABLE_CONFIG, server.import_mode, 0, NULL, NULL), /* String Configs */ createStringConfig("aclfile", NULL, IMMUTABLE_CONFIG, ALLOW_EMPTY_STRING, server.acl_filename, "", NULL, NULL), diff --git a/src/db.c b/src/db.c index b59c7727b2..10d4a04091 100644 --- a/src/db.c +++ b/src/db.c @@ -385,7 +385,7 @@ robj *dbRandomKey(serverDb *db) { key = dictGetKey(de); keyobj = createStringObject(key, sdslen(key)); if (dbFindExpiresWithDictIndex(db, key, randomDictIndex)) { - if (allvolatile && server.primary_host && --maxtries == 0) { + if (allvolatile && (server.primary_host || server.import_mode) && --maxtries == 0) { /* If the DB is composed only of keys with an expire set, * it could happen that all the keys are already logically * expired in the repilca, so the function cannot stop because @@ -1821,6 +1821,25 @@ keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, int flags, int di if (server.primary_host != NULL) { if (server.current_client && (server.current_client->flag.primary)) return KEY_VALID; if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; + } else if (server.import_mode) { + /* If we are running in the import mode on a primary, instead of + * evicting the expired key from the database, we return ASAP: + * the key expiration is controlled by the import source that will + * send us synthesized DEL operations for expired keys. The + * exception is when write operations are performed on this server + * because it's a primary. 
+ * + * Notice: other clients, apart from the import source, should not access + * the data imported by import source. + * + * Still we try to return the right information to the caller, + * that is, KEY_VALID if we think the key should still be valid, + * KEY_EXPIRED if we think the key is expired but don't want to delete it at this time. + * + * When receiving commands from the import source, keys are never considered + * expired. */ + if (server.current_client && (server.current_client->flag.import_source)) return KEY_VALID; + if (!(flags & EXPIRE_FORCE_DELETE_EXPIRED)) return KEY_EXPIRED; } /* In some cases we're explicitly instructed to return an indication of a diff --git a/src/evict.c b/src/evict.c index 5e4b6220eb..5208328b32 100644 --- a/src/evict.c +++ b/src/evict.c @@ -546,8 +546,8 @@ int performEvictions(void) { goto update_metrics; } - if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION) { - result = EVICT_FAIL; /* We need to free memory, but policy forbids. */ + if (server.maxmemory_policy == MAXMEMORY_NO_EVICTION || (iAmPrimary() && server.import_mode)) { + result = EVICT_FAIL; /* We need to free memory, but policy forbids or we are in import mode. */ goto update_metrics; } diff --git a/src/expire.c b/src/expire.c index 928bb58d86..c22df1ef86 100644 --- a/src/expire.c +++ b/src/expire.c @@ -520,8 +520,11 @@ int checkAlreadyExpired(long long when) { * of a replica instance. * * Instead we add the already expired key to the database with expire time - * (possibly in the past) and wait for an explicit DEL from the primary. */ - return (when <= commandTimeSnapshot() && !server.loading && !server.primary_host); + * (possibly in the past) and wait for an explicit DEL from the primary. + * + * If the server is a primary and in the import mode, we also add the already + * expired key and wait for an explicit DEL from the import source. 
*/ + return (when <= commandTimeSnapshot() && !server.loading && !server.primary_host && !server.import_mode); } #define EXPIRE_NX (1 << 0) diff --git a/src/networking.c b/src/networking.c index 4791055b5a..9558780f39 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3585,6 +3585,10 @@ void clientCommand(client *c) { " Protect current client connection from eviction.", "NO-TOUCH (ON|OFF)", " Will not touch LRU/LFU stats when this mode is on.", + "IMPORT-SOURCE (ON|OFF)", + " Mark this connection as an import source if server.import_mode is true.", + " Sync tools can set their connections into 'import-source' state to visit", + " expired keys.", NULL}; addReplyHelp(c, help); } else if (!strcasecmp(c->argv[1]->ptr, "id") && c->argc == 2) { @@ -4058,6 +4062,22 @@ void clientCommand(client *c) { } } addReply(c, shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr, "import-source")) { + /* CLIENT IMPORT-SOURCE ON|OFF */ + if (!server.import_mode) { + addReplyError(c, "Server is not in import mode"); + return; + } + if (!strcasecmp(c->argv[2]->ptr, "on")) { + c->flag.import_source = 1; + addReply(c, shared.ok); + } else if (!strcasecmp(c->argv[2]->ptr, "off")) { + c->flag.import_source = 0; + addReply(c, shared.ok); + } else { + addReplyErrorObject(c, shared.syntaxerr); + return; + } } else { addReplySubcommandSyntaxError(c); } diff --git a/src/server.c b/src/server.c index 12691df8ee..aebbb57a93 100644 --- a/src/server.c +++ b/src/server.c @@ -1131,10 +1131,10 @@ void databasesCron(void) { /* Expire keys by random sampling. Not required for replicas * as primary will synthesize DELs for us. 
*/ if (server.active_expire_enabled) { - if (iAmPrimary()) { - activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); - } else { + if (!iAmPrimary()) { expireReplicaKeys(); + } else if (!server.import_mode) { + activeExpireCycle(ACTIVE_EXPIRE_CYCLE_SLOW); } } @@ -1727,7 +1727,7 @@ void beforeSleep(struct aeEventLoop *eventLoop) { /* Run a fast expire cycle (the called function will return * ASAP if a fast cycle is not needed). */ - if (server.active_expire_enabled && iAmPrimary()) activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST); + if (server.active_expire_enabled && !server.import_mode && iAmPrimary()) activeExpireCycle(ACTIVE_EXPIRE_CYCLE_FAST); if (moduleCount()) { moduleFireServerEvent(VALKEYMODULE_EVENT_EVENTLOOP, VALKEYMODULE_SUBEVENT_EVENTLOOP_BEFORE_SLEEP, NULL); @@ -2133,6 +2133,7 @@ void initServerConfig(void) { server.extended_redis_compat = 0; server.pause_cron = 0; server.dict_resizing = 1; + server.import_mode = 0; server.latency_tracking_info_percentiles_len = 3; server.latency_tracking_info_percentiles = zmalloc(sizeof(double) * (server.latency_tracking_info_percentiles_len)); diff --git a/src/server.h b/src/server.h index 5ef04a9080..531ca8e7c8 100644 --- a/src/server.h +++ b/src/server.h @@ -1233,7 +1233,8 @@ typedef struct ClientFlags { * knows that it does not need the cache and required a full sync. With this * flag, we won't cache the primary in freeClient. */ uint64_t fake : 1; /* This is a fake client without a real connection. */ - uint64_t reserved : 5; /* Reserved for future use */ + uint64_t import_source : 1; /* This client is importing data to server and can visit expired key. */ + uint64_t reserved : 4; /* Reserved for future use */ } ClientFlags; typedef struct client { @@ -2089,6 +2090,8 @@ struct valkeyServer { char primary_replid[CONFIG_RUN_ID_SIZE + 1]; /* Primary PSYNC runid. */ long long primary_initial_offset; /* Primary PSYNC offset. */ int repl_replica_lazy_flush; /* Lazy FLUSHALL before loading DB? 
*/ + /* Import Mode */ + int import_mode; /* If true, server is in import mode and forbid expiration and eviction. */ /* Synchronous replication. */ list *clients_waiting_acks; /* Clients waiting in WAIT or WAITAOF. */ int get_ack_from_replicas; /* If true we send REPLCONF GETACK. */ diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index d85ce7ee68..fba425f62d 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -832,6 +832,80 @@ start_server {tags {"expire"}} { close_replication_stream $repl assert_equal [r debug set-active-expire 1] {OK} } {} {needs:debug} + + test {Import mode should forbid active expiration} { + r flushall + + r config set import-mode yes + assert_equal [r client import-source on] {OK} + + r set foo1 bar PX 1 + r set foo2 bar PX 1 + after 100 + + assert_equal [r dbsize] {2} + + assert_equal [r client import-source off] {OK} + r config set import-mode no + + # Verify all keys have expired + wait_for_condition 40 100 { + [r dbsize] eq 0 + } else { + fail "Keys did not actively expire." 
+ } + } + + test {Import mode should forbid lazy expiration} { + r flushall + r debug set-active-expire 0 + + r config set import-mode yes + assert_equal [r client import-source on] {OK} + + r set foo1 1 PX 1 + after 10 + + r get foo1 + assert_equal [r dbsize] {1} + + assert_equal [r client import-source off] {OK} + r config set import-mode no + + r get foo1 + + assert_equal [r dbsize] {0} + + assert_equal [r debug set-active-expire 1] {OK} + } {} {needs:debug} + + test {RANDOMKEY can return expired key in import mode} { + r flushall + + r config set import-mode yes + assert_equal [r client import-source on] {OK} + + r set foo1 bar PX 1 + after 10 + + set client [valkey [srv "host"] [srv "port"] 0 $::tls] + if {!$::singledb} { + $client select 9 + } + assert_equal [$client ttl foo1] {-2} + + assert_equal [r randomkey] {foo1} + + assert_equal [r client import-source off] {OK} + r config set import-mode no + + # Verify all keys have expired + wait_for_condition 40 100 { + [r dbsize] eq 0 + } else { + fail "Keys did not actively expire." 
+ } + } } start_cluster 1 0 {tags {"expire external:skip cluster"}} { diff --git a/tests/unit/maxmemory.tcl b/tests/unit/maxmemory.tcl index d4e62246f1..89e9699a3e 100644 --- a/tests/unit/maxmemory.tcl +++ b/tests/unit/maxmemory.tcl @@ -611,3 +611,21 @@ start_server {tags {"maxmemory" "external:skip"}} { assert {[r object freq foo] == 5} } } + +start_server {tags {"maxmemory" "external:skip"}} { + test {Import mode should forbid eviction} { + r set key val + r config set import-mode yes + assert_equal [r client import-source on] {OK} + r config set maxmemory-policy allkeys-lru + r config set maxmemory 1 + + assert_equal [r dbsize] {1} + assert_error {OOM command not allowed*} {r set key1 val1} + + assert_equal [r client import-source off] {OK} + r config set import-mode no + + assert_equal [r dbsize] {0} + } +} \ No newline at end of file diff --git a/valkey.conf b/valkey.conf index 7c7b9da43e..bf82b01874 100644 --- a/valkey.conf +++ b/valkey.conf @@ -818,6 +818,13 @@ replica-priority 100 # # replica-ignore-disk-write-errors no +# Make the primary forbid expiration and eviction. +# This is useful for sync tools, because expiration and eviction may cause the data corruption. +# Sync tools can mark their connections as importing source by CLIENT IMPORT-SOURCE. +# NOTICE: Clients should avoid writing the same key on the source server and the destination server. +# +# import-mode no + # ----------------------------------------------------------------------------- # By default, Sentinel includes all replicas in its reports. A replica # can be excluded from Sentinel's announcements. An unannounced replica From f553ccbda674caa13d3cfa6e8096c5f19cb3a9c1 Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 21 Nov 2024 20:01:30 +0800 Subject: [PATCH 29/60] Use goto to cleanup error handling in readSyncBulkPayload (#1332) The goto error label is the same as the error return, use goto to reduce the references. 
``` error: cancelReplicationHandshake(1); return; ``` Also this can make the log printing more continuous under the error, that is, we print the error log first, and then print the reconnecting log at the last (in cancelReplicationHandshake). Signed-off-by: Binbin --- src/replication.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/replication.c b/src/replication.c index a809c4c166..75f08c4c89 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2091,8 +2091,7 @@ void readSyncBulkPayload(connection *conn) { } serverLog(LL_WARNING, "I/O error trying to sync with PRIMARY: %s", (nread == -1) ? connGetLastError(conn) : "connection lost"); - cancelReplicationHandshake(1); - return; + goto error; } server.stat_net_repl_input_bytes += nread; @@ -2257,7 +2256,6 @@ void readSyncBulkPayload(connection *conn) { if (loadingFailed) { stopLoading(0); - cancelReplicationHandshake(1); rioFreeConn(&rdb, NULL); if (server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { @@ -2277,7 +2275,7 @@ void readSyncBulkPayload(connection *conn) { /* Note that there's no point in restarting the AOF on SYNC * failure, it'll be restarted when sync succeeds or the replica * gets promoted. */ - return; + goto error; } /* RDB loading succeeded if we reach this point. */ @@ -2319,8 +2317,7 @@ void readSyncBulkPayload(connection *conn) { "Failed trying to sync the temp DB to disk in " "PRIMARY <-> REPLICA synchronization: %s", strerror(errno)); - cancelReplicationHandshake(1); - return; + goto error; } /* Rename rdb like renaming rewrite aof asynchronously. */ @@ -2330,9 +2327,8 @@ void readSyncBulkPayload(connection *conn) { "Failed trying to rename the temp DB into %s in " "PRIMARY <-> REPLICA synchronization: %s", server.rdb_filename, strerror(errno)); - cancelReplicationHandshake(1); if (old_rdb_fd != -1) close(old_rdb_fd); - return; + goto error; } /* Close old rdb asynchronously. 
*/ if (old_rdb_fd != -1) bioCreateCloseJob(old_rdb_fd, 0, 0); @@ -2343,8 +2339,7 @@ void readSyncBulkPayload(connection *conn) { "Failed trying to sync DB directory %s in " "PRIMARY <-> REPLICA synchronization: %s", server.rdb_filename, strerror(errno)); - cancelReplicationHandshake(1); - return; + goto error; } /* We will soon start loading the RDB from disk, the replication history is changed, @@ -2361,7 +2356,6 @@ void readSyncBulkPayload(connection *conn) { if (rdbLoad(server.rdb_filename, &rsi, RDBFLAGS_REPLICATION) != RDB_OK) { serverLog(LL_WARNING, "Failed trying to load the PRIMARY synchronization " "DB from disk, check server logs."); - cancelReplicationHandshake(1); if (server.rdb_del_sync_files && allPersistenceDisabled()) { serverLog(LL_NOTICE, "Removing the RDB file obtained from " "the primary. This replica has persistence " @@ -2375,7 +2369,7 @@ void readSyncBulkPayload(connection *conn) { /* Note that there's no point in restarting the AOF on sync failure, it'll be restarted when sync succeeds or replica promoted. */ - return; + goto error; } /* Cleanup. */ From 6038eda010dfb99eff908cf0839cc41004383acd Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 21 Nov 2024 21:02:05 +0800 Subject: [PATCH 30/60] Make FUNCTION RESTORE FLUSH flush async based on lazyfree-lazy-user-flush (#1254) FUNCTION RESTORE have a FLUSH option, it will delete all the existing libraries before restoring the payload. If for some reasons, there are a lot of libraries, we will block a while in here. 
Signed-off-by: Binbin --- src/functions.c | 17 +++++++++++++---- src/functions.h | 4 ++-- src/replication.c | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/functions.c b/src/functions.c index c9ec42b322..916d8fd622 100644 --- a/src/functions.c +++ b/src/functions.c @@ -185,6 +185,15 @@ void functionsLibCtxClearCurrent(int async) { } } +/* Free the given functions ctx */ +static void functionsLibCtxFreeGeneric(functionsLibCtx *functions_lib_ctx, int async) { + if (async) { + freeFunctionsAsync(functions_lib_ctx); + } else { + functionsLibCtxFree(functions_lib_ctx); + } +} + /* Free the given functions ctx */ void functionsLibCtxFree(functionsLibCtx *functions_lib_ctx) { functionsLibCtxClear(functions_lib_ctx); @@ -196,8 +205,8 @@ void functionsLibCtxFree(functionsLibCtx *functions_lib_ctx) { /* Swap the current functions ctx with the given one. * Free the old functions ctx. */ -void functionsLibCtxSwapWithCurrent(functionsLibCtx *new_lib_ctx) { - functionsLibCtxFree(curr_functions_lib_ctx); +void functionsLibCtxSwapWithCurrent(functionsLibCtx *new_lib_ctx, int async) { + functionsLibCtxFreeGeneric(curr_functions_lib_ctx, async); curr_functions_lib_ctx = new_lib_ctx; } @@ -769,7 +778,7 @@ void functionRestoreCommand(client *c) { } if (restore_replicy == restorePolicy_Flush) { - functionsLibCtxSwapWithCurrent(functions_lib_ctx); + functionsLibCtxSwapWithCurrent(functions_lib_ctx, server.lazyfree_lazy_user_flush); functions_lib_ctx = NULL; /* avoid releasing the f_ctx in the end */ } else { if (libraryJoin(curr_functions_lib_ctx, functions_lib_ctx, restore_replicy == restorePolicy_Replace, &err) != @@ -789,7 +798,7 @@ void functionRestoreCommand(client *c) { addReply(c, shared.ok); } if (functions_lib_ctx) { - functionsLibCtxFree(functions_lib_ctx); + functionsLibCtxFreeGeneric(functions_lib_ctx, server.lazyfree_lazy_user_flush); } } diff --git a/src/functions.h b/src/functions.h index da196cf197..429405bb2d 100644 --- a/src/functions.h +++ 
b/src/functions.h @@ -134,9 +134,9 @@ size_t functionsLibCtxFunctionsLen(functionsLibCtx *functions_ctx); functionsLibCtx *functionsLibCtxGetCurrent(void); functionsLibCtx *functionsLibCtxCreate(void); void functionsLibCtxClearCurrent(int async); -void functionsLibCtxFree(functionsLibCtx *lib_ctx); +void functionsLibCtxFree(functionsLibCtx *functions_lib_ctx); void functionsLibCtxClear(functionsLibCtx *lib_ctx); -void functionsLibCtxSwapWithCurrent(functionsLibCtx *lib_ctx); +void functionsLibCtxSwapWithCurrent(functionsLibCtx *new_lib_ctx, int async); int functionLibCreateFunction(sds name, void *function, functionLibInfo *li, sds desc, uint64_t f_flags, sds *err); diff --git a/src/replication.c b/src/replication.c index 75f08c4c89..437ae278ec 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2289,7 +2289,7 @@ void readSyncBulkPayload(connection *conn) { swapMainDbWithTempDb(diskless_load_tempDb); /* swap existing functions ctx with the temporary one */ - functionsLibCtxSwapWithCurrent(temp_functions_lib_ctx); + functionsLibCtxSwapWithCurrent(temp_functions_lib_ctx, 0); moduleFireServerEvent(VALKEYMODULE_EVENT_REPL_ASYNC_LOAD, VALKEYMODULE_SUBEVENT_REPL_ASYNC_LOAD_COMPLETED, NULL); From b486a415009660f355d0a8eb9fd67a9c9cb9cc6e Mon Sep 17 00:00:00 2001 From: xbasel <103044017+xbasel@users.noreply.github.com> Date: Thu, 21 Nov 2024 18:22:16 +0200 Subject: [PATCH 31/60] Preserve original fd blocking state in TLS I/O operations (#1298) This change prevents unintended side effects on connection state and improves consistency with non-TLS sync operations. For example, when invoking `connTLSSyncRead` with a blocking file descriptor, the mode is switched to non-blocking upon `connTLSSyncRead` exit. If the code assumes the file descriptor remains blocking and calls the normal `read` expecting it to block, it may result in a short read. 
This caused a crash in dual-channel, which was fixed in this PR by relocating `connBlock()`: https://github.com/valkey-io/valkey/pull/837 Signed-off-by: xbasel <103044017+xbasel@users.noreply.github.com> --- src/anet.c | 30 ++++++++++++++++++++++++++---- src/anet.h | 1 + src/tls.c | 21 ++++++++++++++++----- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/anet.c b/src/anet.c index d4ac698982..8dc06ca62e 100644 --- a/src/anet.c +++ b/src/anet.c @@ -70,17 +70,24 @@ int anetGetError(int fd) { return sockerr; } -int anetSetBlock(char *err, int fd, int non_block) { +static int anetGetSocketFlags(char *err, int fd) { int flags; - /* Set the socket blocking (if non_block is zero) or non-blocking. - * Note that fcntl(2) for F_GETFL and F_SETFL can't be - * interrupted by a signal. */ if ((flags = fcntl(fd, F_GETFL)) == -1) { anetSetError(err, "fcntl(F_GETFL): %s", strerror(errno)); return ANET_ERR; } + return flags; +} + +int anetSetBlock(char *err, int fd, int non_block) { + int flags = anetGetSocketFlags(err, fd); + + if (flags == ANET_ERR) { + return ANET_ERR; + } + /* Check if this flag has been set or unset, if so, * then there is no need to call fcntl to set/unset it again. */ if (!!(flags & O_NONBLOCK) == !!non_block) return ANET_OK; @@ -105,6 +112,21 @@ int anetBlock(char *err, int fd) { return anetSetBlock(err, fd, 0); } +int anetIsBlock(char *err, int fd) { + int flags = anetGetSocketFlags(err, fd); + + if (flags == ANET_ERR) { + return ANET_ERR; + } + + /* Check if the O_NONBLOCK flag is set */ + if (flags & O_NONBLOCK) { + return 0; /* Socket is non-blocking */ + } else { + return 1; /* Socket is blocking */ + } +} + /* Enable the FD_CLOEXEC on the given fd to avoid fd leaks. * This function should be invoked for fd's on specific places * where fork + execve system calls are called. 
*/ diff --git a/src/anet.h b/src/anet.h index ab32f72e4b..b14b4bdaad 100644 --- a/src/anet.h +++ b/src/anet.h @@ -61,6 +61,7 @@ int anetTcpAccept(char *err, int serversock, char *ip, size_t ip_len, int *port) int anetUnixAccept(char *err, int serversock); int anetNonBlock(char *err, int fd); int anetBlock(char *err, int fd); +int anetIsBlock(char *err, int fd); int anetCloexec(int fd); int anetEnableTcpNoDelay(char *err, int fd); int anetDisableTcpNoDelay(char *err, int fd); diff --git a/src/tls.c b/src/tls.c index a1fda2a7ae..d1dd567354 100644 --- a/src/tls.c +++ b/src/tls.c @@ -974,6 +974,10 @@ static int connTLSSetReadHandler(connection *conn, ConnectionCallbackFunc func) return C_OK; } +static int isBlocking(tls_connection *conn) { + return anetIsBlock(NULL, conn->c.fd); +} + static void setBlockingTimeout(tls_connection *conn, long long timeout) { anetBlock(NULL, conn->c.fd); anetSendTimeout(NULL, conn->c.fd, timeout); @@ -1012,27 +1016,31 @@ static int connTLSBlockingConnect(connection *conn_, const char *addr, int port, static ssize_t connTLSSyncWrite(connection *conn_, char *ptr, ssize_t size, long long timeout) { tls_connection *conn = (tls_connection *)conn_; - + int blocking = isBlocking(conn); setBlockingTimeout(conn, timeout); SSL_clear_mode(conn->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE); ERR_clear_error(); int ret = SSL_write(conn->ssl, ptr, size); ret = updateStateAfterSSLIO(conn, ret, 0); SSL_set_mode(conn->ssl, SSL_MODE_ENABLE_PARTIAL_WRITE); - unsetBlockingTimeout(conn); + if (!blocking) { + unsetBlockingTimeout(conn); + } return ret; } static ssize_t connTLSSyncRead(connection *conn_, char *ptr, ssize_t size, long long timeout) { tls_connection *conn = (tls_connection *)conn_; - + int blocking = isBlocking(conn); setBlockingTimeout(conn, timeout); ERR_clear_error(); int ret = SSL_read(conn->ssl, ptr, size); updateSSLPendingFlag(conn); ret = updateStateAfterSSLIO(conn, ret, 0); - unsetBlockingTimeout(conn); + if (!blocking) { + 
unsetBlockingTimeout(conn); } return ret; } @@ -1041,6 +1049,7 @@ static ssize_t connTLSSyncReadLine(connection *conn_, char *ptr, ssize_t size, l tls_connection *conn = (tls_connection *)conn_; ssize_t nread = 0; + int blocking = isBlocking(conn); setBlockingTimeout(conn, timeout); size--; @@ -1067,7 +1076,9 @@ static ssize_t connTLSSyncReadLine(connection *conn_, char *ptr, ssize_t size, l size--; } exit: - unsetBlockingTimeout(conn); + if (!blocking) { + unsetBlockingTimeout(conn); + } return nread; } From b56eed2479191dfd1f644768b7144c35a75ef52c Mon Sep 17 00:00:00 2001 From: zvi-code <54795925+zvi-code@users.noreply.github.com> Date: Fri, 22 Nov 2024 02:29:21 +0200 Subject: [PATCH 32/60] Remove valkey specific changes in jemalloc source code (#1266) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary of the change This is a base PR for refactoring defrag. It moves the defrag logic to rely on the jemalloc [native api](https://github.com/jemalloc/jemalloc/pull/1463#issuecomment-479706489) instead of relying on custom code changes made by valkey in the jemalloc ([je_defrag_hint](https://github.com/valkey-io/valkey/blob/9f8185f5c80bc98bdbc631b90ccf13929d6a0cbc/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h#L382)) library. This enables valkey to use the latest vanilla jemalloc without the need to maintain code changes across jemalloc versions. This change requires some modifications because the new api provides only the information, not a yes\no defrag decision. The logic needs to be implemented in valkey code. Additionally, the api does not provide, within a single call, all the information needed to make a decision; this information is available through an additional api call. To reduce the calls to jemalloc, in this PR the required information is collected during `computeDefragCycles` and not for every single ptr; this way we are avoiding the additional api call.
Followup work will utilize the new options that are now open and will further improve the defrag decision and process. ### Added files: `allocator_defrag.c` / `allocator_defrag.h` - This files implement the allocator specific knowledge for making defrag decision. The knowledge about slabs and allocation logic and so on, all goes into this file. This improves the separation between jemalloc specific code and other possible implementation. ### Moved functions: [`zmalloc_no_tcache` , `zfree_no_tcache` ](https://github.com/valkey-io/valkey/blob/4593dc2f059661e1c4eb43bba025f68948344228/src/zmalloc.c#L215) - these are very jemalloc specific logic assumptions, and are very specific to how we defrag with jemalloc. This is also with the vision that from performance perspective we should consider using tcache, we only need to make sure we don't recycle entries without going through the arena [for example: we can use private tcache, one for free and one for alloc]. `frag_smallbins_bytes` - the logic and implementation moved to the new file ### Existing API: * [once a second + when completed full cycle] [`computeDefragCycles`](https://github.com/valkey-io/valkey/blob/4593dc2f059661e1c4eb43bba025f68948344228/src/defrag.c#L916) * `zmalloc_get_allocator_info` : gets from jemalloc _allocated, active, resident, retained, muzzy_, `frag_smallbins_bytes` * [`frag_smallbins_bytes`](https://github.com/valkey-io/valkey/blob/4593dc2f059661e1c4eb43bba025f68948344228/src/zmalloc.c#L690) : for each bin; gets from jemalloc bin_info, `curr_regs`, `cur_slabs` * [during defrag, for each pointer] * `je_defrag_hint` is getting a memory pointer and returns {0,1} . 
[Internally it uses](https://github.com/valkey-io/valkey/blob/4593dc2f059661e1c4eb43bba025f68948344228/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h#L368) these information points: * #`nonfull_slabs` * #`total_slabs` * #free regs in the ptr slab ## Jemalloc API (via ctl interface) [BATCH][`experimental_utilization_batch_query_ctl`](https://github.com/valkey-io/valkey/blob/4593dc2f059661e1c4eb43bba025f68948344228/deps/jemalloc/src/ctl.c#L4114) : gets an array of pointers, returns for each pointer 3 values, * number of free regions in the extent * number of regions in the extent * size of the extent in terms of bytes [EXTENDED][`experimental_utilization_query_ctl`](https://github.com/valkey-io/valkey/blob/4593dc2f059661e1c4eb43bba025f68948344228/deps/jemalloc/src/ctl.c#L3989) : * memory address of the extent a potential reallocation would go into * number of free regions in the extent * number of regions in the extent * size of the extent in terms of bytes * [stats-enabled]total number of free regions in the bin the extent belongs to * [stats-enabled]total number of regions in the bin the extent belongs to ### `experimental_utilization_batch_query_ctl` vs valkey `je_defrag_hint`? [good] - We can query pointers in a batch, reducing the overall overhead - The per ptr decision algorithm is not within the jemalloc api; jemalloc only provides information, so valkey can tune\configure\optimize easily [bad] - In the batch API we only know the utilization of the slab (of that memory ptr); we don’t get the data about #`nonfull_slabs` and total allocated regs. ## New functions: 1. `defrag_jemalloc_init`: Reducing the cost of calls to je_ctl: use the [MIB interface](https://jemalloc.net/jemalloc.3.html) to get faster calls.
See this quote from the jemalloc documentation: The mallctlnametomib() function provides a way to avoid repeated name lookups for applications that repeatedly query the same portion of the namespace,by translating a name to a “Management Information Base” (MIB) that can be passed repeatedly to mallctlbymib(). 6. `jemalloc_sz2binind_lgq*` : this api is to support reverse map between bin size and it’s info without lookup. This mapping depends on the number of size classes we have that are derived from [`lg_quantum`](https://github.com/valkey-io/valkey/blob/4593dc2f059661e1c4eb43bba025f68948344228/deps/Makefile#L115) 7. `defrag_jemalloc_get_frag_smallbins` : This function replaces `frag_smallbins_bytes` the logic moved to the new file allocator_defrag `defrag_jemalloc_should_defrag_multi` → `handle_results` - unpacks the results 8. `should_defrag` : implements the same logic as the existing implementation [inside](https://github.com/valkey-io/valkey/blob/9f8185f5c80bc98bdbc631b90ccf13929d6a0cbc/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h#L382) je_defrag_hint 9. `defrag_jemalloc_should_defrag_multi` : implements the hint for an array of pointers, utilizing the new batch api. currently only 1 pointer is passed. ### Logical differences: In order to get the information about #`nonfull_slabs` and #`regs`, we use the query cycle to collect the information per size class. In order to find the index of bin information given bin size, in o(1), we use `jemalloc_sz2binind_lgq*` . ## Testing This is the first draft. I did some initial testing that basically fragmentation by reducing max memory and than waiting for defrag to reach desired level. The test only serves as sanity that defrag is succeeding eventually, no data provided here regarding efficiency and performance. ### Test: 1. disable `activedefrag` 2. run valkey benchmark on overlapping address ranges with different block sizes 3. wait untill `used_memory` reaches 10GB 4. 
set `maxmemory` to 5GB and `maxmemory-policy` to `allkeys-lru` 5. stop load 6. wait for `mem_fragmentation_ratio` to reach 2 7. enable `activedefrag` - start test timer 8. wait until reach `mem_fragmentation_ratio` = 1.1 #### Results*: (With this PR)Test results: ` 56 sec` (Without this PR)Test results: `67 sec` *both runs perform same "work" number of buffers moved to reach fragmentation target Next benchmarking is to compare to: - DONE // existing `je_get_defrag_hint` - compare with naive defrag all: `int defrag_hint() {return 1;}` --------- Signed-off-by: Zvi Schneider Signed-off-by: Zvi Schneider Signed-off-by: zvi-code <54795925+zvi-code@users.noreply.github.com> Co-authored-by: Zvi Schneider Co-authored-by: Zvi Schneider Co-authored-by: Madelyn Olson --- cmake/Modules/SourceFiles.cmake | 1 + .../internal/jemalloc_internal_inlines_c.h | 51 --- .../include/jemalloc/jemalloc_macros.h.in | 4 - deps/jemalloc/src/jemalloc.c | 9 - src/Makefile | 2 +- src/allocator_defrag.c | 426 ++++++++++++++++++ src/allocator_defrag.h | 22 + src/defrag.c | 14 +- src/server.c | 9 +- src/server.h | 1 + src/zmalloc.c | 79 +--- src/zmalloc.h | 19 +- 12 files changed, 466 insertions(+), 171 deletions(-) create mode 100644 src/allocator_defrag.c create mode 100644 src/allocator_defrag.h diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake index d76f17625e..873229d6f0 100644 --- a/cmake/Modules/SourceFiles.cmake +++ b/cmake/Modules/SourceFiles.cmake @@ -74,6 +74,7 @@ set(VALKEY_SERVER_SRCS ${CMAKE_SOURCE_DIR}/src/geohash.c ${CMAKE_SOURCE_DIR}/src/geohash_helper.c ${CMAKE_SOURCE_DIR}/src/childinfo.c + ${CMAKE_SOURCE_DIR}/src/allocator_defrag.c ${CMAKE_SOURCE_DIR}/src/defrag.c ${CMAKE_SOURCE_DIR}/src/siphash.c ${CMAKE_SOURCE_DIR}/src/rax.c diff --git a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 2cd7e7ce93..b0868b7d61 100644 --- 
a/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/deps/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -337,55 +337,4 @@ imalloc_fastpath(size_t size, void *(fallback_alloc)(size_t)) { return fallback_alloc(size); } -JEMALLOC_ALWAYS_INLINE int -iget_defrag_hint(tsdn_t *tsdn, void* ptr) { - int defrag = 0; - emap_alloc_ctx_t alloc_ctx; - emap_alloc_ctx_lookup(tsdn, &arena_emap_global, ptr, &alloc_ctx); - if (likely(alloc_ctx.slab)) { - /* Small allocation. */ - edata_t *slab = emap_edata_lookup(tsdn, &arena_emap_global, ptr); - arena_t *arena = arena_get_from_edata(slab); - szind_t binind = edata_szind_get(slab); - unsigned binshard = edata_binshard_get(slab); - bin_t *bin = arena_get_bin(arena, binind, binshard); - malloc_mutex_lock(tsdn, &bin->lock); - arena_dalloc_bin_locked_info_t info; - arena_dalloc_bin_locked_begin(&info, binind); - /* Don't bother moving allocations from the slab currently used for new allocations */ - if (slab != bin->slabcur) { - int free_in_slab = edata_nfree_get(slab); - if (free_in_slab) { - const bin_info_t *bin_info = &bin_infos[binind]; - /* Find number of non-full slabs and the number of regs in them */ - unsigned long curslabs = 0; - size_t curregs = 0; - /* Run on all bin shards (usually just one) */ - for (uint32_t i=0; i< bin_info->n_shards; i++) { - bin_t *bb = arena_get_bin(arena, binind, i); - curslabs += bb->stats.nonfull_slabs; - /* Deduct the regs in full slabs (they're not part of the game) */ - unsigned long full_slabs = bb->stats.curslabs - bb->stats.nonfull_slabs; - curregs += bb->stats.curregs - full_slabs * bin_info->nregs; - if (bb->slabcur) { - /* Remove slabcur from the overall utilization (not a candidate to nove from) */ - curregs -= bin_info->nregs - edata_nfree_get(bb->slabcur); - curslabs -= 1; - } - } - /* Compare the utilization ratio of the slab in question to the total average - * among non-full slabs. 
To avoid precision loss in division, we do that by - * extrapolating the usage of the slab as if all slabs have the same usage. - * If this slab is less used than the average, we'll prefer to move the data - * to hopefully more used ones. To avoid stagnation when all slabs have the same - * utilization, we give additional 12.5% weight to the decision to defrag. */ - defrag = (bin_info->nregs - free_in_slab) * curslabs <= curregs + curregs / 8; - } - } - arena_dalloc_bin_locked_finish(tsdn, arena, bin, &info); - malloc_mutex_unlock(tsdn, &bin->lock); - } - return defrag; -} - #endif /* JEMALLOC_INTERNAL_INLINES_C_H */ diff --git a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in index d04af34d93..ebb3137e6f 100644 --- a/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in +++ b/deps/jemalloc/include/jemalloc/jemalloc_macros.h.in @@ -147,7 +147,3 @@ #else # define JEMALLOC_SYS_NOTHROW JEMALLOC_NOTHROW #endif - -/* This version of Jemalloc, modified for Redis, has the je_get_defrag_hint() - * function. */ -#define JEMALLOC_FRAG_HINT diff --git a/deps/jemalloc/src/jemalloc.c b/deps/jemalloc/src/jemalloc.c index 83026093be..ea9232c5d6 100644 --- a/deps/jemalloc/src/jemalloc.c +++ b/deps/jemalloc/src/jemalloc.c @@ -4474,12 +4474,3 @@ jemalloc_postfork_child(void) { } /******************************************************************************/ - -/* Helps the application decide if a pointer is worth re-allocating in order to reduce fragmentation. - * returns 1 if the allocation should be moved, and 0 if the allocation be kept. - * If the application decides to re-allocate it should use MALLOCX_TCACHE_NONE when doing so. 
*/ -JEMALLOC_EXPORT int JEMALLOC_NOTHROW -get_defrag_hint(void* ptr) { - assert(ptr != NULL); - return iget_defrag_hint(TSDN_NULL, ptr); -} diff --git a/src/Makefile b/src/Makefile index a76356e9d5..f876f55dec 100644 --- a/src/Makefile +++ b/src/Makefile @@ -411,7 +411,7 @@ endif ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o 
notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX) diff --git a/src/allocator_defrag.c b/src/allocator_defrag.c new file mode 100644 index 0000000000..b2330c95e0 --- /dev/null +++ b/src/allocator_defrag.c @@ -0,0 +1,426 @@ +/* Copyright 2024- Valkey contributors + * All rights reserved. + * SPDX-License-Identifier: BSD-3-Clause + */ + +/* + * This file implements allocator-specific defragmentation logic used + * within the Valkey engine. Below is the relationship between various + * components involved in allocation and defragmentation: + * + * Application code + * / \ + * allocation / \ defrag + * / \ + * zmalloc allocator_defrag + * / | \ / \ + * / | \ / \ + * / | \ / \ + * libc tcmalloc jemalloc other + * + * Explanation: + * - **Application code**: High-level application logic that uses memory + * allocation and may trigger defragmentation. + * - **zmalloc**: An abstraction layer over the memory allocator, providing + * a uniform allocation interface to the application code. It can delegate + * to various underlying allocators (e.g., libc, tcmalloc, jemalloc, or others). + * It is not dependant on defrag implementation logic and it's possible to use jemalloc + * version that does not support defrag. 
+ * - **allocator_defrag**: This file contains allocator-specific logic for + * defragmentation, invoked from `defrag.c` when memory defragmentation is needed. + * currently jemalloc is the only allocator with implemented defrag logic. It is possible that + * future implementation will include non-allocator defragmentation (think of data-structure + * compaction for example). + * - **Underlying allocators**: These are the actual memory allocators, such as + * libc, tcmalloc, jemalloc, or other custom allocators. The defragmentation + * logic in `allocator_defrag` interacts with these allocators to reorganize + * memory and reduce fragmentation. + * + * The `defrag.c` file acts as the central entry point for defragmentation, + * invoking allocator-specific implementations provided here in `allocator_defrag.c`. + * + * Note: Developers working on `zmalloc` or `allocator_defrag` should refer to + * the other component to ensure both are using the same allocator configuration. + */ + +#include +#include "serverassert.h" +#include "allocator_defrag.h" + +#define UNUSED(x) (void)(x) + +#if defined(HAVE_DEFRAG) && defined(USE_JEMALLOC) + +#define STRINGIFY_(x) #x +#define STRINGIFY(x) STRINGIFY_(x) + +#define BATCH_QUERY_ARGS_OUT 3 +#define SLAB_NFREE(out, i) out[(i) * BATCH_QUERY_ARGS_OUT] +#define SLAB_LEN(out, i) out[(i) * BATCH_QUERY_ARGS_OUT + 2] +#define SLAB_NUM_REGS(out, i) out[(i) * BATCH_QUERY_ARGS_OUT + 1] + +#define UTILIZATION_THRESHOLD_FACTOR_MILI (125) // 12.5% additional utilization + +/* + * Represents a precomputed key for querying jemalloc statistics. + * + * The `jeMallctlKey` structure stores a key corresponding to a specific jemalloc + * statistics field name. This key is used with the `je_mallctlbymib` interface + * to query statistics more efficiently, bypassing the need for runtime string + * lookup and translation performed by `je_mallctl`. 
+ * + * - `je_mallctlnametomib` is called once for each statistics field to precompute + * and store the key corresponding to the field name. + * - Subsequent queries use `je_mallctlbymib` with the stored key, avoiding the + * overhead of repeated string-based lookups. + * + */ +typedef struct jeMallctlKey { + size_t key[6]; /* The precomputed key used to query jemalloc statistics. */ + size_t keylen; /* The length of the key array. */ +} jeMallctlKey; + +/* Stores MIB (Management Information Base) keys for jemalloc bin queries. + * + * This struct holds precomputed `jeMallctlKey` values for querying various + * jemalloc bin-related statistics efficiently. + */ +typedef struct jeBinInfoKeys { + jeMallctlKey curr_slabs; /* Key to query the current number of slabs in the bin. */ + jeMallctlKey nonfull_slabs; /* Key to query the number of non-full slabs in the bin. */ + jeMallctlKey curr_regs; /* Key to query the current number of regions in the bin. */ +} jeBinInfoKeys; + +/* Represents detailed information about a jemalloc bin. + * + * This struct provides metadata about a jemalloc bin, including the size of + * its regions, total number of regions, and related MIB keys for efficient + * queries. + */ +typedef struct jeBinInfo { + size_t reg_size; /* Size of each region in the bin. */ + uint32_t nregs; /* Total number of regions in the bin. */ + jeBinInfoKeys info_keys; /* Precomputed MIB keys for querying bin statistics. */ +} jeBinInfo; + +/* Represents the configuration for jemalloc bins. + * + * This struct contains information about the number of bins and metadata for + * each bin, as well as precomputed keys for batch utility queries and epoch updates. + */ +typedef struct jemallocCB { + unsigned nbins; /* Number of bins in the jemalloc configuration. */ + jeBinInfo *bin_info; /* Array of `jeBinInfo` structs, one for each bin. */ + jeMallctlKey util_batch_query; /* Key to query batch utilization information. 
*/ + jeMallctlKey epoch; /* Key to trigger statistics sync between threads. */ +} jemallocCB; + +/* Represents the latest usage statistics for a jemalloc bin. + * + * This struct tracks the current usage of a bin, including the number of slabs + * and regions, and calculates the number of full slabs from other fields. + */ +typedef struct jemallocBinUsageData { + size_t curr_slabs; /* Current number of slabs in the bin. */ + size_t curr_nonfull_slabs; /* Current number of non-full slabs in the bin. */ + size_t curr_regs; /* Current number of regions in the bin. */ +} jemallocBinUsageData; + + +static int defrag_supported = 0; +/* Control block holding information about bins and query helper - + * this structure is initialized once when calling allocatorDefragInit. It does not change afterwards*/ +static jemallocCB je_cb = {0, NULL, {{0}, 0}, {{0}, 0}}; +/* Holds the latest usage statistics for each bin. This structure is updated when calling + * allocatorDefragGetFragSmallbins and later is used to make a defrag decision for a memory pointer. */ +static jemallocBinUsageData *je_usage_info = NULL; + + +/* ----------------------------------------------------------------------------- + * Alloc/Free API that are cooperative with defrag + * -------------------------------------------------------------------------- */ + +/* Allocation and free functions that bypass the thread cache + * and go straight to the allocator arena bins. + * Currently implemented only for jemalloc. Used for online defragmentation. 
+ */ +void *allocatorDefragAlloc(size_t size) { + void *ptr = je_mallocx(size, MALLOCX_TCACHE_NONE); + return ptr; +} +void allocatorDefragFree(void *ptr, size_t size) { + if (ptr == NULL) return; + je_sdallocx(ptr, size, MALLOCX_TCACHE_NONE); +} + +/* ----------------------------------------------------------------------------- + * Helper functions for jemalloc translation between size and index + * -------------------------------------------------------------------------- */ + +/* Get the bin index in bin array from the reg_size. + * + * These are reverse-engineered mappings of reg_size -> binind. We need this information because the utilization query + * returns the size of the buffer and not the bin index, and we need the bin index to access its usage information + * + * Note: In case a future PR returns the binind (that is a better API anyway) we can get rid of + * these conversion functions + */ +static inline unsigned jeSize2BinIndexLgQ3(size_t sz) { + /* Number of bins in each power-of-2 size class group */ + const size_t size_class_group_size = 4; + /* Smallest power-of-2 quantum for binning (lg_quantum = 3, i.e. 8 bytes) */ + const size_t lg_quantum_3_first_pow2 = 3; + /* Offset for exponential bins */ + const size_t lg_quantum_3_offset = ((64 >> lg_quantum_3_first_pow2) - 1); + /* Small sizes (8-64 bytes) use linear binning */ + if (sz <= 64) { // 64 = 1 << (lg_quantum_3_first_pow2 + 3) + return (sz >> 3) - 1; // Divide by 8 and subtract 1 + } + + /* For larger sizes, use exponential binning */ + + /* Calculate leading zeros of (sz - 1) to properly handle power-of-2 sizes */ + unsigned leading_zeros = __builtin_clzll(sz - 1); + unsigned exp = 64 - leading_zeros; // Effective log2(sz) + + /* Calculate the size's position within its group */ + unsigned within_group_offset = size_class_group_size - + (((1ULL << exp) - sz) >> (exp - lg_quantum_3_first_pow2)); + + /* Calculate the final bin index */ + return within_group_offset + + ((exp - (lg_quantum_3_first_pow2 + 3)) - 1) *
size_class_group_size + + lg_quantum_3_offset; +} +/* ----------------------------------------------------------------------------- + * Interface functions to get fragmentation info from jemalloc + * -------------------------------------------------------------------------- */ +#define ARENA_TO_QUERY MALLCTL_ARENAS_ALL + +static inline void jeRefreshStats(const jemallocCB *je_cb) { + uint64_t epoch = 1; // Value doesn't matter + size_t sz = sizeof(epoch); + /* Refresh stats */ + je_mallctlbymib(je_cb->epoch.key, je_cb->epoch.keylen, &epoch, &sz, &epoch, sz); +} + +/* Extract key that corresponds to the given name for fast query. This should be called once for each key_name */ +static inline int jeQueryKeyInit(const char *key_name, jeMallctlKey *key_info) { + key_info->keylen = sizeof(key_info->key) / sizeof(key_info->key[0]); + int res = je_mallctlnametomib(key_name, key_info->key, &key_info->keylen); + /* sanity check that returned value is not larger than provided */ + assert(key_info->keylen <= sizeof(key_info->key) / sizeof(key_info->key[0])); + return res; +} + +/* Query jemalloc control interface using previously extracted key (with jeQueryKeyInit) instead of name string. + * This interface (named MIB in jemalloc) is faster as it avoids string dict lookup at run-time. */ +static inline int jeQueryCtlInterface(const jeMallctlKey *key_info, void *value) { + size_t sz = sizeof(size_t); + return je_mallctlbymib(key_info->key, key_info->keylen, value, &sz, NULL, 0); +} + +static inline int binQueryHelperInitialization(jeBinInfoKeys *helper, unsigned bin_index) { + char mallctl_name[128]; + + /* Mib of fetch number of used regions in the bin */ + snprintf(mallctl_name, sizeof(mallctl_name), "stats.arenas." STRINGIFY(ARENA_TO_QUERY) ".bins.%d.curregs", bin_index); + if (jeQueryKeyInit(mallctl_name, &helper->curr_regs) != 0) return -1; + /* Mib of fetch number of current slabs in the bin */ + snprintf(mallctl_name, sizeof(mallctl_name), "stats.arenas." 
STRINGIFY(ARENA_TO_QUERY) ".bins.%d.curslabs", bin_index); + if (jeQueryKeyInit(mallctl_name, &helper->curr_slabs) != 0) return -1; + /* Mib of fetch nonfull slabs */ + snprintf(mallctl_name, sizeof(mallctl_name), "stats.arenas." STRINGIFY(ARENA_TO_QUERY) ".bins.%d.nonfull_slabs", bin_index); + if (jeQueryKeyInit(mallctl_name, &helper->nonfull_slabs) != 0) return -1; + + return 0; +} + +/* Initializes the defragmentation system for the jemalloc memory allocator. + * + * This function performs the necessary setup and initialization steps for the defragmentation system. + * It retrieves the configuration information for the jemalloc arenas and bins, and initializes the usage + * statistics data structure. + * + * return 0 on success, or a non-zero error code on failure. + * + * The initialization process involves the following steps: + * 1. Check if defragmentation is supported by the current jemalloc version. + * 2. Retrieve the arena bin configuration information using the `je_mallctlbymib` function. + * 3. Initialize the `usage_latest` structure with the bin usage statistics and configuration data. + * 4. Set the `defrag_supported` flag to indicate that defragmentation is enabled. + * + * Note: This function must be called before using any other defragmentation-related functionality. + * It should be called during the initialization phase of the code that uses the + * defragmentation feature. 
+ */ +int allocatorDefragInit(void) { + char mallctl_name[100]; + jeBinInfo *bin_info; + size_t sz; + int je_res; + + /* the init should be called only once, fail if unexpected call */ + assert(!defrag_supported); + + /* Get the mib of the per memory pointers query command that is used during defrag scan over memory */ + if (jeQueryKeyInit("experimental.utilization.batch_query", &je_cb.util_batch_query) != 0) return -1; + + je_res = jeQueryKeyInit("epoch", &je_cb.epoch); + assert(je_res == 0); + jeRefreshStats(&je_cb); + + /* get quantum for verification only, current code assumes lg-quantum should be 3 */ + size_t jemalloc_quantum; + sz = sizeof(jemalloc_quantum); + je_mallctl("arenas.quantum", &jemalloc_quantum, &sz, NULL, 0); + /* lg-quantum should be 3 so jemalloc_quantum should be 1<<3 */ + assert(jemalloc_quantum == 8); + + sz = sizeof(je_cb.nbins); + je_res = je_mallctl("arenas.nbins", &je_cb.nbins, &sz, NULL, 0); + assert(je_res == 0 && je_cb.nbins != 0); + + je_cb.bin_info = je_calloc(je_cb.nbins, sizeof(jeBinInfo)); + assert(je_cb.bin_info != NULL); + je_usage_info = je_calloc(je_cb.nbins, sizeof(jemallocBinUsageData)); + assert(je_usage_info != NULL); + + for (unsigned j = 0; j < je_cb.nbins; j++) { + bin_info = &je_cb.bin_info[j]; + /* The size of the current bin */ + snprintf(mallctl_name, sizeof(mallctl_name), "arenas.bin.%d.size", j); + sz = sizeof(bin_info->reg_size); + je_res = je_mallctl(mallctl_name, &bin_info->reg_size, &sz, NULL, 0); + assert(je_res == 0); + /* Number of regions per slab */ + snprintf(mallctl_name, sizeof(mallctl_name), "arenas.bin.%d.nregs", j); + sz = sizeof(bin_info->nregs); + je_res = je_mallctl(mallctl_name, &bin_info->nregs, &sz, NULL, 0); + assert(je_res == 0); + + /* init bin specific fast query keys */ + je_res = binQueryHelperInitialization(&bin_info->info_keys, j); + assert(je_res == 0); + + /* verify the reverse map of reg_size to bin index */ + assert(jeSize2BinIndexLgQ3(bin_info->reg_size) == j); + } + + /* defrag 
is supported, mark it to enable defrag queries */ + defrag_supported = 1; + return 0; +} + +/* Total size of consumed memory in unused regs in small bins (AKA external fragmentation). + * The function will refresh the epoch. + * + * return total fragmentation bytes + */ +unsigned long allocatorDefragGetFragSmallbins(void) { + assert(defrag_supported); + unsigned long frag = 0; + jeRefreshStats(&je_cb); + for (unsigned j = 0; j < je_cb.nbins; j++) { + jeBinInfo *bin_info = &je_cb.bin_info[j]; + jemallocBinUsageData *bin_usage = &je_usage_info[j]; + + /* Number of used regions in the bin */ + jeQueryCtlInterface(&bin_info->info_keys.curr_regs, &bin_usage->curr_regs); + /* Number of current slabs in the bin */ + jeQueryCtlInterface(&bin_info->info_keys.curr_slabs, &bin_usage->curr_slabs); + /* Number of non full slabs in the bin */ + jeQueryCtlInterface(&bin_info->info_keys.nonfull_slabs, &bin_usage->curr_nonfull_slabs); + + /* Calculate the fragmentation bytes for the current bin and add it to the total. */ + frag += ((bin_info->nregs * bin_usage->curr_slabs) - bin_usage->curr_regs) * bin_info->reg_size; + } + return frag; +} + +/* Determines whether defragmentation should be performed on a pointer based on jemalloc information. + * + * bin_info Pointer to the bin information structure. + * bin_usage Pointer to the bin usage structure. + * nalloced Number of allocated regions in the bin. + * + * return 1 if defragmentation should be performed, 0 otherwise. + * + * This function checks the following conditions to determine if defragmentation should be performed: + * 1. If the number of allocated regions (nalloced) is equal to the total number of regions (bin_info->nregs), + * defragmentation is not necessary as moving regions is guaranteed not to change the fragmentation ratio. + * 2. If the number of non-full slabs (bin_usage->curr_nonfull_slabs) is less than 2, defragmentation is not performed + * because there is no other slab to move regions to. + * 3.
If slab utilization < 'avg utilization'*1.125 [code 1.125 == (1000+UTILIZATION_THRESHOLD_FACTOR_MILI)/1000] + * than we should defrag. This is aligned with previous je_defrag_hint implementation. + */ +static inline int makeDefragDecision(jeBinInfo *bin_info, jemallocBinUsageData *bin_usage, unsigned long nalloced) { + unsigned long curr_full_slabs = bin_usage->curr_slabs - bin_usage->curr_nonfull_slabs; + size_t allocated_nonfull = bin_usage->curr_regs - curr_full_slabs * bin_info->nregs; + if (bin_info->nregs == nalloced || bin_usage->curr_nonfull_slabs < 2 || + 1000 * nalloced * bin_usage->curr_nonfull_slabs > (1000 + UTILIZATION_THRESHOLD_FACTOR_MILI) * allocated_nonfull) { + return 0; + } + return 1; +} + +/* + * Performs defragmentation analysis for a given ptr. + * + * ptr - ptr to memory region to be analyzed. + * + * return - the function returns 1 if defrag should be performed, 0 otherwise. + */ +int allocatorShouldDefrag(void *ptr) { + assert(defrag_supported); + size_t out[BATCH_QUERY_ARGS_OUT]; + size_t out_sz = sizeof(out); + size_t in_sz = sizeof(ptr); + for (unsigned j = 0; j < BATCH_QUERY_ARGS_OUT; j++) { + out[j] = -1; + } + je_mallctlbymib(je_cb.util_batch_query.key, + je_cb.util_batch_query.keylen, + out, &out_sz, + &ptr, in_sz); + /* handle results with appropriate quantum value */ + assert(SLAB_NUM_REGS(out, 0) > 0); + assert(SLAB_LEN(out, 0) > 0); + assert(SLAB_NFREE(out, 0) != (size_t)-1); + unsigned region_size = SLAB_LEN(out, 0) / SLAB_NUM_REGS(out, 0); + /* check that the allocation size is in range of small bins */ + if (region_size > je_cb.bin_info[je_cb.nbins - 1].reg_size) { + return 0; + } + /* get the index based on quantum used */ + unsigned binind = jeSize2BinIndexLgQ3(region_size); + /* make sure binind is in range and reverse map is correct */ + assert(binind < je_cb.nbins && region_size == je_cb.bin_info[binind].reg_size); + + return makeDefragDecision(&je_cb.bin_info[binind], + &je_usage_info[binind], + 
je_cb.bin_info[binind].nregs - SLAB_NFREE(out, 0)); +} + +#else + +int allocatorDefragInit(void) { + return -1; +} +void allocatorDefragFree(void *ptr, size_t size) { + UNUSED(ptr); + UNUSED(size); +} +__attribute__((malloc)) void *allocatorDefragAlloc(size_t size) { + UNUSED(size); + return NULL; +} +unsigned long allocatorDefragGetFragSmallbins(void) { + return 0; +} + +int allocatorShouldDefrag(void *ptr) { + UNUSED(ptr); + return 0; +} +#endif diff --git a/src/allocator_defrag.h b/src/allocator_defrag.h new file mode 100644 index 0000000000..7fb56208b6 --- /dev/null +++ b/src/allocator_defrag.h @@ -0,0 +1,22 @@ +#ifndef __ALLOCATOR_DEFRAG_H +#define __ALLOCATOR_DEFRAG_H + +#if defined(USE_JEMALLOC) +#include +/* We can enable the server defrag capabilities only if we are using Jemalloc + * and the version that has the experimental.utilization namespace in mallctl . */ +#if defined(JEMALLOC_VERSION_MAJOR) && \ + (JEMALLOC_VERSION_MAJOR > 5 || \ + (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR > 2) || \ + (JEMALLOC_VERSION_MAJOR == 5 && JEMALLOC_VERSION_MINOR == 2 && JEMALLOC_VERSION_BUGFIX >= 1)) +#define HAVE_DEFRAG +#endif +#endif + +int allocatorDefragInit(void); +void allocatorDefragFree(void *ptr, size_t size); +__attribute__((malloc)) void *allocatorDefragAlloc(size_t size); +unsigned long allocatorDefragGetFragSmallbins(void); +int allocatorShouldDefrag(void *ptr); + +#endif /* __ALLOCATOR_DEFRAG_H */ diff --git a/src/defrag.c b/src/defrag.c index 4d34009f8b..b49a175f7c 100644 --- a/src/defrag.c +++ b/src/defrag.c @@ -49,10 +49,6 @@ typedef struct defragPubSubCtx { dict *(*clientPubSubChannels)(client *); } defragPubSubCtx; -/* this method was added to jemalloc in order to help us understand which - * pointers are worthwhile moving and which aren't */ -int je_get_defrag_hint(void *ptr); - /* Defrag helper for generic allocations. * * returns NULL in case the allocation wasn't moved. 
@@ -61,7 +57,7 @@ int je_get_defrag_hint(void *ptr); void *activeDefragAlloc(void *ptr) { size_t size; void *newptr; - if (!je_get_defrag_hint(ptr)) { + if (!allocatorShouldDefrag(ptr)) { server.stat_active_defrag_misses++; return NULL; } @@ -69,9 +65,9 @@ void *activeDefragAlloc(void *ptr) { * make sure not to use the thread cache. so that we don't get back the same * pointers we try to free */ size = zmalloc_size(ptr); - newptr = zmalloc_no_tcache(size); + newptr = allocatorDefragAlloc(size); memcpy(newptr, ptr, size); - zfree_no_tcache(ptr); + allocatorDefragFree(ptr, size); server.stat_active_defrag_hits++; return newptr; } @@ -756,8 +752,8 @@ void defragScanCallback(void *privdata, const dictEntry *de) { * without the possibility of getting any results. */ float getAllocatorFragmentation(size_t *out_frag_bytes) { size_t resident, active, allocated, frag_smallbins_bytes; - zmalloc_get_allocator_info(&allocated, &active, &resident, NULL, NULL, &frag_smallbins_bytes); - + zmalloc_get_allocator_info(&allocated, &active, &resident, NULL, NULL); + frag_smallbins_bytes = allocatorDefragGetFragSmallbins(); /* Calculate the fragmentation ratio as the proportion of wasted memory in small * bins (which are defraggable) relative to the total allocated memory (including large bins). 
* This is because otherwise, if most of the memory usage is large bins, we may show high percentage, diff --git a/src/server.c b/src/server.c index aebbb57a93..51de89ee53 100644 --- a/src/server.c +++ b/src/server.c @@ -1297,8 +1297,8 @@ void cronUpdateMemoryStats(void) { * allocations, and allocator reserved pages that can be pursed (all not actual frag) */ zmalloc_get_allocator_info( &server.cron_malloc_stats.allocator_allocated, &server.cron_malloc_stats.allocator_active, - &server.cron_malloc_stats.allocator_resident, NULL, &server.cron_malloc_stats.allocator_muzzy, - &server.cron_malloc_stats.allocator_frag_smallbins_bytes); + &server.cron_malloc_stats.allocator_resident, NULL, &server.cron_malloc_stats.allocator_muzzy); + server.cron_malloc_stats.allocator_frag_smallbins_bytes = allocatorDefragGetFragSmallbins(); /* in case the allocator isn't providing these stats, fake them so that * fragmentation info still shows some (inaccurate metrics) */ if (!server.cron_malloc_stats.allocator_resident) { @@ -6794,7 +6794,10 @@ __attribute__((weak)) int main(int argc, char **argv) { #endif tzset(); /* Populates 'timezone' global. */ zmalloc_set_oom_handler(serverOutOfMemoryHandler); - +#if defined(HAVE_DEFRAG) + int res = allocatorDefragInit(); + serverAssert(res == 0); +#endif /* To achieve entropy, in case of containers, their time() and getpid() can * be the same. 
But value of tv_usec is fast enough to make the difference */ gettimeofday(&tv, NULL); diff --git a/src/server.h b/src/server.h index 531ca8e7c8..8962b04086 100644 --- a/src/server.h +++ b/src/server.h @@ -35,6 +35,7 @@ #include "solarisfixes.h" #include "rio.h" #include "commands.h" +#include "allocator_defrag.h" #include #include diff --git a/src/zmalloc.c b/src/zmalloc.c index e18fa8bac2..a696111e47 100644 --- a/src/zmalloc.c +++ b/src/zmalloc.c @@ -84,8 +84,6 @@ void zlibc_free(void *ptr) { #define calloc(count, size) je_calloc(count, size) #define realloc(ptr, size) je_realloc(ptr, size) #define free(ptr) je_free(ptr) -#define mallocx(size, flags) je_mallocx(size, flags) -#define dallocx(ptr, flags) je_dallocx(ptr, flags) #endif #define thread_local _Thread_local @@ -207,25 +205,6 @@ void *zmalloc_usable(size_t size, size_t *usable) { return ptr; } -/* Allocation and free functions that bypass the thread cache - * and go straight to the allocator arena bins. - * Currently implemented only for jemalloc. Used for online defragmentation. */ -#ifdef HAVE_DEFRAG -void *zmalloc_no_tcache(size_t size) { - if (size >= SIZE_MAX / 2) zmalloc_oom_handler(size); - void *ptr = mallocx(size + PREFIX_SIZE, MALLOCX_TCACHE_NONE); - if (!ptr) zmalloc_oom_handler(size); - update_zmalloc_stat_alloc(zmalloc_size(ptr)); - return ptr; -} - -void zfree_no_tcache(void *ptr) { - if (ptr == NULL) return; - update_zmalloc_stat_free(zmalloc_size(ptr)); - dallocx(ptr, MALLOCX_TCACHE_NONE); -} -#endif - /* Try allocating memory and zero it, and return NULL if failed. * '*usable' is set to the usable size if non NULL. */ static inline void *ztrycalloc_usable_internal(size_t size, size_t *usable) { @@ -683,52 +662,7 @@ size_t zmalloc_get_rss(void) { #define STRINGIFY_(x) #x #define STRINGIFY(x) STRINGIFY_(x) -/* Compute the total memory wasted in fragmentation of inside small arena bins. - * Done by summing the memory in unused regs in all slabs of all small bins. 
*/ -size_t zmalloc_get_frag_smallbins(void) { - unsigned nbins; - size_t sz, frag = 0; - char buf[100]; - - sz = sizeof(unsigned); - assert(!je_mallctl("arenas.nbins", &nbins, &sz, NULL, 0)); - for (unsigned j = 0; j < nbins; j++) { - size_t curregs, curslabs, reg_size; - uint32_t nregs; - - /* The size of the current bin */ - snprintf(buf, sizeof(buf), "arenas.bin.%d.size", j); - sz = sizeof(size_t); - assert(!je_mallctl(buf, ®_size, &sz, NULL, 0)); - - /* Number of used regions in the bin */ - snprintf(buf, sizeof(buf), "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".bins.%d.curregs", j); - sz = sizeof(size_t); - assert(!je_mallctl(buf, &curregs, &sz, NULL, 0)); - - /* Number of regions per slab */ - snprintf(buf, sizeof(buf), "arenas.bin.%d.nregs", j); - sz = sizeof(uint32_t); - assert(!je_mallctl(buf, &nregs, &sz, NULL, 0)); - - /* Number of current slabs in the bin */ - snprintf(buf, sizeof(buf), "stats.arenas." STRINGIFY(MALLCTL_ARENAS_ALL) ".bins.%d.curslabs", j); - sz = sizeof(size_t); - assert(!je_mallctl(buf, &curslabs, &sz, NULL, 0)); - - /* Calculate the fragmentation bytes for the current bin and add it to the total. */ - frag += ((nregs * curslabs) - curregs) * reg_size; - } - - return frag; -} - -int zmalloc_get_allocator_info(size_t *allocated, - size_t *active, - size_t *resident, - size_t *retained, - size_t *muzzy, - size_t *frag_smallbins_bytes) { +int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident, size_t *retained, size_t *muzzy) { uint64_t epoch = 1; size_t sz; *allocated = *resident = *active = 0; @@ -763,8 +697,6 @@ int zmalloc_get_allocator_info(size_t *allocated, *muzzy = pmuzzy * page; } - /* Total size of consumed meomry in unused regs in small bins (AKA external fragmentation). 
*/ - *frag_smallbins_bytes = zmalloc_get_frag_smallbins(); return 1; } @@ -789,13 +721,8 @@ int jemalloc_purge(void) { #else -int zmalloc_get_allocator_info(size_t *allocated, - size_t *active, - size_t *resident, - size_t *retained, - size_t *muzzy, - size_t *frag_smallbins_bytes) { - *allocated = *resident = *active = *frag_smallbins_bytes = 0; +int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident, size_t *retained, size_t *muzzy) { + *allocated = *resident = *active = 0; if (retained) *retained = 0; if (muzzy) *muzzy = 0; return 1; diff --git a/src/zmalloc.h b/src/zmalloc.h index 9b51f4c866..38c2bae864 100644 --- a/src/zmalloc.h +++ b/src/zmalloc.h @@ -100,13 +100,6 @@ #include #endif -/* We can enable the server defrag capabilities only if we are using Jemalloc - * and the version used is our special version modified for the server having - * the ability to return per-allocation fragmentation hints. */ -#if defined(USE_JEMALLOC) && defined(JEMALLOC_FRAG_HINT) -#define HAVE_DEFRAG -#endif - /* The zcalloc symbol is a symbol name already used by zlib, which is defining * other names using the "z" prefix specific to zlib. 
In practice, linking * valkey with a static openssl, which itself might depend on a static libz @@ -138,12 +131,7 @@ __attribute__((malloc)) char *zstrdup(const char *s); size_t zmalloc_used_memory(void); void zmalloc_set_oom_handler(void (*oom_handler)(size_t)); size_t zmalloc_get_rss(void); -int zmalloc_get_allocator_info(size_t *allocated, - size_t *active, - size_t *resident, - size_t *retained, - size_t *muzzy, - size_t *frag_smallbins_bytes); +int zmalloc_get_allocator_info(size_t *allocated, size_t *active, size_t *resident, size_t *retained, size_t *muzzy); void set_jemalloc_bg_thread(int enable); int jemalloc_purge(void); size_t zmalloc_get_private_dirty(long pid); @@ -153,11 +141,6 @@ void zlibc_free(void *ptr); void zlibc_trim(void); void zmadvise_dontneed(void *ptr); -#ifdef HAVE_DEFRAG -void zfree_no_tcache(void *ptr); -__attribute__((malloc)) void *zmalloc_no_tcache(size_t size); -#endif - #ifndef HAVE_MALLOC_SIZE size_t zmalloc_size(void *ptr); size_t zmalloc_usable_size(void *ptr); From c4be326c3225ca4323ad7c21ccafee7197d0d539 Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 22 Nov 2024 10:28:59 +0800 Subject: [PATCH 33/60] Make manual failover reset the on-going election to promote failover (#1274) If a manual failover got timed out, like the election don't get the enough votes, since we have a auth_timeout and a auth_retry_time, a new manual failover will not be able to proceed on the replica side. Like if we initiate a new manual failover after a election timed out, we will pause the primary, but on the replica side, due to retry_time, replica does not trigger the new election and the manual failover will eventually time out. In this case, if we initiate manual failover again and there is an ongoing election, we will reset it so that the replica can initiate a new election at the manual failover's request. 
Signed-off-by: Binbin --- src/cluster_legacy.c | 25 +++++++++++++-- tests/unit/cluster/manual-failover.tcl | 42 ++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 7b3384ee9f..c618feccae 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -4848,6 +4848,27 @@ void clusterHandleReplicaMigration(int max_replicas) { * data loss due to the asynchronous primary-replica replication. * -------------------------------------------------------------------------- */ +void manualFailoverCanStart(void) { + serverAssert(server.cluster->mf_can_start == 0); + + if (server.cluster->failover_auth_time) { + /* There is another manual failover requested by the user. + * If we have an ongoing election, reset it because the user may initiate + * manual failover again when the previous manual failover timed out. + * Otherwise, if the previous election timed out (see auth_timeout) and + * before the next retry (see auth_retry_time), the new manual failover + * will pause the primary and replica can not do anything to advance the + * manual failover, and then the manual failover eventually times out. */ + server.cluster->failover_auth_time = 0; + serverLog(LL_WARNING, + "Failover election in progress for epoch %llu, but received a new manual failover. " + "Resetting the election.", + (unsigned long long)server.cluster->failover_auth_epoch); + } + + server.cluster->mf_can_start = 1; +} + /* Reset the manual failover state. This works for both primaries and replicas * as all the state about manual failover is cleared. * @@ -4888,7 +4909,7 @@ void clusterHandleManualFailover(void) { if (server.cluster->mf_primary_offset == replicationGetReplicaOffset()) { /* Our replication offset matches the primary replication offset * announced after clients were paused. We can start the failover. 
*/ - server.cluster->mf_can_start = 1; + manualFailoverCanStart(); serverLog(LL_NOTICE, "All primary replication stream processed, " "manual failover can start."); clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_FAILOVER); @@ -6785,7 +6806,7 @@ int clusterCommandSpecial(client *c) { * primary to agree about the offset. We just failover taking over * it without coordination. */ serverLog(LL_NOTICE, "Forced failover user request accepted (user request from '%s').", client); - server.cluster->mf_can_start = 1; + manualFailoverCanStart(); /* We can start a manual failover as soon as possible, setting a flag * here so that we don't need to waiting for the cron to kick in. */ clusterDoBeforeSleep(CLUSTER_TODO_HANDLE_MANUALFAILOVER); diff --git a/tests/unit/cluster/manual-failover.tcl b/tests/unit/cluster/manual-failover.tcl index 78842068fa..bac2a7a4c7 100644 --- a/tests/unit/cluster/manual-failover.tcl +++ b/tests/unit/cluster/manual-failover.tcl @@ -271,3 +271,45 @@ start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-ping-interval wait_for_cluster_propagation } } ;# start_cluster + +start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 15000}} { + test "Manual failover will reset the on-going election" { + set CLUSTER_PACKET_TYPE_FAILOVER_AUTH_REQUEST 5 + set CLUSTER_PACKET_TYPE_NONE -1 + + # Let other primaries drop FAILOVER_AUTH_REQUEST so that the election won't + # get the enough votes and the election will time out. + R 1 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_FAILOVER_AUTH_REQUEST + R 2 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_FAILOVER_AUTH_REQUEST + + # Replica doing the manual failover. + R 3 cluster failover + + # Waiting for primary and replica to confirm manual failover timeout. 
+ wait_for_log_messages 0 {"*Manual failover timed out*"} 0 1000 50 + wait_for_log_messages -3 {"*Manual failover timed out*"} 0 1000 50 + set loglines1 [count_log_lines 0] + set loglines2 [count_log_lines -3] + + # Undo packet drop, so that replica can win the next election. + R 1 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_NONE + R 2 debug drop-cluster-packet-filter $CLUSTER_PACKET_TYPE_NONE + + # Replica doing the manual failover again. + R 3 cluster failover + + # Make sure the election is reset. + wait_for_log_messages -3 {"*Failover election in progress*Resetting the election*"} $loglines2 1000 50 + + # Wait for failover. + wait_for_condition 1000 50 { + [s -3 role] == "master" + } else { + fail "No failover detected" + } + + # Make sure that the second manual failover does not time out. + verify_no_log_message 0 "*Manual failover timed out*" $loglines1 + verify_no_log_message -3 "*Manual failover timed out*" $loglines2 + } +} ;# start_cluster From 50aae13b0a7fffc6591ee2842d1ef4f2e59096dd Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 22 Nov 2024 10:29:24 +0800 Subject: [PATCH 34/60] Skip reclaim file page cache test in valgrind (#1327) The test is incompatible with valgrind. Added a new `--valgrind` argument to test suite, which will cause that test to be skipped. We skipped it in the past, see 5b61b0dc6d2579ee484fa6cf29bfac59513f84ab Signed-off-by: Binbin --- src/unit/README.md | 1 + src/unit/test_help.h | 4 +++- src/unit/test_main.c | 2 ++ src/unit/test_util.c | 4 +++- 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/unit/README.md b/src/unit/README.md index 1ef439eaeb..93ac82f6dc 100644 --- a/src/unit/README.md +++ b/src/unit/README.md @@ -12,6 +12,7 @@ Tests flags: * UNIT_TEST_ACCURATE: Corresponds to the --accurate flag. This flag indicates the test should use extra computation to more accurately validate the tests. * UNIT_TEST_LARGE_MEMORY: Corresponds to the --large-memory flag. 
This flag indicates whether or not tests should use more than 100mb of memory. * UNIT_TEST_SINGLE: Corresponds to the --single flag. This flag indicates that a single test is being executed. +* UNIT_TEST_VALGRIND: Corresponds to the --valgrind flag. This flag is just a hint passed to the test to indicate that we are running it under valgrind. Tests are allowed to be passed in additional arbitrary argv/argc, which they can access from the argc and argv arguments of the test. diff --git a/src/unit/test_help.h b/src/unit/test_help.h index 804a7e3449..51e77d19d3 100644 --- a/src/unit/test_help.h +++ b/src/unit/test_help.h @@ -18,10 +18,12 @@ /* The flags are the following: * --accurate: Runs tests with more iterations. * --large-memory: Enables tests that consume more than 100mb. - * --single: A flag to indicate a specific test file was executed. */ + * --single: A flag to indicate a specific test file was executed. + * --valgrind: Runs tests with valgrind. */ #define UNIT_TEST_ACCURATE (1 << 0) #define UNIT_TEST_LARGE_MEMORY (1 << 1) #define UNIT_TEST_SINGLE (1 << 2) +#define UNIT_TEST_VALGRIND (1 << 3) #define KRED "\33[31m" #define KGRN "\33[32m" diff --git a/src/unit/test_main.c b/src/unit/test_main.c index 277d1b42c1..1b7cd8c96d 100644 --- a/src/unit/test_main.c +++ b/src/unit/test_main.c @@ -49,6 +49,8 @@ int main(int argc, char **argv) { else if (!strcasecmp(arg, "--single") && (j + 1 < argc)) { flags |= UNIT_TEST_SINGLE; file = argv[j + 1]; + } else if (!strcasecmp(arg, "--valgrind")) { + flags |= UNIT_TEST_VALGRIND; } } diff --git a/src/unit/test_util.c b/src/unit/test_util.c index 70be0255d8..4558c38c3b 100644 --- a/src/unit/test_util.c +++ b/src/unit/test_util.c @@ -286,7 +286,9 @@ static int cache_exist(int fd) { int test_reclaimFilePageCache(int argc, char **argv, int flags) { UNUSED(argc); UNUSED(argv); - UNUSED(flags); + + /* The test is incompatible with valgrind, skip it. 
*/ + if (flags & UNIT_TEST_VALGRIND) return 0; #if defined(__linux__) char *tmpfile = "/tmp/redis-reclaim-cache-test"; From 43b50261620fe813015a8f88717b46876c7e3f83 Mon Sep 17 00:00:00 2001 From: Sinkevich Artem Date: Fri, 22 Nov 2024 06:58:15 +0400 Subject: [PATCH 35/60] Fix argument types of formatting functions (#1253) `cluster_legacy.c`: `slot_info_pairs` has `uint16_t` values, but they were cast to `unsigned long` and `%i` was used. `valkey-cli.c`: `node->replicas_count` is `int`, not `unsigned long`. Signed-off-by: ArtSin --- src/cluster_legacy.c | 8 ++++---- src/valkey-cli.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index c618feccae..d01bfdbfe0 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -5609,12 +5609,12 @@ sds representClusterNodeFlags(sds ci, uint16_t flags) { * else each slot is added separately. */ sds representSlotInfo(sds ci, uint16_t *slot_info_pairs, int slot_info_pairs_count) { for (int i = 0; i < slot_info_pairs_count; i += 2) { - unsigned long start = slot_info_pairs[i]; - unsigned long end = slot_info_pairs[i + 1]; + unsigned int start = slot_info_pairs[i]; + unsigned int end = slot_info_pairs[i + 1]; if (start == end) { - ci = sdscatfmt(ci, " %i", start); + ci = sdscatfmt(ci, " %u", start); } else { - ci = sdscatfmt(ci, " %i-%i", start, end); + ci = sdscatfmt(ci, " %u-%u", start, end); } } return ci; diff --git a/src/valkey-cli.c b/src/valkey-cli.c index 0ba03dc6ba..dc31981483 100644 --- a/src/valkey-cli.c +++ b/src/valkey-cli.c @@ -4391,7 +4391,7 @@ static sds clusterManagerNodeInfo(clusterManagerNode *node, int indent) { if (node->replicate != NULL) info = sdscatfmt(info, "\n%s replicates %S", spaces, node->replicate); else if (node->replicas_count) - info = sdscatfmt(info, "\n%s %U additional replica(s)", spaces, node->replicas_count); + info = sdscatfmt(info, "\n%s %i additional replica(s)", spaces, node->replicas_count); sdsfree(spaces); return info; 
} From 18d1eb5a8554474cfc34c89f859b664e65d9b48a Mon Sep 17 00:00:00 2001 From: Nadav Levanoni <38641521+nadav-levanoni@users.noreply.github.com> Date: Thu, 21 Nov 2024 19:14:28 -0800 Subject: [PATCH 36/60] Remove redundant dict_index calculations (#1205) We need to start making use of the new `WithDictIndex` APIs which allow us to reuse the dict_index calculation (avoid over-calling `getKeySlot` for no good reason). In this PR I optimized `lookupKey` so it now calls `getKeySlot` to reuse the dict_index two additional times. It also optimizes the keys command to avoid unnecessary computation of the slot id. --------- Signed-off-by: Nadav Levanoni Co-authored-by: Nadav Levanoni --- src/db.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/db.c b/src/db.c index 10d4a04091..5a57863de8 100644 --- a/src/db.c +++ b/src/db.c @@ -59,6 +59,7 @@ int keyIsExpired(serverDb *db, robj *key); static void dbSetValue(serverDb *db, robj *key, robj *val, int overwrite, dictEntry *de); static int getKVStoreIndexForKey(sds key); dictEntry *dbFindExpiresWithDictIndex(serverDb *db, void *key, int dict_index); +dictEntry *dbFindWithDictIndex(serverDb *db, void *key, int dict_index); /* Update LFU when an object is accessed. * Firstly, decrement the counter if the decrement time is reached. @@ -97,7 +98,8 @@ void updateLFU(robj *val) { * expired on replicas even if the primary is lagging expiring our key via DELs * in the replication link. 
*/ robj *lookupKey(serverDb *db, robj *key, int flags) { - dictEntry *de = dbFind(db, key->ptr); + int dict_index = getKVStoreIndexForKey(key->ptr); + dictEntry *de = dbFindWithDictIndex(db, key->ptr, dict_index); robj *val = NULL; if (de) { val = dictGetVal(de); @@ -113,7 +115,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) { int expire_flags = 0; if (flags & LOOKUP_WRITE && !is_ro_replica) expire_flags |= EXPIRE_FORCE_DELETE_EXPIRED; if (flags & LOOKUP_NOEXPIRE) expire_flags |= EXPIRE_AVOID_DELETE_EXPIRED; - if (expireIfNeeded(db, key, expire_flags) != KEY_VALID) { + if (expireIfNeededWithDictIndex(db, key, expire_flags, dict_index) != KEY_VALID) { /* The key is no longer valid. */ val = NULL; } @@ -129,7 +131,7 @@ robj *lookupKey(serverDb *db, robj *key, int flags) { if (!hasActiveChildProcess() && !(flags & LOOKUP_NOTOUCH)) { if (!canUseSharedObject() && val->refcount == OBJ_SHARED_REFCOUNT) { val = dupStringObject(val); - kvstoreDictSetVal(db->keys, getKVStoreIndexForKey(key->ptr), de, val); + kvstoreDictSetVal(db->keys, dict_index, de, val); } if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) { updateLFU(val); @@ -834,13 +836,23 @@ void keysCommand(client *c) { } else { kvs_it = kvstoreIteratorInit(c->db->keys); } - robj keyobj; - while ((de = kvs_di ? 
kvstoreDictIteratorNext(kvs_di) : kvstoreIteratorNext(kvs_it)) != NULL) { + while (1) { + robj keyobj; + int dict_index; + if (kvs_di) { + de = kvstoreDictIteratorNext(kvs_di); + dict_index = pslot; + } else { + de = kvstoreIteratorNext(kvs_it); + dict_index = kvstoreIteratorGetCurrentDictIndex(kvs_it); + } + if (de == NULL) break; + sds key = dictGetKey(de); if (allkeys || stringmatchlen(pattern, plen, key, sdslen(key), 0)) { initStaticStringObject(keyobj, key); - if (!keyIsExpired(c->db, &keyobj)) { + if (!keyIsExpiredWithDictIndex(c->db, &keyobj, dict_index)) { addReplyBulkCBuffer(c, key, sdslen(key)); numkeys++; } From 109d2dadc0a23326a71f58c8e312859689d6697c Mon Sep 17 00:00:00 2001 From: Yury-Fridlyand Date: Thu, 21 Nov 2024 19:19:10 -0800 Subject: [PATCH 37/60] Add slack link for users (#1273) Add slack link for users --------- Signed-off-by: Yury-Fridlyand Co-authored-by: Madelyn Olson --- .github/ISSUE_TEMPLATE/config.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 9588d36020..8c4a0a8db5 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -9,6 +9,9 @@ contact_links: - name: Chat with us on Matrix? url: https://matrix.to/#/#valkey:matrix.org about: We are on Matrix too! + - name: Chat with us on Slack? + url: https://join.slack.com/t/valkey-oss-developer/shared_invite/zt-2nxs51chx-EB9hu9Qdch3GMfRcztTSkQ + about: We are on Slack too! - name: Documentation issue? url: https://github.com/valkey-io/valkey-doc/issues about: Report it on the valkey-doc repo. 
From 377ed22c971878b29b6d2c2c582198f2629f82ed Mon Sep 17 00:00:00 2001 From: Alan Scherger Date: Thu, 21 Nov 2024 21:26:30 -0600 Subject: [PATCH 38/60] [feat] add Ubuntu 24.04 Noble package support (#971) add Ubuntu 24.04 Noble package support Signed-off-by: Alan Scherger --- utils/releasetools/build-config.json | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/utils/releasetools/build-config.json b/utils/releasetools/build-config.json index 5ee9308b3b..5e39fae70f 100644 --- a/utils/releasetools/build-config.json +++ b/utils/releasetools/build-config.json @@ -12,6 +12,12 @@ "type": "deb", "platform": "focal" }, + { + "arch": "x86_64", + "target": "ubuntu24.04", + "type": "deb", + "platform": "noble" + }, { "arch": "arm64", "target": "ubuntu18.04", @@ -23,6 +29,12 @@ "target": "ubuntu20.04", "type": "deb", "platform": "focal" + }, + { + "arch": "arm64", + "target": "ubuntu24.04", + "type": "deb", + "platform": "noble" } ] } \ No newline at end of file From 979f4c1ceba9eecc0f984101775b101ab87b58fc Mon Sep 17 00:00:00 2001 From: Binbin Date: Fri, 22 Nov 2024 16:49:16 +0800 Subject: [PATCH 39/60] Add cmake-build-debug and cmake-build-release to gitignore (#1340) Signed-off-by: Binbin --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b108b4bb92..d5cac316e6 100644 --- a/.gitignore +++ b/.gitignore @@ -51,3 +51,5 @@ tests/rdma/rdma-test tags build-debug/ build-release/ +cmake-build-debug/ +cmake-build-release/ From b9d224097a46dbe62ec0857cb91e7c67505a200e Mon Sep 17 00:00:00 2001 From: Binbin Date: Sat, 23 Nov 2024 00:22:04 +0800 Subject: [PATCH 40/60] Brocast a PONG to all node in cluster when role changed (#1295) When a node role changes, we should brocast the change to notify other nodes. For example, one primary and one replica, after a failover, the replica became a new primary, the primary became a new replica. 
And then we trigger a second cluster failover for the new replica, the new replica will send a MFSTART to its primary, ie, the new primary. But the new primary may reject the MFSTART due to this logic: ``` } else if (type == CLUSTERMSG_TYPE_MFSTART) { if (!sender || sender->replicaof != myself) return 1; ``` In the new primary views, sender is still a primary, and sender->replicaof is NULL, so we will return. Then the manual failover timedout. Another possibility is that other primaries refuse to vote after receiving the FAILOVER_AUTH_REQUEST, since in their's views, sender is still a primary, so it refuse to vote, and then manual failover timedout. ``` void clusterSendFailoverAuthIfNeeded(clusterNode *node, clusterMsg *request) { ... if (clusterNodeIsPrimary(node)) { serverLog(LL_WARNING, "Failover auth denied to... ``` The reason is that, currently, we only update the node->replicaof information when we receive a PING/PONG from the sender. For details, see clusterProcessPacket. Therefore, in some scenarios, such as clusters with many nodes and a large cluster-ping-interval (that is, cluster-node-timeout), the role change of the node will be very delayed. Added a DEBUG DISABLE-CLUSTER-RANDOM-PING command, send cluster ping to a random node every second (see clusterCron). Signed-off-by: Binbin --- src/cluster_legacy.c | 19 +++++--- src/cluster_legacy.h | 1 + src/debug.c | 5 +++ src/server.c | 1 + src/server.h | 2 + tests/unit/cluster/manual-failover.tcl | 61 ++++++++++++++++++++++++++ 6 files changed, 83 insertions(+), 6 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index d01bfdbfe0..97150b4d23 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -2669,7 +2669,8 @@ void clusterUpdateSlotsConfigWith(clusterNode *sender, uint64_t senderConfigEpoc * * If the sender and myself are in the same shard, try psync. 
*/ clusterSetPrimary(sender, !are_in_same_shard, !are_in_same_shard); - clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG); + clusterDoBeforeSleep(CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_FSYNC_CONFIG | + CLUSTER_TODO_BROADCAST_ALL); } else if (nodeIsPrimary(myself) && (sender_slots >= migrated_our_slots) && !are_in_same_shard) { /* When all our slots are lost to the sender and the sender belongs to * a different shard, this is likely due to a client triggered slot @@ -4538,7 +4539,7 @@ void clusterFailoverReplaceYourPrimary(void) { /* 4) Pong all the other nodes so that they can update the state * accordingly and detect that we switched to primary role. */ - clusterBroadcastPong(CLUSTER_BROADCAST_ALL); + clusterDoBeforeSleep(CLUSTER_TODO_BROADCAST_ALL); /* 5) If there was a manual failover in progress, clear the state. */ resetManualFailover(); @@ -5029,7 +5030,7 @@ void clusterCron(void) { /* Ping some random node 1 time every 10 iterations, so that we usually ping * one random node every second. */ - if (!(iteration % 10)) { + if (!server.debug_cluster_disable_random_ping && !(iteration % 10)) { int j; /* Check a few random nodes and ping the one with the oldest @@ -5206,6 +5207,13 @@ void clusterBeforeSleep(void) { int fsync = flags & CLUSTER_TODO_FSYNC_CONFIG; clusterSaveConfigOrDie(fsync); } + + if (flags & CLUSTER_TODO_BROADCAST_ALL) { + /* Broadcast a pong to all known nodes. This is useful when something changes + * in the configuration and we want to make the cluster aware it before the + * regular ping. */ + clusterBroadcastPong(CLUSTER_BROADCAST_ALL); + } } void clusterDoBeforeSleep(int flags) { @@ -6556,7 +6564,7 @@ void clusterCommandSetSlot(client *c) { } /* After importing this slot, let the other nodes know as * soon as possible. 
*/ - clusterBroadcastPong(CLUSTER_BROADCAST_ALL); + clusterDoBeforeSleep(CLUSTER_TODO_BROADCAST_ALL); } } } @@ -6748,8 +6756,7 @@ int clusterCommandSpecial(client *c) { * If the instance is a replica, it had a totally different replication history. * In these both cases, myself as a replica has to do a full sync. */ clusterSetPrimary(n, 1, 1); - clusterBroadcastPong(CLUSTER_BROADCAST_ALL); - clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_SAVE_CONFIG); + clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_SAVE_CONFIG | CLUSTER_TODO_BROADCAST_ALL); addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "count-failure-reports") && c->argc == 3) { /* CLUSTER COUNT-FAILURE-REPORTS */ diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 2c3e1d83c8..39148c748d 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -25,6 +25,7 @@ #define CLUSTER_TODO_SAVE_CONFIG (1 << 2) #define CLUSTER_TODO_FSYNC_CONFIG (1 << 3) #define CLUSTER_TODO_HANDLE_MANUALFAILOVER (1 << 4) +#define CLUSTER_TODO_BROADCAST_ALL (1 << 5) /* clusterLink encapsulates everything needed to talk with a remote node. 
*/ typedef struct clusterLink { diff --git a/src/debug.c b/src/debug.c index 13da7bcc93..082e20a3b6 100644 --- a/src/debug.c +++ b/src/debug.c @@ -436,6 +436,8 @@ void debugCommand(client *c) { "CLOSE-CLUSTER-LINK-ON-PACKET-DROP <0|1>", " This is valid only when DROP-CLUSTER-PACKET-FILTER is set to a valid packet type.", " When set to 1, the cluster link is closed after dropping a packet based on the filter.", + "DISABLE-CLUSTER-RANDOM-PING <0|1>", + " Disable sending cluster ping to a random node every second.", "OOM", " Crash the server simulating an out-of-memory error.", "PANIC", @@ -607,6 +609,9 @@ void debugCommand(client *c) { } else if (!strcasecmp(c->argv[1]->ptr, "close-cluster-link-on-packet-drop") && c->argc == 3) { server.debug_cluster_close_link_on_packet_drop = atoi(c->argv[2]->ptr); addReply(c, shared.ok); + } else if (!strcasecmp(c->argv[1]->ptr, "disable-cluster-random-ping") && c->argc == 3) { + server.debug_cluster_disable_random_ping = atoi(c->argv[2]->ptr); + addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "object") && (c->argc == 3 || c->argc == 4)) { dictEntry *de; robj *val; diff --git a/src/server.c b/src/server.c index 51de89ee53..87ce9b15a6 100644 --- a/src/server.c +++ b/src/server.c @@ -2693,6 +2693,7 @@ void initServer(void) { server.blocking_op_nesting = 0; server.thp_enabled = 0; server.cluster_drop_packet_filter = -1; + server.debug_cluster_disable_random_ping = 0; server.reply_buffer_peak_reset_time = REPLY_BUFFER_DEFAULT_PEAK_RESET_TIME; server.reply_buffer_resizing_enabled = 1; server.client_mem_usage_buckets = NULL; diff --git a/src/server.h b/src/server.h index 8962b04086..51ec92451d 100644 --- a/src/server.h +++ b/src/server.h @@ -2194,6 +2194,8 @@ struct valkeyServer { int cluster_slot_stats_enabled; /* Cluster slot usage statistics tracking enabled. */ /* Debug config that goes along with cluster_drop_packet_filter. When set, the link is closed on packet drop. 
*/ uint32_t debug_cluster_close_link_on_packet_drop : 1; + /* Debug config to control the random ping. When set, we will disable the random ping in clusterCron. */ + uint32_t debug_cluster_disable_random_ping : 1; sds cached_cluster_slot_info[CACHE_CONN_TYPE_MAX]; /* Index in array is a bitwise or of CACHE_CONN_TYPE_* */ /* Scripting */ mstime_t busy_reply_threshold; /* Script / module timeout in milliseconds */ diff --git a/tests/unit/cluster/manual-failover.tcl b/tests/unit/cluster/manual-failover.tcl index bac2a7a4c7..220ffc3eaf 100644 --- a/tests/unit/cluster/manual-failover.tcl +++ b/tests/unit/cluster/manual-failover.tcl @@ -313,3 +313,64 @@ start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-ping-interval verify_no_log_message -3 "*Manual failover timed out*" $loglines2 } } ;# start_cluster + +start_cluster 3 1 {tags {external:skip cluster} overrides {cluster-ping-interval 1000 cluster-node-timeout 1000}} { + test "Broadcast PONG to the cluster when the node role changes" { + # R0 is a primary and R3 is a replica, we will do multiple cluster failover + # and then check their role and flags. + set R0_nodeid [R 0 cluster myid] + set R3_nodeid [R 3 cluster myid] + + # Make sure we don't send PINGs for a short period of time. + for {set j 0} {$j < [llength $::servers]} {incr j} { + R $j debug disable-cluster-random-ping 0 + R $j config set cluster-ping-interval 300000 + } + + R 3 cluster failover + wait_for_condition 1000 50 { + [s 0 role] eq {slave} && + [s -3 role] eq {master} + } else { + fail "Failover does not happened" + } + + # Get the node information of R0 and R3 in my view from CLUSTER NODES + # R0 should be a replica and R3 should be a primary in all views. 
+ for {set j 0} {$j < [llength $::servers]} {incr j} { + wait_for_condition 1000 50 { + [check_cluster_node_mark slave $j $R0_nodeid] && + [check_cluster_node_mark master $j $R3_nodeid] + } else { + puts "R0_nodeid: $R0_nodeid" + puts "R3_nodeid: $R3_nodeid" + puts "R $j cluster nodes:" + puts [R $j cluster nodes] + fail "Node role does not changed in the first failover" + } + } + + R 0 cluster failover + wait_for_condition 1000 50 { + [s 0 role] eq {master} && + [s -3 role] eq {slave} + } else { + fail "The second failover does not happened" + } + + # Get the node information of R0 and R3 in my view from CLUSTER NODES + # R0 should be a primary and R3 should be a replica in all views. + for {set j 0} {$j < [llength $::servers]} {incr j} { + wait_for_condition 1000 50 { + [check_cluster_node_mark master $j $R0_nodeid] && + [check_cluster_node_mark slave $j $R3_nodeid] + } else { + puts "R0_nodeid: $R0_nodeid" + puts "R3_nodeid: $R3_nodeid" + puts "R $j cluster nodes:" + puts [R $j cluster nodes] + fail "Node role does not changed in the second failover" + } + } + } +} ;# start_cluster From 9851006d6d7af570d7f38025f4b1de68f12c7731 Mon Sep 17 00:00:00 2001 From: Binbin Date: Sat, 23 Nov 2024 00:23:38 +0800 Subject: [PATCH 41/60] Add short client info log to CLUSTER MEET / FORGET / RESET commands (#1249) These commands are all administrator commands. If they are operated incorrectly, serious consequences may occur. Print the full client info by using catClientInfoString, the info is useful when we want to identify the source of request. Since the origin client info is very large and might complicate the output, we added a catClientInfoShortString function, it will only print some basic fields, we want these fields that are useful to identify the client. These fields are: - id - addr - laddr - connection info - name - user - lib-name - lib-ver And also used it to replace the origin client info where it has the same purpose. 
Some logging is changed from full client info to short client info: - CLUSTER FAILOVER - FAILOVER / PSYNC - REPLICAOF NO ONE - SHUTDOWN Signed-off-by: Binbin --- src/cluster_legacy.c | 12 +++++++++++- src/networking.c | 23 +++++++++++++++++++++++ src/replication.c | 6 +++--- src/server.c | 2 +- src/server.h | 1 + 5 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index 97150b4d23..e4b25e265d 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -6596,6 +6596,10 @@ int clusterCommandSpecial(client *c) { addReplyErrorFormat(c, "Invalid node address specified: %s:%s", (char *)c->argv[2]->ptr, (char *)c->argv[3]->ptr); } else { + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); + serverLog(LL_NOTICE, "Cluster meet %s:%lld (user request from '%s').", (char *)c->argv[2]->ptr, port, + client); + sdsfree(client); addReply(c, shared.ok); } } else if (!strcasecmp(c->argv[1]->ptr, "flushslots") && c->argc == 2) { @@ -6710,6 +6714,9 @@ int clusterCommandSpecial(client *c) { addReplyError(c, "Can't forget my master!"); return 1; } + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); + serverLog(LL_NOTICE, "Cluster forget %s (user request from '%s').", (char *)c->argv[2]->ptr, client); + sdsfree(client); clusterBlacklistAddNode(n); clusterDelNode(n); clusterDoBeforeSleep(CLUSTER_TODO_UPDATE_STATE | CLUSTER_TODO_SAVE_CONFIG); @@ -6798,7 +6805,7 @@ int clusterCommandSpecial(client *c) { } resetManualFailover(); server.cluster->mf_end = mstime() + CLUSTER_MF_TIMEOUT; - sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log); + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); if (takeover) { /* A takeover does not perform any initial check. 
It just @@ -6877,6 +6884,9 @@ int clusterCommandSpecial(client *c) { "master nodes containing keys"); return 1; } + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); + serverLog(LL_NOTICE, "Cluster reset (user request from '%s').", client); + sdsfree(client); clusterReset(hard); addReply(c, shared.ok); } else if (!strcasecmp(c->argv[1]->ptr, "links") && c->argc == 2) { diff --git a/src/networking.c b/src/networking.c index 9558780f39..93aa9d00ae 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3385,6 +3385,29 @@ sds catClientInfoString(sds s, client *client, int hide_user_data) { return ret; } +/* Concatenate a string representing the state of a client in a human + * readable format, into the sds string 's'. + * + * This is a simplified and shortened version of catClientInfoString, + * it only added some basic fields for tracking clients. */ +sds catClientInfoShortString(sds s, client *client, int hide_user_data) { + if (!server.crashed) waitForClientIO(client); + char conninfo[CONN_INFO_LEN]; + + sds ret = sdscatfmt( + s, + FMTARGS( + "id=%U", (unsigned long long)client->id, + " addr=%s", getClientPeerId(client), + " laddr=%s", getClientSockname(client), + " %s", connGetInfo(client->conn, conninfo, sizeof(conninfo)), + " name=%s", hide_user_data ? "*redacted*" : (client->name ? (char *)client->name->ptr : ""), + " user=%s", hide_user_data ? "*redacted*" : (client->user ? client->user->name : "(superuser)"), + " lib-name=%s", client->lib_name ? (char *)client->lib_name->ptr : "", + " lib-ver=%s", client->lib_ver ? 
(char *)client->lib_ver->ptr : "")); + return ret; +} + sds getAllClientsInfoString(int type, int hide_user_data) { listNode *ln; listIter li; diff --git a/src/replication.c b/src/replication.c index 437ae278ec..1654847bd6 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1051,7 +1051,7 @@ void syncCommand(client *c) { } else { replicationUnsetPrimary(); } - sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log); + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); serverLog(LL_NOTICE, "PRIMARY MODE enabled (failover request from '%s')", client); sdsfree(client); } else { @@ -3971,7 +3971,7 @@ void replicaofCommand(client *c) { if (!strcasecmp(c->argv[1]->ptr, "no") && !strcasecmp(c->argv[2]->ptr, "one")) { if (server.primary_host) { replicationUnsetPrimary(); - sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log); + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); serverLog(LL_NOTICE, "PRIMARY MODE enabled (user request from '%s')", client); sdsfree(client); } @@ -4000,7 +4000,7 @@ void replicaofCommand(client *c) { /* There was no previous primary or the user specified a different one, * we can continue. 
*/ replicationSetPrimary(c->argv[1]->ptr, port, 0); - sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log); + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); serverLog(LL_NOTICE, "REPLICAOF %s:%d enabled (user request from '%s')", server.primary_host, server.primary_port, client); sdsfree(client); diff --git a/src/server.c b/src/server.c index 87ce9b15a6..6d346ac74c 100644 --- a/src/server.c +++ b/src/server.c @@ -4325,7 +4325,7 @@ int prepareForShutdown(client *c, int flags) { server.shutdown_flags = flags; if (c != NULL) { - sds client = catClientInfoString(sdsempty(), c, server.hide_user_data_from_log); + sds client = catClientInfoShortString(sdsempty(), c, server.hide_user_data_from_log); serverLog(LL_NOTICE, "User requested shutdown... (user request from '%s')", client); sdsfree(client); } else { diff --git a/src/server.h b/src/server.h index 51ec92451d..f4c7306009 100644 --- a/src/server.h +++ b/src/server.h @@ -2853,6 +2853,7 @@ char *getClientPeerId(client *client); char *getClientSockName(client *client); int isClientConnIpV6(client *c); sds catClientInfoString(sds s, client *client, int hide_user_data); +sds catClientInfoShortString(sds s, client *client, int hide_user_data); sds getAllClientsInfoString(int type, int hide_user_data); int clientSetName(client *c, robj *name, const char **err); void rewriteClientCommandVector(client *c, int argc, ...); From 33f42d7fb597ce28040f184ee57ed86d6f6ffbd8 Mon Sep 17 00:00:00 2001 From: eifrah-aws Date: Fri, 22 Nov 2024 22:17:53 +0200 Subject: [PATCH 42/60] CMake fixes + README update (#1276) --- CMakeLists.txt | 2 +- README.md | 14 +++++------ cmake/Modules/Utils.cmake | 13 ++++++++++ cmake/Modules/ValkeySetup.cmake | 43 ++++++++++++++++++--------------- deps/jemalloc/CMakeLists.txt | 13 ++++++++-- deps/lua/CMakeLists.txt | 9 +++++++ src/CMakeLists.txt | 10 ++++++++ 7 files changed, 75 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt 
b/CMakeLists.txt index ad0bab8896..77d0c4e7d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.20) +cmake_minimum_required(VERSION 3.10) # Must be done first if (APPLE) diff --git a/README.md b/README.md index 94f38bccf7..a32ac255df 100644 --- a/README.md +++ b/README.md @@ -297,19 +297,19 @@ Other options supported by Valkey's `CMake` build system: ## Special build flags -- `-DBUILD_TLS=` enable TLS build for Valkey -- `-DBUILD_RDMA=` enable RDMA module build (only module mode supported) +- `-DBUILD_TLS=` enable TLS build for Valkey. Default: `no` +- `-DBUILD_RDMA=` enable RDMA module build (only module mode supported). Default: `no` - `-DBUILD_MALLOC=` choose the allocator to use. Default on Linux: `jemalloc`, for other OS: `libc` -- `-DBUILD_SANITIZER=` build with address sanitizer enabled -- `-DBUILD_UNIT_TESTS=[1|0]` when set, the build will produce the executable `valkey-unit-tests` -- `-DBUILD_TEST_MODULES=[1|0]` when set, the build will include the modules located under the `tests/modules` folder -- `-DBUILD_EXAMPLE_MODULES=[1|0]` when set, the build will include the example modules located under the `src/modules` folder +- `-DBUILD_SANITIZER=` build with address sanitizer enabled. Default: disabled (no sanitizer) +- `-DBUILD_UNIT_TESTS=[yes|no]` when set, the build will produce the executable `valkey-unit-tests`. Default: `no` +- `-DBUILD_TEST_MODULES=[yes|no]` when set, the build will include the modules located under the `tests/modules` folder. Default: `no` +- `-DBUILD_EXAMPLE_MODULES=[yes|no]` when set, the build will include the example modules located under the `src/modules` folder. Default: `no` ## Common flags - `-DCMAKE_BUILD_TYPE=` define the build type, see CMake manual for more details - `-DCMAKE_INSTALL_PREFIX=/installation/path` override this value to define a custom install prefix. Default: `/usr/local` -- `-G` generate build files for "Generator Name". 
By default, CMake will generate `Makefile`s. +- `-G""` generate build files for "Generator Name". By default, CMake will generate `Makefile`s. ## Verbose build diff --git a/cmake/Modules/Utils.cmake b/cmake/Modules/Utils.cmake index 304f39fb2c..59076397de 100644 --- a/cmake/Modules/Utils.cmake +++ b/cmake/Modules/Utils.cmake @@ -100,3 +100,16 @@ function (valkey_parse_build_option OPTION_VALUE OUT_ARG_ENUM) PARENT_SCOPE) endif () endfunction () + +function (valkey_pkg_config PKGNAME OUT_VARIABLE) + if (NOT FOUND_PKGCONFIG) + # Locate pkg-config once + find_package(PkgConfig REQUIRED) + set(FOUND_PKGCONFIG 1) + endif () + pkg_check_modules(__PREFIX REQUIRED ${PKGNAME}) + message(STATUS "Found library for '${PKGNAME}': ${__PREFIX_LIBRARIES}") + set(${OUT_VARIABLE} + "${__PREFIX_LIBRARIES}" + PARENT_SCOPE) +endfunction () diff --git a/cmake/Modules/ValkeySetup.cmake b/cmake/Modules/ValkeySetup.cmake index e935c3b308..4fafd07910 100644 --- a/cmake/Modules/ValkeySetup.cmake +++ b/cmake/Modules/ValkeySetup.cmake @@ -74,9 +74,11 @@ endmacro () macro (valkey_build_and_install_bin target sources ld_flags libs link_name) add_executable(${target} ${sources}) - if (USE_JEMALLOC) - # Using jemalloc - target_link_libraries(${target} jemalloc) + if (USE_JEMALLOC + OR USE_TCMALLOC + OR USE_TCMALLOC_MINIMAL) + # Using custom allocator + target_link_libraries(${target} ${ALLOCATOR_LIB}) endif () # Place this line last to ensure that ${ld_flags} is placed last on the linker line @@ -151,16 +153,23 @@ endif () if (BUILD_MALLOC) if ("${BUILD_MALLOC}" STREQUAL "jemalloc") set(MALLOC_LIB "jemalloc") + set(ALLOCATOR_LIB "jemalloc") add_valkey_server_compiler_options("-DUSE_JEMALLOC") set(USE_JEMALLOC 1) elseif ("${BUILD_MALLOC}" STREQUAL "libc") set(MALLOC_LIB "libc") elseif ("${BUILD_MALLOC}" STREQUAL "tcmalloc") set(MALLOC_LIB "tcmalloc") + valkey_pkg_config(libtcmalloc ALLOCATOR_LIB) + add_valkey_server_compiler_options("-DUSE_TCMALLOC") + set(USE_TCMALLOC 1) elseif ("${BUILD_MALLOC}" 
STREQUAL "tcmalloc_minimal") set(MALLOC_LIB "tcmalloc_minimal") + valkey_pkg_config(libtcmalloc_minimal ALLOCATOR_LIB) + add_valkey_server_compiler_options("-DUSE_TCMALLOC") + set(USE_TCMALLOC_MINIMAL 1) else () message(FATAL_ERROR "BUILD_MALLOC can be one of: jemalloc, libc, tcmalloc or tcmalloc_minimal") endif () @@ -202,16 +211,12 @@ if (BUILD_RDMA) if (USE_RDMA EQUAL 2) # Module message(STATUS "Building RDMA as module") add_valkey_server_compiler_options("-DUSE_RDMA=2") - find_package(PkgConfig REQUIRED) # Locate librdmacm & libibverbs, fail if we can't find them - pkg_check_modules(RDMACM REQUIRED librdmacm) - pkg_check_modules(IBVERBS REQUIRED libibverbs) + valkey_pkg_config(librdmacm RDMACM_LIBS) + valkey_pkg_config(libibverbs IBVERBS_LIBS) - message(STATUS "${RDMACM_LINK_LIBRARIES};${IBVERBS_LINK_LIBRARIES}") - list(APPEND RDMA_LIBS "${RDMACM_LIBRARIES};${IBVERBS_LIBRARIES}") - unset(RDMACM_LINK_LIBRARIES CACHE) - unset(IBVERBS_LINK_LIBRARIES CACHE) + list(APPEND RDMA_LIBS "${RDMACM_LIBS};${IBVERBS_LIBS}") set(BUILD_RDMA_MODULE 1) elseif (USE_RDMA EQUAL 1) # RDMA can only be built as a module. 
So disable it @@ -266,17 +271,18 @@ endif () # Sanitizer if (BUILD_SANITIZER) - # For best results, force libc - set(MALLOC_LIB, "libc") + # Common CFLAGS + list(APPEND VALKEY_SANITAIZER_CFLAGS "-fno-sanitize-recover=all") + list(APPEND VALKEY_SANITAIZER_CFLAGS "-fno-omit-frame-pointer") if ("${BUILD_SANITIZER}" STREQUAL "address") - add_valkey_server_compiler_options("-fsanitize=address -fno-sanitize-recover=all -fno-omit-frame-pointer") - add_valkey_server_linker_option("-fsanitize=address") + list(APPEND VALKEY_SANITAIZER_CFLAGS "-fsanitize=address") + list(APPEND VALKEY_SANITAIZER_LDFLAGS "-fsanitize=address") elseif ("${BUILD_SANITIZER}" STREQUAL "thread") - add_valkey_server_compiler_options("-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer") - add_valkey_server_linker_option("-fsanitize=thread") + list(APPEND VALKEY_SANITAIZER_CFLAGS "-fsanitize=thread") + list(APPEND VALKEY_SANITAIZER_LDFLAGS "-fsanitize=thread") elseif ("${BUILD_SANITIZER}" STREQUAL "undefined") - add_valkey_server_compiler_options("-fsanitize=undefined -fno-sanitize-recover=all -fno-omit-frame-pointer") - add_valkey_server_linker_option("-fsanitize=undefined") + list(APPEND VALKEY_SANITAIZER_CFLAGS "-fsanitize=undefined") + list(APPEND VALKEY_SANITAIZER_LDFLAGS "-fsanitize=undefined") else () message(FATAL_ERROR "Unknown sanitizer: ${BUILD_SANITIZER}") endif () @@ -366,7 +372,6 @@ include(SourceFiles) # Clear the below variables from the cache unset(CMAKE_C_FLAGS CACHE) -unset(BUILD_SANITIZER CACHE) unset(VALKEY_SERVER_LDFLAGS CACHE) unset(VALKEY_SERVER_CFLAGS CACHE) unset(PYTHON_EXE CACHE) diff --git a/deps/jemalloc/CMakeLists.txt b/deps/jemalloc/CMakeLists.txt index e79e960ec2..0fa99df55e 100644 --- a/deps/jemalloc/CMakeLists.txt +++ b/deps/jemalloc/CMakeLists.txt @@ -12,9 +12,18 @@ if (NOT EXISTS ${JEMALLOC_INSTALL_DIR}/lib/libjemalloc.a) COMMAND sh -c "${JEMALLOC_SRC_DIR}/configure --disable-cxx \ --with-version=5.3.0-0-g0 --with-lg-quantum=3 
--disable-cache-oblivious --with-jemalloc-prefix=je_ \ --enable-static --disable-shared --prefix=${JEMALLOC_INSTALL_DIR}" - WORKING_DIRECTORY ${JEMALLOC_SRC_DIR} COMMAND_ERROR_IS_FATAL ANY) + WORKING_DIRECTORY ${JEMALLOC_SRC_DIR} RESULTS_VARIABLE CONFIGURE_RESULT) + + if (NOT ${CONFIGURE_RESULT} EQUAL 0) + message(FATAL_ERROR "Jemalloc configure failed") + endif () + execute_process(COMMAND make -j${VALKEY_PROCESSOR_COUNT} lib/libjemalloc.a install - WORKING_DIRECTORY "${JEMALLOC_SRC_DIR}") + WORKING_DIRECTORY "${JEMALLOC_SRC_DIR}" RESULTS_VARIABLE MAKE_RESULT) + + if (NOT ${MAKE_RESULT} EQUAL 0) + message(FATAL_ERROR "Jemalloc build failed") + endif () endif () # Import the compiled library as a CMake target diff --git a/deps/lua/CMakeLists.txt b/deps/lua/CMakeLists.txt index e911de9232..0629d7f978 100644 --- a/deps/lua/CMakeLists.txt +++ b/deps/lua/CMakeLists.txt @@ -1,5 +1,7 @@ project(lualib) +include(CheckFunctionExists) + set(LUA_SRC_DIR "${CMAKE_CURRENT_LIST_DIR}/src") set(LUA_SRCS ${LUA_SRC_DIR}/fpconv.c @@ -42,3 +44,10 @@ set(LUA_SRCS add_library(lualib STATIC "${LUA_SRCS}") target_include_directories(lualib PUBLIC "${LUA_SRC_DIR}") target_compile_definitions(lualib PRIVATE ENABLE_CJSON_GLOBAL) + +# Use mkstemp if available +check_function_exists(mkstemp HAVE_MKSTEMP) +if (HAVE_MKSTEMP) + target_compile_definitions(lualib PRIVATE LUA_USE_MKSTEMP) +endif () +unset(HAVE_MKSTEMP CACHE) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b7e328163b..51e1b5a2e6 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,6 +22,16 @@ if (VALKEY_RELEASE_BUILD) set_property(TARGET valkey-server PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) endif () +if (BUILD_SANITIZER) + # 'BUILD_SANITIZER' is defined in ValkeySetup module (based on user input) + # If defined, the variables 'VALKEY_SANITAIZER_CFLAGS' and 'VALKEY_SANITAIZER_LDFLAGS' + # are set with the link & compile flags required + message(STATUS "Adding sanitizer flags for target valkey-server") + 
target_compile_options(valkey-server PRIVATE ${VALKEY_SANITAIZER_CFLAGS}) + target_link_options(valkey-server PRIVATE ${VALKEY_SANITAIZER_LDFLAGS}) +endif () +unset(BUILD_SANITIZER CACHE) + # Target: valkey-cli list(APPEND CLI_LIBS "linenoise") valkey_build_and_install_bin(valkey-cli "${VALKEY_CLI_SRCS}" "${VALKEY_SERVER_LDFLAGS}" "${CLI_LIBS}" "redis-cli") From 653d5f7fe3d44adfb2a2e10c9110a3efacd3f0da Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 25 Nov 2024 09:59:37 +0800 Subject: [PATCH 43/60] Support empty callback on function and free temp function in async way (#1334) We have a replicationEmptyDbCallback, it is a callback used by emptyData while flushing away old data. Previously, we did not add this callback logic for function, in case of abuse, there may be a lot of functions, and also to make the code consistent, we add the same callback logic for function. Changes around this commit: 1. Extend emptyData / functionsLibCtxClear to support passing callback when flushing functions. 2. Added disklessLoad function create and discard helper function (just like disklessLoadInitTempDb and disklessLoadDiscardTempDb), we will always flush the temp function in an async way to avoid any block. 3. Cleanup around discardTempDb, remove the callback pointer since in async way we don't need the callback. 4. Remove functionsLibCtxClear call in readSyncBulkPayload, because we called emptyData in the previous lines, which also empty functions. We are doing this callback in replication because during the flush, replica may block a while if the flush is done in the sync way; to avoid the primary detecting that the replica is timing out, replica will use this callback to notify the primary (we also do this callback when loading an RDB). And in the async way, we empty the data in the bio and there is no slow operation, so it ignores the callback.
Signed-off-by: Binbin --- src/db.c | 10 ++++------ src/functions.c | 16 ++++++++-------- src/functions.h | 4 ++-- src/replication.c | 22 +++++++++++++++++----- src/server.h | 2 +- 5 files changed, 32 insertions(+), 22 deletions(-) diff --git a/src/db.c b/src/db.c index 5a57863de8..d3ef19027d 100644 --- a/src/db.c +++ b/src/db.c @@ -574,7 +574,7 @@ long long emptyData(int dbnum, int flags, void(callback)(dict *)) { if (with_functions) { serverAssert(dbnum == -1); - functionsLibCtxClearCurrent(async); + functionsLibCtxClearCurrent(async, callback); } /* Also fire the end event. Note that this event will fire almost @@ -602,12 +602,10 @@ serverDb *initTempDb(void) { return tempDb; } -/* Discard tempDb, this can be slow (similar to FLUSHALL), but it's always async. */ -void discardTempDb(serverDb *tempDb, void(callback)(dict *)) { - int async = 1; - +/* Discard tempDb, it's always async. */ +void discardTempDb(serverDb *tempDb) { /* Release temp DBs. */ - emptyDbStructure(tempDb, -1, async, callback); + emptyDbStructure(tempDb, -1, 1, NULL); for (int i = 0; i < server.dbnum; i++) { kvstoreRelease(tempDb[i].keys); kvstoreRelease(tempDb[i].expires); diff --git a/src/functions.c b/src/functions.c index 916d8fd622..b694e35252 100644 --- a/src/functions.c +++ b/src/functions.c @@ -161,9 +161,9 @@ static void engineLibraryDispose(void *obj) { } /* Clear all the functions from the given library ctx */ -void functionsLibCtxClear(functionsLibCtx *lib_ctx) { - dictEmpty(lib_ctx->functions, NULL); - dictEmpty(lib_ctx->libraries, NULL); +void functionsLibCtxClear(functionsLibCtx *lib_ctx, void(callback)(dict *)) { + dictEmpty(lib_ctx->functions, callback); + dictEmpty(lib_ctx->libraries, callback); dictIterator *iter = dictGetIterator(lib_ctx->engines_stats); dictEntry *entry = NULL; while ((entry = dictNext(iter))) { @@ -175,13 +175,13 @@ void functionsLibCtxClear(functionsLibCtx *lib_ctx) { lib_ctx->cache_memory = 0; } -void functionsLibCtxClearCurrent(int async) { +void 
functionsLibCtxClearCurrent(int async, void(callback)(dict *)) { if (async) { functionsLibCtx *old_l_ctx = curr_functions_lib_ctx; curr_functions_lib_ctx = functionsLibCtxCreate(); freeFunctionsAsync(old_l_ctx); } else { - functionsLibCtxClear(curr_functions_lib_ctx); + functionsLibCtxClear(curr_functions_lib_ctx, callback); } } @@ -196,7 +196,7 @@ static void functionsLibCtxFreeGeneric(functionsLibCtx *functions_lib_ctx, int a /* Free the given functions ctx */ void functionsLibCtxFree(functionsLibCtx *functions_lib_ctx) { - functionsLibCtxClear(functions_lib_ctx); + functionsLibCtxClear(functions_lib_ctx, NULL); dictRelease(functions_lib_ctx->functions); dictRelease(functions_lib_ctx->libraries); dictRelease(functions_lib_ctx->engines_stats); @@ -380,7 +380,7 @@ libraryJoin(functionsLibCtx *functions_lib_ctx_dst, functionsLibCtx *functions_l dictReleaseIterator(iter); iter = NULL; - functionsLibCtxClear(functions_lib_ctx_src); + functionsLibCtxClear(functions_lib_ctx_src, NULL); if (old_libraries_list) { listRelease(old_libraries_list); old_libraries_list = NULL; @@ -820,7 +820,7 @@ void functionFlushCommand(client *c) { return; } - functionsLibCtxClearCurrent(async); + functionsLibCtxClearCurrent(async, NULL); /* Indicate that the command changed the data so it will be replicated and * counted as a data change (for persistence configuration) */ diff --git a/src/functions.h b/src/functions.h index 429405bb2d..b199fbd06e 100644 --- a/src/functions.h +++ b/src/functions.h @@ -133,9 +133,9 @@ dict *functionsLibGet(void); size_t functionsLibCtxFunctionsLen(functionsLibCtx *functions_ctx); functionsLibCtx *functionsLibCtxGetCurrent(void); functionsLibCtx *functionsLibCtxCreate(void); -void functionsLibCtxClearCurrent(int async); +void functionsLibCtxClearCurrent(int async, void(callback)(dict *)); void functionsLibCtxFree(functionsLibCtx *functions_lib_ctx); -void functionsLibCtxClear(functionsLibCtx *lib_ctx); +void functionsLibCtxClear(functionsLibCtx *lib_ctx, 
void(callback)(dict *)); void functionsLibCtxSwapWithCurrent(functionsLibCtx *new_lib_ctx, int async); int functionLibCreateFunction(sds name, void *function, functionLibInfo *li, sds desc, uint64_t f_flags, sds *err); diff --git a/src/replication.c b/src/replication.c index 1654847bd6..dcf7ee3f8c 100644 --- a/src/replication.c +++ b/src/replication.c @@ -1981,7 +1981,20 @@ serverDb *disklessLoadInitTempDb(void) { /* Helper function for readSyncBulkPayload() to discard our tempDb * when the loading succeeded or failed. */ void disklessLoadDiscardTempDb(serverDb *tempDb) { - discardTempDb(tempDb, replicationEmptyDbCallback); + discardTempDb(tempDb); +} + +/* Helper function for to initialize temp function lib context. + * The temp ctx may be populated by functionsLibCtxSwapWithCurrent or + * freed by disklessLoadDiscardFunctionsLibCtx later. */ +functionsLibCtx *disklessLoadFunctionsLibCtxCreate(void) { + return functionsLibCtxCreate(); +} + +/* Helper function to discard our temp function lib context + * when the loading succeeded or failed. */ +void disklessLoadDiscardFunctionsLibCtx(functionsLibCtx *temp_functions_lib_ctx) { + freeFunctionsAsync(temp_functions_lib_ctx); } /* If we know we got an entirely different data set from our primary @@ -2186,7 +2199,7 @@ void readSyncBulkPayload(connection *conn) { if (use_diskless_load && server.repl_diskless_load == REPL_DISKLESS_LOAD_SWAPDB) { /* Initialize empty tempDb dictionaries. 
*/ diskless_load_tempDb = disklessLoadInitTempDb(); - temp_functions_lib_ctx = functionsLibCtxCreate(); + temp_functions_lib_ctx = disklessLoadFunctionsLibCtxCreate(); moduleFireServerEvent(VALKEYMODULE_EVENT_REPL_ASYNC_LOAD, VALKEYMODULE_SUBEVENT_REPL_ASYNC_LOAD_STARTED, NULL); } @@ -2226,7 +2239,6 @@ void readSyncBulkPayload(connection *conn) { dbarray = server.db; functions_lib_ctx = functionsLibCtxGetCurrent(); - functionsLibCtxClear(functions_lib_ctx); } rioInitWithConn(&rdb, conn, server.repl_transfer_size); @@ -2264,7 +2276,7 @@ void readSyncBulkPayload(connection *conn) { NULL); disklessLoadDiscardTempDb(diskless_load_tempDb); - functionsLibCtxFree(temp_functions_lib_ctx); + disklessLoadDiscardFunctionsLibCtx(temp_functions_lib_ctx); serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Discarding temporary DB in background"); } else { /* Remove the half-loaded data in case we started with an empty replica. */ @@ -2289,7 +2301,7 @@ void readSyncBulkPayload(connection *conn) { swapMainDbWithTempDb(diskless_load_tempDb); /* swap existing functions ctx with the temporary one */ - functionsLibCtxSwapWithCurrent(temp_functions_lib_ctx, 0); + functionsLibCtxSwapWithCurrent(temp_functions_lib_ctx, 1); moduleFireServerEvent(VALKEYMODULE_EVENT_REPL_ASYNC_LOAD, VALKEYMODULE_SUBEVENT_REPL_ASYNC_LOAD_COMPLETED, NULL); diff --git a/src/server.h b/src/server.h index f4c7306009..09b67b2670 100644 --- a/src/server.h +++ b/src/server.h @@ -3572,7 +3572,7 @@ long long emptyDbStructure(serverDb *dbarray, int dbnum, int async, void(callbac void flushAllDataAndResetRDB(int flags); long long dbTotalServerKeyCount(void); serverDb *initTempDb(void); -void discardTempDb(serverDb *tempDb, void(callback)(dict *)); +void discardTempDb(serverDb *tempDb); int selectDb(client *c, int id); From c4920bca4a6681b2ba652e4dc52b72fe47db516a Mon Sep 17 00:00:00 2001 From: Parth <661497+parthpatel@users.noreply.github.com> Date: Mon, 25 Nov 2024 01:01:43 -0800 Subject: [PATCH 44/60] Integrating 
fast_float to optionally replace strtod (#1260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fast_float is a C++ header-only library to parse doubles using SIMD instructions. The purpose is to speed up sorted sets and other commands that use doubles. A single-file copy of fast_float is included in this repo. This introduces an optional dependency on a C++ compiler. The use of fast_float is enabled at compile time using the make variable `USE_FAST_FLOAT=yes`. It is disabled by default. Fixes #1069. --------- Signed-off-by: Parth Patel <661497+parthpatel@users.noreply.github.com> Signed-off-by: Parth <661497+parthpatel@users.noreply.github.com> Signed-off-by: Madelyn Olson Signed-off-by: Viktor Söderqvist Co-authored-by: Roshan Swain Co-authored-by: Madelyn Olson Co-authored-by: Viktor Söderqvist --- .github/workflows/ci.yml | 21 +- .github/workflows/daily.yml | 2 +- deps/Makefile | 7 + deps/README.md | 15 + deps/fast_float/fast_float.h | 3912 +++++++++++++++++ deps/fast_float_c_interface/Makefile | 37 + .../fast_float_strtod.cpp | 24 + src/Makefile | 13 +- src/debug.c | 4 +- src/resp_parser.c | 8 +- src/sort.c | 8 +- src/t_zset.c | 12 +- src/unit/test_files.h | 3 + src/unit/test_valkey_strtod.c | 36 + src/util.c | 8 +- src/valkey-cli.c | 7 +- src/valkey_strtod.h | 42 + tests/test_helper.tcl | 6 +- 18 files changed, 4136 insertions(+), 29 deletions(-) create mode 100644 deps/fast_float/fast_float.h create mode 100644 deps/fast_float_c_interface/Makefile create mode 100644 deps/fast_float_c_interface/fast_float_strtod.cpp create mode 100644 src/unit/test_valkey_strtod.c create mode 100644 src/valkey_strtod.h diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bc946b7193..3fec424cee 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,7 +17,7 @@ jobs: - name: make # Fail build if there are warnings # build with TLS just for compilation coverage - run: make -j4 all-with-unit-tests 
SERVER_CFLAGS='-Werror' BUILD_TLS=yes + run: make -j4 all-with-unit-tests SERVER_CFLAGS='-Werror' BUILD_TLS=yes USE_FAST_FLOAT=yes - name: test run: | sudo apt-get install tcl8.6 tclx @@ -108,23 +108,30 @@ jobs: steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: make - run: make -j3 all-with-unit-tests SERVER_CFLAGS='-Werror' + # Build with additional upcoming features + run: make -j3 all-with-unit-tests SERVER_CFLAGS='-Werror' USE_FAST_FLOAT=yes build-32bit: runs-on: ubuntu-latest steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: make + # Fast float requires C++ 32-bit libraries to compile on 64-bit ubuntu + # machine i.e. "-cross" suffixed version. Cross-compiling c++ to 32-bit + # also requires multilib support for g++ compiler i.e. "-multilib" + # suffixed version of g++. g++-multilib generally includes libstdc++. + # *cross version as well, but it is also added explicitly just in case. run: | - sudo apt-get update && sudo apt-get install libc6-dev-i386 - make -j4 SERVER_CFLAGS='-Werror' 32bit + sudo apt-get update + sudo apt-get install libc6-dev-i386 libstdc++-11-dev-i386-cross gcc-multilib g++-multilib + make -j4 SERVER_CFLAGS='-Werror' 32bit USE_FAST_FLOAT=yes build-libc-malloc: runs-on: ubuntu-latest steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - name: make - run: make -j4 SERVER_CFLAGS='-Werror' MALLOC=libc + run: make -j4 SERVER_CFLAGS='-Werror' MALLOC=libc USE_FAST_FLOAT=yes build-almalinux8-jemalloc: runs-on: ubuntu-latest @@ -134,8 +141,8 @@ jobs: - name: make run: | - dnf -y install epel-release gcc make procps-ng which - make -j4 SERVER_CFLAGS='-Werror' + dnf -y install epel-release gcc gcc-c++ make procps-ng which + make -j4 SERVER_CFLAGS='-Werror' USE_FAST_FLOAT=yes format-yaml: runs-on: ubuntu-latest diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index 8bdbc8d4c2..e39e672689 100644 --- 
a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -319,7 +319,7 @@ jobs: ref: ${{ env.GITHUB_HEAD_REF }} - name: make run: | - make BUILD_TLS=yes SERVER_CFLAGS='-Werror' + make BUILD_TLS=yes SERVER_CFLAGS='-Werror' USE_FAST_FLOAT=yes - name: testprep run: | sudo apt-get install tcl8.6 tclx tcl-tls diff --git a/deps/Makefile b/deps/Makefile index f1e4bd6ce2..72389def95 100644 --- a/deps/Makefile +++ b/deps/Makefile @@ -42,6 +42,7 @@ distclean: -(cd jemalloc && [ -f Makefile ] && $(MAKE) distclean) > /dev/null || true -(cd hdr_histogram && $(MAKE) clean) > /dev/null || true -(cd fpconv && $(MAKE) clean) > /dev/null || true + -(cd fast_float_c_interface && $(MAKE) clean) > /dev/null || true -(rm -f .make-*) .PHONY: distclean @@ -116,3 +117,9 @@ jemalloc: .make-prerequisites cd jemalloc && $(MAKE) lib/libjemalloc.a .PHONY: jemalloc + +fast_float_c_interface: .make-prerequisites + @printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR) + cd fast_float_c_interface && $(MAKE) + +.PHONY: fast_float_c_interface diff --git a/deps/README.md b/deps/README.md index b918b47456..97a7baf64b 100644 --- a/deps/README.md +++ b/deps/README.md @@ -6,6 +6,7 @@ should be provided by the operating system. * **linenoise** is a readline replacement. It is developed by the same authors of Valkey but is managed as a separated project and updated as needed. * **lua** is Lua 5.1 with minor changes for security and additional libraries. * **hdr_histogram** Used for per-command latency tracking histograms. +* **fast_float** is a replacement for strtod to convert strings to floats efficiently. How to upgrade the above dependencies === @@ -105,3 +106,17 @@ We use a customized version based on master branch commit e4448cf6d1cd08fff51981 2. Copy updated files from newer version onto files in /hdr_histogram. 3. Apply the changes from 1 above to the updated files. 
+fast_float +--- +The fast_float library provides fast header-only implementations for the C++ from_chars functions for `float` and `double` types as well as integer types. These functions convert ASCII strings representing decimal values (e.g., `1.3e10`) into binary types. The functions are much faster than comparable number-parsing functions from existing C++ standard libraries. + +Specifically, `fast_float` provides the following function to parse floating-point numbers with a C++17-like syntax (the library itself only requires C++11): + + template ())> + from_chars_result_t from_chars(UC const *first, UC const *last, T &value, chars_format fmt = chars_format::general); + +To upgrade the library, +1. Check out https://github.com/fastfloat/fast_float/tree/main +2. cd fast_float +3. Invoke "python3 ./script/amalgamate.py --output fast_float.h" +4. Copy fast_float.h file to "deps/fast_float/". diff --git a/deps/fast_float/fast_float.h b/deps/fast_float/fast_float.h new file mode 100644 index 0000000000..9ba3bc2e97 --- /dev/null +++ b/deps/fast_float/fast_float.h @@ -0,0 +1,3912 @@ +// fast_float by Daniel Lemire +// fast_float by João Paulo Magalhaes +// +// +// with contributions from Eugene Golushkov +// with contributions from Maksim Kita +// with contributions from Marcin Wojdyr +// with contributions from Neal Richardson +// with contributions from Tim Paine +// with contributions from Fabio Pellacini +// with contributions from Lénárd Szolnoki +// with contributions from Jan Pharago +// with contributions from Maya Warrier +// with contributions from Taha Khokhar +// +// +// Licensed under the Apache License, Version 2.0, or the +// MIT License or the Boost License. This file may not be copied, +// modified, or distributed except according to those terms. 
+// +// MIT License Notice +// +// MIT License +// +// Copyright (c) 2021 The fast_float authors +// +// Permission is hereby granted, free of charge, to any +// person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the +// Software without restriction, including without +// limitation the rights to use, copy, modify, merge, +// publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software +// is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice +// shall be included in all copies or substantial portions +// of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +// TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +// SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +// IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Apache License (Version 2.0) Notice +// +// Copyright 2021 The fast_float authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// +// BOOST License Notice +// +// Boost Software License - Version 1.0 - August 17th, 2003 +// +// Permission is hereby granted, free of charge, to any person or organization +// obtaining a copy of the software and accompanying documentation covered by +// this license (the "Software") to use, reproduce, display, distribute, +// execute, and transmit the Software, and to prepare derivative works of the +// Software, and to permit third-parties to whom the Software is furnished to +// do so, all subject to the following: +// +// The copyright notices in the Software and this entire statement, including +// the above license grant, this restriction and the following disclaimer, +// must be included in all copies of the Software, in whole or in part, and +// all derivative works of the Software, unless such copies or derivative +// works are solely in the form of machine-executable object code generated by +// a source language processor. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +// SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +// FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. 
+// + +#ifndef FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H +#define FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H + +#ifdef __has_include +#if __has_include() +#include +#endif +#endif + +// Testing for https://wg21.link/N3652, adopted in C++14 +#if __cpp_constexpr >= 201304 +#define FASTFLOAT_CONSTEXPR14 constexpr +#else +#define FASTFLOAT_CONSTEXPR14 +#endif + +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#define FASTFLOAT_HAS_BIT_CAST 1 +#else +#define FASTFLOAT_HAS_BIT_CAST 0 +#endif + +#if defined(__cpp_lib_is_constant_evaluated) && \ + __cpp_lib_is_constant_evaluated >= 201811L +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 1 +#else +#define FASTFLOAT_HAS_IS_CONSTANT_EVALUATED 0 +#endif + +// Testing for relevant C++20 constexpr library features +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED && FASTFLOAT_HAS_BIT_CAST && \ + __cpp_lib_constexpr_algorithms >= 201806L /*For std::copy and std::fill*/ +#define FASTFLOAT_CONSTEXPR20 constexpr +#define FASTFLOAT_IS_CONSTEXPR 1 +#else +#define FASTFLOAT_CONSTEXPR20 +#define FASTFLOAT_IS_CONSTEXPR 0 +#endif + +#if __cplusplus >= 201703L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201703L) +#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 0 +#else +#define FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE 1 +#endif + +#endif // FASTFLOAT_CONSTEXPR_FEATURE_DETECT_H + +#ifndef FASTFLOAT_FLOAT_COMMON_H +#define FASTFLOAT_FLOAT_COMMON_H + +#include +#include +#include +#include +#include +#include +#ifdef __has_include +#if __has_include() && (__cplusplus > 202002L || _MSVC_LANG > 202002L) +#include +#endif +#endif + +namespace fast_float { + +#define FASTFLOAT_JSONFMT (1 << 5) +#define FASTFLOAT_FORTRANFMT (1 << 6) + +enum chars_format { + scientific = 1 << 0, + fixed = 1 << 2, + hex = 1 << 3, + no_infnan = 1 << 4, + // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 + json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan, + // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. 
+ json_or_infnan = FASTFLOAT_JSONFMT | fixed | scientific, + fortran = FASTFLOAT_FORTRANFMT | fixed | scientific, + general = fixed | scientific +}; + +template struct from_chars_result_t { + UC const *ptr; + std::errc ec; +}; +using from_chars_result = from_chars_result_t; + +template struct parse_options_t { + constexpr explicit parse_options_t(chars_format fmt = chars_format::general, + UC dot = UC('.')) + : format(fmt), decimal_point(dot) {} + + /** Which number formats are accepted */ + chars_format format; + /** The character used as decimal point */ + UC decimal_point; +}; +using parse_options = parse_options_t; + +} // namespace fast_float + +#if FASTFLOAT_HAS_BIT_CAST +#include +#endif + +#if (defined(__x86_64) || defined(__x86_64__) || defined(_M_X64) || \ + defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) || \ + defined(__MINGW64__) || defined(__s390x__) || \ + (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \ + defined(__PPC64LE__)) || \ + defined(__loongarch64)) +#define FASTFLOAT_64BIT 1 +#elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ + defined(__arm__) || defined(_M_ARM) || defined(__ppc__) || \ + defined(__MINGW32__) || defined(__EMSCRIPTEN__)) +#define FASTFLOAT_32BIT 1 +#else + // Need to check incrementally, since SIZE_MAX is a size_t, avoid overflow. +// We can never tell the register width, but the SIZE_MAX is a good +// approximation. UINTPTR_MAX and INTPTR_MAX are optional, so avoid them for max +// portability. +#if SIZE_MAX == 0xffff +#error Unknown platform (16-bit, unsupported) +#elif SIZE_MAX == 0xffffffff +#define FASTFLOAT_32BIT 1 +#elif SIZE_MAX == 0xffffffffffffffff +#define FASTFLOAT_64BIT 1 +#else +#error Unknown platform (not 32-bit, not 64-bit?) 
+#endif +#endif + +#if ((defined(_WIN32) || defined(_WIN64)) && !defined(__clang__)) || \ + (defined(_M_ARM64) && !defined(__MINGW32__)) +#include +#endif + +#if defined(_MSC_VER) && !defined(__clang__) +#define FASTFLOAT_VISUAL_STUDIO 1 +#endif + +#if defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#elif defined _WIN32 +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#if defined(__APPLE__) || defined(__FreeBSD__) +#include +#elif defined(sun) || defined(__sun) +#include +#elif defined(__MVS__) +#include +#else +#ifdef __has_include +#if __has_include() +#include +#endif //__has_include() +#endif //__has_include +#endif +# +#ifndef __BYTE_ORDER__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#ifndef __ORDER_LITTLE_ENDIAN__ +// safe choice +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#endif +# +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define FASTFLOAT_IS_BIG_ENDIAN 0 +#else +#define FASTFLOAT_IS_BIG_ENDIAN 1 +#endif +#endif + +#if defined(__SSE2__) || (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || \ + (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define FASTFLOAT_NEON 1 +#endif + +#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON) +#define FASTFLOAT_HAS_SIMD 1 +#endif + +#if defined(__GNUC__) +// disable -Wcast-align=strict (GCC only) +#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") +#else +#define FASTFLOAT_SIMD_DISABLE_WARNINGS +#endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_RESTORE_WARNINGS _Pragma("GCC diagnostic pop") +#else +#define FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif + +#ifdef FASTFLOAT_VISUAL_STUDIO +#define fastfloat_really_inline __forceinline +#else +#define fastfloat_really_inline inline __attribute__((always_inline)) +#endif + +#ifndef FASTFLOAT_ASSERT 
+#define FASTFLOAT_ASSERT(x) \ + { ((void)(x)); } +#endif + +#ifndef FASTFLOAT_DEBUG_ASSERT +#define FASTFLOAT_DEBUG_ASSERT(x) \ + { ((void)(x)); } +#endif + +// rust style `try!()` macro, or `?` operator +#define FASTFLOAT_TRY(x) \ + { \ + if (!(x)) \ + return false; \ + } + +#define FASTFLOAT_ENABLE_IF(...) \ + typename std::enable_if<(__VA_ARGS__), int>::type + +namespace fast_float { + +fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { +#if FASTFLOAT_HAS_IS_CONSTANT_EVALUATED + return std::is_constant_evaluated(); +#else + return false; +#endif +} + +template +fastfloat_really_inline constexpr bool is_supported_float_type() { + return std::is_same::value || std::is_same::value +#if __STDCPP_FLOAT32_T__ + || std::is_same::value +#endif +#if __STDCPP_FLOAT64_T__ + || std::is_same::value +#endif + ; +} + +template +fastfloat_really_inline constexpr bool is_supported_char_type() { + return std::is_same::value || std::is_same::value || + std::is_same::value || std::is_same::value; +} + +// Compares two ASCII strings in a case insensitive manner. +template +inline FASTFLOAT_CONSTEXPR14 bool +fastfloat_strncasecmp(UC const *input1, UC const *input2, size_t length) { + char running_diff{0}; + for (size_t i = 0; i < length; ++i) { + running_diff |= (char(input1[i]) ^ char(input2[i])); + } + return (running_diff == 0) || (running_diff == 32); +} + +#ifndef FLT_EVAL_METHOD +#error "FLT_EVAL_METHOD should be defined, please include cfloat." 
+#endif + +// a pointer and a length to a contiguous block of memory +template struct span { + const T *ptr; + size_t length; + constexpr span(const T *_ptr, size_t _length) : ptr(_ptr), length(_length) {} + constexpr span() : ptr(nullptr), length(0) {} + + constexpr size_t len() const noexcept { return length; } + + FASTFLOAT_CONSTEXPR14 const T &operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return ptr[index]; + } +}; + +struct value128 { + uint64_t low; + uint64_t high; + constexpr value128(uint64_t _low, uint64_t _high) : low(_low), high(_high) {} + constexpr value128() : low(0), high(0) {} +}; + +/* Helper C++14 constexpr generic implementation of leading_zeroes */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int +leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { + if (input_num & uint64_t(0xffffffff00000000)) { + input_num >>= 32; + last_bit |= 32; + } + if (input_num & uint64_t(0xffff0000)) { + input_num >>= 16; + last_bit |= 16; + } + if (input_num & uint64_t(0xff00)) { + input_num >>= 8; + last_bit |= 8; + } + if (input_num & uint64_t(0xf0)) { + input_num >>= 4; + last_bit |= 4; + } + if (input_num & uint64_t(0xc)) { + input_num >>= 2; + last_bit |= 2; + } + if (input_num & uint64_t(0x2)) { /* input_num >>= 1; */ + last_bit |= 1; + } + return 63 - last_bit; +} + +/* result might be undefined when input_num is zero */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 int +leading_zeroes(uint64_t input_num) { + assert(input_num > 0); + if (cpp20_and_in_constexpr()) { + return leading_zeroes_generic(input_num); + } +#ifdef FASTFLOAT_VISUAL_STUDIO +#if defined(_M_X64) || defined(_M_ARM64) + unsigned long leading_zero = 0; + // Search the mask data from most significant bit (MSB) + // to least significant bit (LSB) for a set bit (1). 
+ _BitScanReverse64(&leading_zero, input_num); + return (int)(63 - leading_zero); +#else + return leading_zeroes_generic(input_num); +#endif +#else + return __builtin_clzll(input_num); +#endif +} + +// slow emulation routine for 32-bit +fastfloat_really_inline constexpr uint64_t emulu(uint32_t x, uint32_t y) { + return x * (uint64_t)y; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t +umul128_generic(uint64_t ab, uint64_t cd, uint64_t *hi) { + uint64_t ad = emulu((uint32_t)(ab >> 32), (uint32_t)cd); + uint64_t bd = emulu((uint32_t)ab, (uint32_t)cd); + uint64_t adbc = ad + emulu((uint32_t)ab, (uint32_t)(cd >> 32)); + uint64_t adbc_carry = (uint64_t)(adbc < ad); + uint64_t lo = bd + (adbc << 32); + *hi = emulu((uint32_t)(ab >> 32), (uint32_t)(cd >> 32)) + (adbc >> 32) + + (adbc_carry << 32) + (uint64_t)(lo < bd); + return lo; +} + +#ifdef FASTFLOAT_32BIT + +// slow emulation routine for 32-bit +#if !defined(__MINGW64__) +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t _umul128(uint64_t ab, + uint64_t cd, + uint64_t *hi) { + return umul128_generic(ab, cd, hi); +} +#endif // !__MINGW64__ + +#endif // FASTFLOAT_32BIT + +// compute 64-bit a*b +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128 +full_multiplication(uint64_t a, uint64_t b) { + if (cpp20_and_in_constexpr()) { + value128 answer; + answer.low = umul128_generic(a, b, &answer.high); + return answer; + } + value128 answer; +#if defined(_M_ARM64) && !defined(__MINGW32__) + // ARM64 has native support for 64-bit multiplications, no need to emulate + // But MinGW on ARM64 doesn't have native support for 64-bit multiplications + answer.high = __umulh(a, b); + answer.low = a * b; +#elif defined(FASTFLOAT_32BIT) || \ + (defined(_WIN64) && !defined(__clang__) && !defined(_M_ARM64)) + answer.low = _umul128(a, b, &answer.high); // _umul128 not available on ARM64 +#elif defined(FASTFLOAT_64BIT) && defined(__SIZEOF_INT128__) + __uint128_t r = ((__uint128_t)a) * b; + answer.low = uint64_t(r); + 
answer.high = uint64_t(r >> 64); +#else + answer.low = umul128_generic(a, b, &answer.high); +#endif + return answer; +} + +struct adjusted_mantissa { + uint64_t mantissa{0}; + int32_t power2{0}; // a negative value indicates an invalid result + adjusted_mantissa() = default; + constexpr bool operator==(const adjusted_mantissa &o) const { + return mantissa == o.mantissa && power2 == o.power2; + } + constexpr bool operator!=(const adjusted_mantissa &o) const { + return mantissa != o.mantissa || power2 != o.power2; + } +}; + +// Bias so we can get the real exponent with an invalid adjusted_mantissa. +constexpr static int32_t invalid_am_bias = -0x8000; + +// used for binary_format_lookup_tables::max_mantissa +constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; + +template struct binary_format_lookup_tables; + +template struct binary_format : binary_format_lookup_tables { + using equiv_uint = + typename std::conditional::type; + + static inline constexpr int mantissa_explicit_bits(); + static inline constexpr int minimum_exponent(); + static inline constexpr int infinite_power(); + static inline constexpr int sign_index(); + static inline constexpr int + min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST + static inline constexpr int max_exponent_fast_path(); + static inline constexpr int max_exponent_round_to_even(); + static inline constexpr int min_exponent_round_to_even(); + static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); + static inline constexpr uint64_t + max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST + static inline constexpr int largest_power_of_ten(); + static inline constexpr int smallest_power_of_ten(); + static inline constexpr T exact_power_of_ten(int64_t power); + static inline constexpr size_t max_digits(); + static inline constexpr equiv_uint exponent_mask(); + static inline constexpr equiv_uint mantissa_mask(); + static inline constexpr equiv_uint hidden_bit_mask(); +}; + +template struct 
binary_format_lookup_tables { + static constexpr double powers_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + + // Largest integer value v so that (5**index * v) <= 1<<53. + // 0x20000000000000 == 1 << 53 + static constexpr uint64_t max_mantissa[] = { + 0x20000000000000, + 0x20000000000000 / 5, + 0x20000000000000 / (5 * 5), + 0x20000000000000 / (5 * 5 * 5), + 0x20000000000000 / (5 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555), + 0x20000000000000 / (constant_55555 * 5), + 0x20000000000000 / (constant_55555 * 5 * 5), + 0x20000000000000 / (constant_55555 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555), + 0x20000000000000 / (constant_55555 * constant_55555 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * 5 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5), + 0x20000000000000 / + (constant_55555 * constant_55555 * constant_55555 * constant_55555), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5 * 5 * 5), + 0x20000000000000 / (constant_55555 * constant_55555 * constant_55555 * + constant_55555 * 5 * 5 * 5 * 5)}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr double 
binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +#endif + +template struct binary_format_lookup_tables { + static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, + 1e6f, 1e7f, 1e8f, 1e9f, 1e10f}; + + // Largest integer value v so that (5**index * v) <= 1<<24. + // 0x1000000 == 1<<24 + static constexpr uint64_t max_mantissa[] = { + 0x1000000, + 0x1000000 / 5, + 0x1000000 / (5 * 5), + 0x1000000 / (5 * 5 * 5), + 0x1000000 / (5 * 5 * 5 * 5), + 0x1000000 / (constant_55555), + 0x1000000 / (constant_55555 * 5), + 0x1000000 / (constant_55555 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * constant_55555), + 0x1000000 / (constant_55555 * constant_55555 * 5)}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr float binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +#endif + +template <> +inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -22; +#endif +} + +template <> +inline constexpr int binary_format::min_exponent_fast_path() { +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return 0; +#else + return -10; +#endif +} + +template <> +inline constexpr int binary_format::mantissa_explicit_bits() { + return 52; +} +template <> +inline constexpr int binary_format::mantissa_explicit_bits() { + return 23; +} + +template <> +inline constexpr int binary_format::max_exponent_round_to_even() { + return 23; +} + +template <> +inline constexpr int binary_format::max_exponent_round_to_even() { + return 10; +} + +template <> +inline constexpr int binary_format::min_exponent_round_to_even() { + return -4; +} + +template <> +inline constexpr int binary_format::min_exponent_round_to_even() { + return 
-17; +} + +template <> inline constexpr int binary_format::minimum_exponent() { + return -1023; +} +template <> inline constexpr int binary_format::minimum_exponent() { + return -127; +} + +template <> inline constexpr int binary_format::infinite_power() { + return 0x7FF; +} +template <> inline constexpr int binary_format::infinite_power() { + return 0xFF; +} + +template <> inline constexpr int binary_format::sign_index() { + return 63; +} +template <> inline constexpr int binary_format::sign_index() { + return 31; +} + +template <> +inline constexpr int binary_format::max_exponent_fast_path() { + return 22; +} +template <> +inline constexpr int binary_format::max_exponent_fast_path() { + return 10; +} + +template <> +inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 22 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} +template <> +inline constexpr uint64_t binary_format::max_mantissa_fast_path() { + return uint64_t(2) << mantissa_explicit_bits(); +} +template <> +inline constexpr uint64_t +binary_format::max_mantissa_fast_path(int64_t power) { + // caller is responsible to ensure that + // power >= 0 && power <= 10 + // + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; +} + +template <> +inline constexpr double +binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} +template <> +inline constexpr float binary_format::exact_power_of_ten(int64_t power) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; +} + +template <> inline 
constexpr int binary_format::largest_power_of_ten() { + return 308; +} +template <> inline constexpr int binary_format::largest_power_of_ten() { + return 38; +} + +template <> +inline constexpr int binary_format::smallest_power_of_ten() { + return -342; +} +template <> inline constexpr int binary_format::smallest_power_of_ten() { + return -64; +} + +template <> inline constexpr size_t binary_format::max_digits() { + return 769; +} +template <> inline constexpr size_t binary_format::max_digits() { + return 114; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::exponent_mask() { + return 0x7F800000; +} +template <> +inline constexpr binary_format::equiv_uint +binary_format::exponent_mask() { + return 0x7FF0000000000000; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::mantissa_mask() { + return 0x007FFFFF; +} +template <> +inline constexpr binary_format::equiv_uint +binary_format::mantissa_mask() { + return 0x000FFFFFFFFFFFFF; +} + +template <> +inline constexpr binary_format::equiv_uint +binary_format::hidden_bit_mask() { + return 0x00800000; +} +template <> +inline constexpr binary_format::equiv_uint +binary_format::hidden_bit_mask() { + return 0x0010000000000000; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +to_float(bool negative, adjusted_mantissa am, T &value) { + using fastfloat_uint = typename binary_format::equiv_uint; + fastfloat_uint word = (fastfloat_uint)am.mantissa; + word |= fastfloat_uint(am.power2) + << binary_format::mantissa_explicit_bits(); + word |= fastfloat_uint(negative) << binary_format::sign_index(); +#if FASTFLOAT_HAS_BIT_CAST + value = std::bit_cast(word); +#else + ::memcpy(&value, &word, sizeof(T)); +#endif +} + +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default +template struct space_lut { + static constexpr bool value[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template constexpr bool space_lut::value[]; + +#endif + +inline constexpr bool is_space(uint8_t c) { return space_lut<>::value[c]; } +#endif + +template static constexpr uint64_t int_cmp_zeros() { + static_assert((sizeof(UC) == 1) || (sizeof(UC) == 2) || (sizeof(UC) == 4), + "Unsupported character size"); + return (sizeof(UC) == 1) ? 0x3030303030303030 + : (sizeof(UC) == 2) + ? 
(uint64_t(UC('0')) << 48 | uint64_t(UC('0')) << 32 | + uint64_t(UC('0')) << 16 | UC('0')) + : (uint64_t(UC('0')) << 32 | UC('0')); +} +template static constexpr int int_cmp_len() { + return sizeof(uint64_t) / sizeof(UC); +} +template static constexpr UC const *str_const_nan() { + return nullptr; +} +template <> constexpr char const *str_const_nan() { return "nan"; } +template <> constexpr wchar_t const *str_const_nan() { return L"nan"; } +template <> constexpr char16_t const *str_const_nan() { + return u"nan"; +} +template <> constexpr char32_t const *str_const_nan() { + return U"nan"; +} +template static constexpr UC const *str_const_inf() { + return nullptr; +} +template <> constexpr char const *str_const_inf() { return "infinity"; } +template <> constexpr wchar_t const *str_const_inf() { + return L"infinity"; +} +template <> constexpr char16_t const *str_const_inf() { + return u"infinity"; +} +template <> constexpr char32_t const *str_const_inf() { + return U"infinity"; +} + +template struct int_luts { + static constexpr uint8_t chdigit[] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, + 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, + 35, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + + static constexpr size_t maxdigits_u64[] = { + 64, 41, 32, 28, 25, 23, 22, 21, 20, 19, 18, 18, 17, 17, 16, 16, 16, 16, + 15, 15, 15, 15, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, 13, 13, 13}; + + static constexpr uint64_t min_safe_u64[] = { + 9223372036854775808ull, 12157665459056928801ull, 4611686018427387904, + 7450580596923828125, 4738381338321616896, 3909821048582988049, + 9223372036854775808ull, 12157665459056928801ull, 10000000000000000000ull, + 5559917313492231481, 2218611106740436992, 8650415919381337933, + 2177953337809371136, 6568408355712890625, 1152921504606846976, + 2862423051509815793, 6746640616477458432, 15181127029874798299ull, + 1638400000000000000, 3243919932521508681, 6221821273427820544, + 11592836324538749809ull, 876488338465357824, 1490116119384765625, + 2481152873203736576, 4052555153018976267, 6502111422497947648, + 10260628712958602189ull, 15943230000000000000ull, 787662783788549761, + 1152921504606846976, 1667889514952984961, 2386420683693101056, + 3379220508056640625, 4738381338321616896}; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template constexpr uint8_t int_luts::chdigit[]; + +template constexpr size_t int_luts::maxdigits_u64[]; + +template constexpr uint64_t int_luts::min_safe_u64[]; + +#endif + +template +fastfloat_really_inline constexpr uint8_t ch_to_digit(UC c) { + return int_luts<>::chdigit[static_cast(c)]; +} + +fastfloat_really_inline constexpr size_t max_digits_u64(int base) { + return int_luts<>::maxdigits_u64[base - 2]; +} + +// If a u64 is exactly max_digits_u64() in length, this is +// the value below which it has definitely overflowed. 
+fastfloat_really_inline constexpr uint64_t min_safe_u64(int base) { + return int_luts<>::min_safe_u64[base - 2]; +} + +} // namespace fast_float + +#endif + + +#ifndef FASTFLOAT_FAST_FLOAT_H +#define FASTFLOAT_FAST_FLOAT_H + + +namespace fast_float { +/** + * This function parses the character sequence [first,last) for a number. It + * parses floating-point numbers expecting a locale-indepent format equivalent + * to what is used by std::strtod in the default ("C") locale. The resulting + * floating-point value is the closest floating-point values (using either float + * or double), using the "round to even" convention for values that would + * otherwise fall right in-between two values. That is, we provide exact parsing + * according to the IEEE standard. + * + * Given a successful parse, the pointer (`ptr`) in the returned value is set to + * point right after the parsed number, and the `value` referenced is set to the + * parsed value. In case of error, the returned `ec` contains a representative + * error, otherwise the default (`std::errc()`) value is stored. + * + * The implementation does not throw and does not allocate memory (e.g., with + * `new` or `malloc`). + * + * Like the C++17 standard, the `fast_float::from_chars` functions take an + * optional last argument of the type `fast_float::chars_format`. It is a bitset + * value: we check whether `fmt & fast_float::chars_format::fixed` and `fmt & + * fast_float::chars_format::scientific` are set to determine whether we allow + * the fixed point and scientific notation respectively. The default is + * `fast_float::chars_format::general` which allows both `fixed` and + * `scientific`. + */ +template ())> +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, + chars_format fmt = chars_format::general) noexcept; + +/** + * Like from_chars, but accepts an `options` argument to govern number parsing. 
+ */ +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_advanced(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept; +/** + * from_chars for integer types. + */ +template ())> +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, int base = 10) noexcept; + +} // namespace fast_float +#endif // FASTFLOAT_FAST_FLOAT_H + +#ifndef FASTFLOAT_ASCII_NUMBER_H +#define FASTFLOAT_ASCII_NUMBER_H + +#include +#include +#include +#include +#include +#include + + +#ifdef FASTFLOAT_SSE2 +#include +#endif + +#ifdef FASTFLOAT_NEON +#include +#endif + +namespace fast_float { + +template fastfloat_really_inline constexpr bool has_simd_opt() { +#ifdef FASTFLOAT_HAS_SIMD + return std::is_same::value; +#else + return false; +#endif +} + +// Next function can be micro-optimized, but compilers are entirely +// able to optimize it well. +template +fastfloat_really_inline constexpr bool is_integer(UC c) noexcept { + return !(c > UC('9') || c < UC('0')); +} + +fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { + return (val & 0xFF00000000000000) >> 56 | (val & 0x00FF000000000000) >> 40 | + (val & 0x0000FF0000000000) >> 24 | (val & 0x000000FF00000000) >> 8 | + (val & 0x00000000FF000000) << 8 | (val & 0x0000000000FF0000) << 24 | + (val & 0x000000000000FF00) << 40 | (val & 0x00000000000000FF) << 56; +} + +// Read 8 UC into a u64. Truncates UC if not char. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +read8_to_u64(const UC *chars) { + if (cpp20_and_in_constexpr() || !std::is_same::value) { + uint64_t val = 0; + for (int i = 0; i < 8; ++i) { + val |= uint64_t(uint8_t(*chars)) << (i * 8); + ++chars; + } + return val; + } + uint64_t val; + ::memcpy(&val, chars, sizeof(uint64_t)); +#if FASTFLOAT_IS_BIG_ENDIAN == 1 + // Need to read as-if the number was in little-endian order. 
+ val = byteswap(val); +#endif + return val; +} + +#ifdef FASTFLOAT_SSE2 + +fastfloat_really_inline uint64_t simd_read8_to_u64(const __m128i data) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i packed = _mm_packus_epi16(data, data); +#ifdef FASTFLOAT_64BIT + return uint64_t(_mm_cvtsi128_si64(packed)); +#else + uint64_t value; + // Visual Studio + older versions of GCC don't support _mm_storeu_si64 + _mm_storel_epi64(reinterpret_cast<__m128i *>(&value), packed); + return value; +#endif + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline uint64_t simd_read8_to_u64(const char16_t *chars) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64( + _mm_loadu_si128(reinterpret_cast(chars))); + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#elif defined(FASTFLOAT_NEON) + +fastfloat_really_inline uint64_t simd_read8_to_u64(const uint16x8_t data) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + uint8x8_t utf8_packed = vmovn_u16(data); + return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline uint64_t simd_read8_to_u64(const char16_t *chars) { + FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64( + vld1q_u16(reinterpret_cast(chars))); + FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#endif // FASTFLOAT_SSE2 + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ()) = 0> +#endif +// dummy for compile +uint64_t simd_read8_to_u64(UC const *) { + return 0; +} + +// credit @aqrit +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint32_t +parse_eight_digits_unrolled(uint64_t val) { + const uint64_t mask = 0x000000FF000000FF; + const uint64_t mul1 = 0x000F424000000064; // 100 + (1000000ULL << 32) + const uint64_t mul2 = 0x0000271000000001; // 1 + (10000ULL << 32) + val -= 0x3030303030303030; + val = (val * 10) + (val >> 8); // val = (val * 2561) >> 8; + val = (((val & mask) * mul1) + (((val >> 16) & mask) * mul2)) >> 32; + return uint32_t(val); +} + +// Call 
this if chars are definitely 8 digits. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint32_t +parse_eight_digits_unrolled(UC const *chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd_opt()) { + return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay + } + return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); +} + +// credit @aqrit +fastfloat_really_inline constexpr bool +is_made_of_eight_digits_fast(uint64_t val) noexcept { + return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & + 0x8080808080808080)); +} + +#ifdef FASTFLOAT_HAS_SIMD + +// Call this if chars might not be 8 digits. +// Using this style (instead of is_made_of_eight_digits_fast() then +// parse_eight_digits_unrolled()) ensures we don't load SIMD registers twice. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +simd_parse_if_eight_digits_unrolled(const char16_t *chars, + uint64_t &i) noexcept { + if (cpp20_and_in_constexpr()) { + return false; + } +#ifdef FASTFLOAT_SSE2 + FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i data = + _mm_loadu_si128(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); + const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); + + if (_mm_movemask_epi8(t1) == 0) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } else + return false; + FASTFLOAT_SIMD_RESTORE_WARNINGS +#elif defined(FASTFLOAT_NEON) + FASTFLOAT_SIMD_DISABLE_WARNINGS + const uint16x8_t data = vld1q_u16(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0')); + const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1)); + + if (vminvq_u16(mask) == 0xFFFF) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } else + 
return false; + FASTFLOAT_SIMD_RESTORE_WARNINGS +#else + (void)chars; + (void)i; + return false; +#endif // FASTFLOAT_SSE2 +} + +#endif // FASTFLOAT_HAS_SIMD + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ()) = 0> +#endif +// dummy for compile +bool simd_parse_if_eight_digits_unrolled(UC const *, uint64_t &) { + return 0; +} + +template ::value) = 0> +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +loop_parse_if_eight_digits(const UC *&p, const UC *const pend, uint64_t &i) { + if (!has_simd_opt()) { + return; + } + while ((std::distance(p, pend) >= 8) && + simd_parse_if_eight_digits_unrolled( + p, i)) { // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +loop_parse_if_eight_digits(const char *&p, const char *const pend, + uint64_t &i) { + // optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while ((std::distance(p, pend) >= 8) && + is_made_of_eight_digits_fast(read8_to_u64(p))) { + i = i * 100000000 + + parse_eight_digits_unrolled(read8_to_u64( + p)); // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +enum class parse_error { + no_error, + // [JSON-only] The minus sign must be followed by an integer. + missing_integer_after_sign, + // A sign must be followed by an integer or dot. + missing_integer_or_dot_after_sign, + // [JSON-only] The integer part must not have leading zeros. + leading_zeros_in_integer_part, + // [JSON-only] The integer part must have at least one digit. + no_digits_in_integer_part, + // [JSON-only] If there is a decimal point, there must be digits in the + // fractional part. + no_digits_in_fractional_part, + // The mantissa must have at least one digit. + no_digits_in_mantissa, + // Scientific notation requires an exponential part. 
+ missing_exponential_part, +}; + +template struct parsed_number_string_t { + int64_t exponent{0}; + uint64_t mantissa{0}; + UC const *lastmatch{nullptr}; + bool negative{false}; + bool valid{false}; + bool too_many_digits{false}; + // contains the range of the significant digits + span integer{}; // non-nullable + span fraction{}; // nullable + parse_error error{parse_error::no_error}; +}; + +using byte_span = span; +using parsed_number_string = parsed_number_string_t; + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t +report_parse_error(UC const *p, parse_error error) { + parsed_number_string_t answer; + answer.valid = false; + answer.lastmatch = p; + answer.error = error; + return answer; +} + +// Assuming that you use no more than 19 digits, this will +// parse an ASCII string. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t +parse_number_string(UC const *p, UC const *pend, + parse_options_t options) noexcept { + chars_format const fmt = options.format; + UC const decimal_point = options.decimal_point; + + parsed_number_string_t answer; + answer.valid = false; + answer.too_many_digits = false; + answer.negative = (*p == UC('-')); +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) { +#else + if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here +#endif + ++p; + if (p == pend) { + return report_parse_error( + p, parse_error::missing_integer_or_dot_after_sign); + } + if (fmt & FASTFLOAT_JSONFMT) { + if (!is_integer(*p)) { // a sign must be followed by an integer + return report_parse_error(p, + parse_error::missing_integer_after_sign); + } + } else { + if (!is_integer(*p) && + (*p != + decimal_point)) { // a sign must be followed by an integer or the dot + return report_parse_error( + p, parse_error::missing_integer_or_dot_after_sign); + } + } + } + UC const *const start_digits = p; + + uint64_t i 
= 0; // an unsigned int avoids signed overflows (which are bad) + + while ((p != pend) && is_integer(*p)) { + // a multiplication by 10 is cheaper than an arbitrary integer + // multiplication + i = 10 * i + + uint64_t(*p - + UC('0')); // might overflow, we will handle the overflow later + ++p; + } + UC const *const end_of_integer_part = p; + int64_t digit_count = int64_t(end_of_integer_part - start_digits); + answer.integer = span(start_digits, size_t(digit_count)); + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in integer part, without leading zeros + if (digit_count == 0) { + return report_parse_error(p, parse_error::no_digits_in_integer_part); + } + if ((start_digits[0] == UC('0') && digit_count > 1)) { + return report_parse_error(start_digits, + parse_error::leading_zeros_in_integer_part); + } + } + + int64_t exponent = 0; + const bool has_decimal_point = (p != pend) && (*p == decimal_point); + if (has_decimal_point) { + ++p; + UC const *before = p; + // can occur at most twice without overflowing, but let it occur more, since + // for integers with many digits, digit parsing is the primary bottleneck. + loop_parse_if_eight_digits(p, pend, i); + + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - UC('0')); + ++p; + i = i * 10 + digit; // in rare cases, this will overflow, but that's ok + } + exponent = before - p; + answer.fraction = span(before, size_t(p - before)); + digit_count -= exponent; + } + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in fractional part + if (has_decimal_point && exponent == 0) { + return report_parse_error(p, + parse_error::no_digits_in_fractional_part); + } + } else if (digit_count == + 0) { // we must have encountered at least one integer! 
+ return report_parse_error(p, parse_error::no_digits_in_mantissa); + } + int64_t exp_number = 0; // explicit exponential part + if (((fmt & chars_format::scientific) && (p != pend) && + ((UC('e') == *p) || (UC('E') == *p))) || + ((fmt & FASTFLOAT_FORTRANFMT) && (p != pend) && + ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || + (UC('D') == *p)))) { + UC const *location_of_e = p; + if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || + (UC('D') == *p)) { + ++p; + } + bool neg_exp = false; + if ((p != pend) && (UC('-') == *p)) { + neg_exp = true; + ++p; + } else if ((p != pend) && + (UC('+') == + *p)) { // '+' on exponent is allowed by C++17 20.19.3.(7.1) + ++p; + } + if ((p == pend) || !is_integer(*p)) { + if (!(fmt & chars_format::fixed)) { + // The exponential part is invalid for scientific notation, so it must + // be a trailing token for fixed notation. However, fixed notation is + // disabled, so report a scientific notation error. + return report_parse_error(p, parse_error::missing_exponential_part); + } + // Otherwise, we will be ignoring the 'e'. + p = location_of_e; + } else { + while ((p != pend) && is_integer(*p)) { + uint8_t digit = uint8_t(*p - UC('0')); + if (exp_number < 0x10000000) { + exp_number = 10 * exp_number + digit; + } + ++p; + } + if (neg_exp) { + exp_number = -exp_number; + } + exponent += exp_number; + } + } else { + // If it scientific and not fixed, we have to bail out. + if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { + return report_parse_error(p, parse_error::missing_exponential_part); + } + } + answer.lastmatch = p; + answer.valid = true; + + // If we frequently had to deal with long strings of digits, + // we could extend our code by using a 128-bit integer instead + // of a 64-bit integer. However, this is uncommon. + // + // We can deal with up to 19 digits. + if (digit_count > 19) { // this is uncommon + // It is possible that the integer had an overflow. 
+ // We have to handle the case where we have 0.0000somenumber. + // We need to be mindful of the case where we only have zeroes... + // E.g., 0.000000000...000. + UC const *start = start_digits; + while ((start != pend) && (*start == UC('0') || *start == decimal_point)) { + if (*start == UC('0')) { + digit_count--; + } + start++; + } + + if (digit_count > 19) { + answer.too_many_digits = true; + // Let us start again, this time, avoiding overflows. + // We don't need to check if is_integer, since we use the + // pre-tokenized spans from above. + i = 0; + p = answer.integer.ptr; + UC const *int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + if (i >= minimal_nineteen_digit_integer) { // We have a big integers + exponent = end_of_integer_part - p + exp_number; + } else { // We have a value with a fractional component. 
+ p = answer.fraction.ptr; + UC const *frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; + } + // We have now corrected both exponent and i, to a truncated value + } + } + answer.exponent = exponent; + answer.mantissa = i; + return answer; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 from_chars_result_t +parse_int_string(UC const *p, UC const *pend, T &value, int base) { + from_chars_result_t answer; + + UC const *const first = p; + + bool negative = (*p == UC('-')); + if (!std::is_signed::value && negative) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if ((*p == UC('-')) || (*p == UC('+'))) { +#else + if (*p == UC('-')) { +#endif + ++p; + } + + UC const *const start_num = p; + + while (p != pend && *p == UC('0')) { + ++p; + } + + const bool has_leading_zeros = p > start_num; + + UC const *const start_digits = p; + + uint64_t i = 0; + if (base == 10) { + loop_parse_if_eight_digits(p, pend, i); // use SIMD if possible + } + while (p != pend) { + uint8_t digit = ch_to_digit(*p); + if (digit >= base) { + break; + } + i = uint64_t(base) * i + digit; // might overflow, check this later + p++; + } + + size_t digit_count = size_t(p - start_digits); + + if (digit_count == 0) { + if (has_leading_zeros) { + value = 0; + answer.ec = std::errc(); + answer.ptr = p; + } else { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + } + return answer; + } + + answer.ptr = p; + + // check u64 overflow + size_t max_digits = max_digits_u64(base); + if (digit_count > max_digits) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + // this check can be eliminated for all other types, but they will all require + // a max_digits(base) equivalent + if (digit_count == max_digits && i < 
min_safe_u64(base)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + + // check other types overflow + if (!std::is_same::value) { + if (i > uint64_t(std::numeric_limits::max()) + uint64_t(negative)) { + answer.ec = std::errc::result_out_of_range; + return answer; + } + } + + if (negative) { +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(push) +#pragma warning(disable : 4146) +#endif + // this weird workaround is required because: + // - converting unsigned to signed when its value is greater than signed max + // is UB pre-C++23. + // - reinterpret_casting (~i + 1) would work, but it is not constexpr + // this is always optimized into a neg instruction (note: T is an integer + // type) + value = T(-std::numeric_limits::max() - + T(i - uint64_t(std::numeric_limits::max()))); +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(pop) +#endif + } else { + value = T(i); + } + + answer.ec = std::errc(); + return answer; +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_FAST_TABLE_H +#define FASTFLOAT_FAST_TABLE_H + +#include + +namespace fast_float { + +/** + * When mapping numbers from decimal to binary, + * we go from w * 10^q to m * 2^p but we have + * 10^q = 5^q * 2^q, so effectively + * we are trying to match + * w * 2^q * 5^q to m * 2^p. Thus the powers of two + * are not a concern since they can be represented + * exactly using the binary notation, only the powers of five + * affect the binary significand. + */ + +/** + * The smallest non-zero float (binary64) is 2^-1074. + * We take as input numbers of the form w x 10^q where w < 2^64. + * We have that w * 10^-343 < 2^(64-344) 5^-343 < 2^-1076. + * However, we have that + * (2^64-1) * 10^-342 = (2^64-1) * 2^-342 * 5^-342 > 2^-1074. + * Thus it is possible for a number of the form w * 10^-342 where + * w is a 64-bit value to be a non-zero floating-point number. 
+ ********* + * Any number of form w * 10^309 where w>= 1 is going to be + * infinite in binary64 so we never need to worry about powers + * of 5 greater than 308. + */ +template struct powers_template { + + constexpr static int smallest_power_of_five = + binary_format::smallest_power_of_ten(); + constexpr static int largest_power_of_five = + binary_format::largest_power_of_ten(); + constexpr static int number_of_entries = + 2 * (largest_power_of_five - smallest_power_of_five + 1); + // Powers of five from 5^-342 all the way to 5^308 rounded toward one. + constexpr static uint64_t power_of_five_128[number_of_entries] = { + 0xeef453d6923bd65a, 0x113faa2906a13b3f, + 0x9558b4661b6565f8, 0x4ac7ca59a424c507, + 0xbaaee17fa23ebf76, 0x5d79bcf00d2df649, + 0xe95a99df8ace6f53, 0xf4d82c2c107973dc, + 0x91d8a02bb6c10594, 0x79071b9b8a4be869, + 0xb64ec836a47146f9, 0x9748e2826cdee284, + 0xe3e27a444d8d98b7, 0xfd1b1b2308169b25, + 0x8e6d8c6ab0787f72, 0xfe30f0f5e50e20f7, + 0xb208ef855c969f4f, 0xbdbd2d335e51a935, + 0xde8b2b66b3bc4723, 0xad2c788035e61382, + 0x8b16fb203055ac76, 0x4c3bcb5021afcc31, + 0xaddcb9e83c6b1793, 0xdf4abe242a1bbf3d, + 0xd953e8624b85dd78, 0xd71d6dad34a2af0d, + 0x87d4713d6f33aa6b, 0x8672648c40e5ad68, + 0xa9c98d8ccb009506, 0x680efdaf511f18c2, + 0xd43bf0effdc0ba48, 0x212bd1b2566def2, + 0x84a57695fe98746d, 0x14bb630f7604b57, + 0xa5ced43b7e3e9188, 0x419ea3bd35385e2d, + 0xcf42894a5dce35ea, 0x52064cac828675b9, + 0x818995ce7aa0e1b2, 0x7343efebd1940993, + 0xa1ebfb4219491a1f, 0x1014ebe6c5f90bf8, + 0xca66fa129f9b60a6, 0xd41a26e077774ef6, + 0xfd00b897478238d0, 0x8920b098955522b4, + 0x9e20735e8cb16382, 0x55b46e5f5d5535b0, + 0xc5a890362fddbc62, 0xeb2189f734aa831d, + 0xf712b443bbd52b7b, 0xa5e9ec7501d523e4, + 0x9a6bb0aa55653b2d, 0x47b233c92125366e, + 0xc1069cd4eabe89f8, 0x999ec0bb696e840a, + 0xf148440a256e2c76, 0xc00670ea43ca250d, + 0x96cd2a865764dbca, 0x380406926a5e5728, + 0xbc807527ed3e12bc, 0xc605083704f5ecf2, + 0xeba09271e88d976b, 0xf7864a44c633682e, + 0x93445b8731587ea3, 
0x7ab3ee6afbe0211d, + 0xb8157268fdae9e4c, 0x5960ea05bad82964, + 0xe61acf033d1a45df, 0x6fb92487298e33bd, + 0x8fd0c16206306bab, 0xa5d3b6d479f8e056, + 0xb3c4f1ba87bc8696, 0x8f48a4899877186c, + 0xe0b62e2929aba83c, 0x331acdabfe94de87, + 0x8c71dcd9ba0b4925, 0x9ff0c08b7f1d0b14, + 0xaf8e5410288e1b6f, 0x7ecf0ae5ee44dd9, + 0xdb71e91432b1a24a, 0xc9e82cd9f69d6150, + 0x892731ac9faf056e, 0xbe311c083a225cd2, + 0xab70fe17c79ac6ca, 0x6dbd630a48aaf406, + 0xd64d3d9db981787d, 0x92cbbccdad5b108, + 0x85f0468293f0eb4e, 0x25bbf56008c58ea5, + 0xa76c582338ed2621, 0xaf2af2b80af6f24e, + 0xd1476e2c07286faa, 0x1af5af660db4aee1, + 0x82cca4db847945ca, 0x50d98d9fc890ed4d, + 0xa37fce126597973c, 0xe50ff107bab528a0, + 0xcc5fc196fefd7d0c, 0x1e53ed49a96272c8, + 0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7a, + 0x9faacf3df73609b1, 0x77b191618c54e9ac, + 0xc795830d75038c1d, 0xd59df5b9ef6a2417, + 0xf97ae3d0d2446f25, 0x4b0573286b44ad1d, + 0x9becce62836ac577, 0x4ee367f9430aec32, + 0xc2e801fb244576d5, 0x229c41f793cda73f, + 0xf3a20279ed56d48a, 0x6b43527578c1110f, + 0x9845418c345644d6, 0x830a13896b78aaa9, + 0xbe5691ef416bd60c, 0x23cc986bc656d553, + 0xedec366b11c6cb8f, 0x2cbfbe86b7ec8aa8, + 0x94b3a202eb1c3f39, 0x7bf7d71432f3d6a9, + 0xb9e08a83a5e34f07, 0xdaf5ccd93fb0cc53, + 0xe858ad248f5c22c9, 0xd1b3400f8f9cff68, + 0x91376c36d99995be, 0x23100809b9c21fa1, + 0xb58547448ffffb2d, 0xabd40a0c2832a78a, + 0xe2e69915b3fff9f9, 0x16c90c8f323f516c, + 0x8dd01fad907ffc3b, 0xae3da7d97f6792e3, + 0xb1442798f49ffb4a, 0x99cd11cfdf41779c, + 0xdd95317f31c7fa1d, 0x40405643d711d583, + 0x8a7d3eef7f1cfc52, 0x482835ea666b2572, + 0xad1c8eab5ee43b66, 0xda3243650005eecf, + 0xd863b256369d4a40, 0x90bed43e40076a82, + 0x873e4f75e2224e68, 0x5a7744a6e804a291, + 0xa90de3535aaae202, 0x711515d0a205cb36, + 0xd3515c2831559a83, 0xd5a5b44ca873e03, + 0x8412d9991ed58091, 0xe858790afe9486c2, + 0xa5178fff668ae0b6, 0x626e974dbe39a872, + 0xce5d73ff402d98e3, 0xfb0a3d212dc8128f, + 0x80fa687f881c7f8e, 0x7ce66634bc9d0b99, + 0xa139029f6a239f72, 0x1c1fffc1ebc44e80, + 
0xc987434744ac874e, 0xa327ffb266b56220, + 0xfbe9141915d7a922, 0x4bf1ff9f0062baa8, + 0x9d71ac8fada6c9b5, 0x6f773fc3603db4a9, + 0xc4ce17b399107c22, 0xcb550fb4384d21d3, + 0xf6019da07f549b2b, 0x7e2a53a146606a48, + 0x99c102844f94e0fb, 0x2eda7444cbfc426d, + 0xc0314325637a1939, 0xfa911155fefb5308, + 0xf03d93eebc589f88, 0x793555ab7eba27ca, + 0x96267c7535b763b5, 0x4bc1558b2f3458de, + 0xbbb01b9283253ca2, 0x9eb1aaedfb016f16, + 0xea9c227723ee8bcb, 0x465e15a979c1cadc, + 0x92a1958a7675175f, 0xbfacd89ec191ec9, + 0xb749faed14125d36, 0xcef980ec671f667b, + 0xe51c79a85916f484, 0x82b7e12780e7401a, + 0x8f31cc0937ae58d2, 0xd1b2ecb8b0908810, + 0xb2fe3f0b8599ef07, 0x861fa7e6dcb4aa15, + 0xdfbdcece67006ac9, 0x67a791e093e1d49a, + 0x8bd6a141006042bd, 0xe0c8bb2c5c6d24e0, + 0xaecc49914078536d, 0x58fae9f773886e18, + 0xda7f5bf590966848, 0xaf39a475506a899e, + 0x888f99797a5e012d, 0x6d8406c952429603, + 0xaab37fd7d8f58178, 0xc8e5087ba6d33b83, + 0xd5605fcdcf32e1d6, 0xfb1e4a9a90880a64, + 0x855c3be0a17fcd26, 0x5cf2eea09a55067f, + 0xa6b34ad8c9dfc06f, 0xf42faa48c0ea481e, + 0xd0601d8efc57b08b, 0xf13b94daf124da26, + 0x823c12795db6ce57, 0x76c53d08d6b70858, + 0xa2cb1717b52481ed, 0x54768c4b0c64ca6e, + 0xcb7ddcdda26da268, 0xa9942f5dcf7dfd09, + 0xfe5d54150b090b02, 0xd3f93b35435d7c4c, + 0x9efa548d26e5a6e1, 0xc47bc5014a1a6daf, + 0xc6b8e9b0709f109a, 0x359ab6419ca1091b, + 0xf867241c8cc6d4c0, 0xc30163d203c94b62, + 0x9b407691d7fc44f8, 0x79e0de63425dcf1d, + 0xc21094364dfb5636, 0x985915fc12f542e4, + 0xf294b943e17a2bc4, 0x3e6f5b7b17b2939d, + 0x979cf3ca6cec5b5a, 0xa705992ceecf9c42, + 0xbd8430bd08277231, 0x50c6ff782a838353, + 0xece53cec4a314ebd, 0xa4f8bf5635246428, + 0x940f4613ae5ed136, 0x871b7795e136be99, + 0xb913179899f68584, 0x28e2557b59846e3f, + 0xe757dd7ec07426e5, 0x331aeada2fe589cf, + 0x9096ea6f3848984f, 0x3ff0d2c85def7621, + 0xb4bca50b065abe63, 0xfed077a756b53a9, + 0xe1ebce4dc7f16dfb, 0xd3e8495912c62894, + 0x8d3360f09cf6e4bd, 0x64712dd7abbbd95c, + 0xb080392cc4349dec, 0xbd8d794d96aacfb3, + 0xdca04777f541c567, 
0xecf0d7a0fc5583a0, + 0x89e42caaf9491b60, 0xf41686c49db57244, + 0xac5d37d5b79b6239, 0x311c2875c522ced5, + 0xd77485cb25823ac7, 0x7d633293366b828b, + 0x86a8d39ef77164bc, 0xae5dff9c02033197, + 0xa8530886b54dbdeb, 0xd9f57f830283fdfc, + 0xd267caa862a12d66, 0xd072df63c324fd7b, + 0x8380dea93da4bc60, 0x4247cb9e59f71e6d, + 0xa46116538d0deb78, 0x52d9be85f074e608, + 0xcd795be870516656, 0x67902e276c921f8b, + 0x806bd9714632dff6, 0xba1cd8a3db53b6, + 0xa086cfcd97bf97f3, 0x80e8a40eccd228a4, + 0xc8a883c0fdaf7df0, 0x6122cd128006b2cd, + 0xfad2a4b13d1b5d6c, 0x796b805720085f81, + 0x9cc3a6eec6311a63, 0xcbe3303674053bb0, + 0xc3f490aa77bd60fc, 0xbedbfc4411068a9c, + 0xf4f1b4d515acb93b, 0xee92fb5515482d44, + 0x991711052d8bf3c5, 0x751bdd152d4d1c4a, + 0xbf5cd54678eef0b6, 0xd262d45a78a0635d, + 0xef340a98172aace4, 0x86fb897116c87c34, + 0x9580869f0e7aac0e, 0xd45d35e6ae3d4da0, + 0xbae0a846d2195712, 0x8974836059cca109, + 0xe998d258869facd7, 0x2bd1a438703fc94b, + 0x91ff83775423cc06, 0x7b6306a34627ddcf, + 0xb67f6455292cbf08, 0x1a3bc84c17b1d542, + 0xe41f3d6a7377eeca, 0x20caba5f1d9e4a93, + 0x8e938662882af53e, 0x547eb47b7282ee9c, + 0xb23867fb2a35b28d, 0xe99e619a4f23aa43, + 0xdec681f9f4c31f31, 0x6405fa00e2ec94d4, + 0x8b3c113c38f9f37e, 0xde83bc408dd3dd04, + 0xae0b158b4738705e, 0x9624ab50b148d445, + 0xd98ddaee19068c76, 0x3badd624dd9b0957, + 0x87f8a8d4cfa417c9, 0xe54ca5d70a80e5d6, + 0xa9f6d30a038d1dbc, 0x5e9fcf4ccd211f4c, + 0xd47487cc8470652b, 0x7647c3200069671f, + 0x84c8d4dfd2c63f3b, 0x29ecd9f40041e073, + 0xa5fb0a17c777cf09, 0xf468107100525890, + 0xcf79cc9db955c2cc, 0x7182148d4066eeb4, + 0x81ac1fe293d599bf, 0xc6f14cd848405530, + 0xa21727db38cb002f, 0xb8ada00e5a506a7c, + 0xca9cf1d206fdc03b, 0xa6d90811f0e4851c, + 0xfd442e4688bd304a, 0x908f4a166d1da663, + 0x9e4a9cec15763e2e, 0x9a598e4e043287fe, + 0xc5dd44271ad3cdba, 0x40eff1e1853f29fd, + 0xf7549530e188c128, 0xd12bee59e68ef47c, + 0x9a94dd3e8cf578b9, 0x82bb74f8301958ce, + 0xc13a148e3032d6e7, 0xe36a52363c1faf01, + 0xf18899b1bc3f8ca1, 0xdc44e6c3cb279ac1, + 
0x96f5600f15a7b7e5, 0x29ab103a5ef8c0b9, + 0xbcb2b812db11a5de, 0x7415d448f6b6f0e7, + 0xebdf661791d60f56, 0x111b495b3464ad21, + 0x936b9fcebb25c995, 0xcab10dd900beec34, + 0xb84687c269ef3bfb, 0x3d5d514f40eea742, + 0xe65829b3046b0afa, 0xcb4a5a3112a5112, + 0x8ff71a0fe2c2e6dc, 0x47f0e785eaba72ab, + 0xb3f4e093db73a093, 0x59ed216765690f56, + 0xe0f218b8d25088b8, 0x306869c13ec3532c, + 0x8c974f7383725573, 0x1e414218c73a13fb, + 0xafbd2350644eeacf, 0xe5d1929ef90898fa, + 0xdbac6c247d62a583, 0xdf45f746b74abf39, + 0x894bc396ce5da772, 0x6b8bba8c328eb783, + 0xab9eb47c81f5114f, 0x66ea92f3f326564, + 0xd686619ba27255a2, 0xc80a537b0efefebd, + 0x8613fd0145877585, 0xbd06742ce95f5f36, + 0xa798fc4196e952e7, 0x2c48113823b73704, + 0xd17f3b51fca3a7a0, 0xf75a15862ca504c5, + 0x82ef85133de648c4, 0x9a984d73dbe722fb, + 0xa3ab66580d5fdaf5, 0xc13e60d0d2e0ebba, + 0xcc963fee10b7d1b3, 0x318df905079926a8, + 0xffbbcfe994e5c61f, 0xfdf17746497f7052, + 0x9fd561f1fd0f9bd3, 0xfeb6ea8bedefa633, + 0xc7caba6e7c5382c8, 0xfe64a52ee96b8fc0, + 0xf9bd690a1b68637b, 0x3dfdce7aa3c673b0, + 0x9c1661a651213e2d, 0x6bea10ca65c084e, + 0xc31bfa0fe5698db8, 0x486e494fcff30a62, + 0xf3e2f893dec3f126, 0x5a89dba3c3efccfa, + 0x986ddb5c6b3a76b7, 0xf89629465a75e01c, + 0xbe89523386091465, 0xf6bbb397f1135823, + 0xee2ba6c0678b597f, 0x746aa07ded582e2c, + 0x94db483840b717ef, 0xa8c2a44eb4571cdc, + 0xba121a4650e4ddeb, 0x92f34d62616ce413, + 0xe896a0d7e51e1566, 0x77b020baf9c81d17, + 0x915e2486ef32cd60, 0xace1474dc1d122e, + 0xb5b5ada8aaff80b8, 0xd819992132456ba, + 0xe3231912d5bf60e6, 0x10e1fff697ed6c69, + 0x8df5efabc5979c8f, 0xca8d3ffa1ef463c1, + 0xb1736b96b6fd83b3, 0xbd308ff8a6b17cb2, + 0xddd0467c64bce4a0, 0xac7cb3f6d05ddbde, + 0x8aa22c0dbef60ee4, 0x6bcdf07a423aa96b, + 0xad4ab7112eb3929d, 0x86c16c98d2c953c6, + 0xd89d64d57a607744, 0xe871c7bf077ba8b7, + 0x87625f056c7c4a8b, 0x11471cd764ad4972, + 0xa93af6c6c79b5d2d, 0xd598e40d3dd89bcf, + 0xd389b47879823479, 0x4aff1d108d4ec2c3, + 0x843610cb4bf160cb, 0xcedf722a585139ba, + 0xa54394fe1eedb8fe, 
0xc2974eb4ee658828, + 0xce947a3da6a9273e, 0x733d226229feea32, + 0x811ccc668829b887, 0x806357d5a3f525f, + 0xa163ff802a3426a8, 0xca07c2dcb0cf26f7, + 0xc9bcff6034c13052, 0xfc89b393dd02f0b5, + 0xfc2c3f3841f17c67, 0xbbac2078d443ace2, + 0x9d9ba7832936edc0, 0xd54b944b84aa4c0d, + 0xc5029163f384a931, 0xa9e795e65d4df11, + 0xf64335bcf065d37d, 0x4d4617b5ff4a16d5, + 0x99ea0196163fa42e, 0x504bced1bf8e4e45, + 0xc06481fb9bcf8d39, 0xe45ec2862f71e1d6, + 0xf07da27a82c37088, 0x5d767327bb4e5a4c, + 0x964e858c91ba2655, 0x3a6a07f8d510f86f, + 0xbbe226efb628afea, 0x890489f70a55368b, + 0xeadab0aba3b2dbe5, 0x2b45ac74ccea842e, + 0x92c8ae6b464fc96f, 0x3b0b8bc90012929d, + 0xb77ada0617e3bbcb, 0x9ce6ebb40173744, + 0xe55990879ddcaabd, 0xcc420a6a101d0515, + 0x8f57fa54c2a9eab6, 0x9fa946824a12232d, + 0xb32df8e9f3546564, 0x47939822dc96abf9, + 0xdff9772470297ebd, 0x59787e2b93bc56f7, + 0x8bfbea76c619ef36, 0x57eb4edb3c55b65a, + 0xaefae51477a06b03, 0xede622920b6b23f1, + 0xdab99e59958885c4, 0xe95fab368e45eced, + 0x88b402f7fd75539b, 0x11dbcb0218ebb414, + 0xaae103b5fcd2a881, 0xd652bdc29f26a119, + 0xd59944a37c0752a2, 0x4be76d3346f0495f, + 0x857fcae62d8493a5, 0x6f70a4400c562ddb, + 0xa6dfbd9fb8e5b88e, 0xcb4ccd500f6bb952, + 0xd097ad07a71f26b2, 0x7e2000a41346a7a7, + 0x825ecc24c873782f, 0x8ed400668c0c28c8, + 0xa2f67f2dfa90563b, 0x728900802f0f32fa, + 0xcbb41ef979346bca, 0x4f2b40a03ad2ffb9, + 0xfea126b7d78186bc, 0xe2f610c84987bfa8, + 0x9f24b832e6b0f436, 0xdd9ca7d2df4d7c9, + 0xc6ede63fa05d3143, 0x91503d1c79720dbb, + 0xf8a95fcf88747d94, 0x75a44c6397ce912a, + 0x9b69dbe1b548ce7c, 0xc986afbe3ee11aba, + 0xc24452da229b021b, 0xfbe85badce996168, + 0xf2d56790ab41c2a2, 0xfae27299423fb9c3, + 0x97c560ba6b0919a5, 0xdccd879fc967d41a, + 0xbdb6b8e905cb600f, 0x5400e987bbc1c920, + 0xed246723473e3813, 0x290123e9aab23b68, + 0x9436c0760c86e30b, 0xf9a0b6720aaf6521, + 0xb94470938fa89bce, 0xf808e40e8d5b3e69, + 0xe7958cb87392c2c2, 0xb60b1d1230b20e04, + 0x90bd77f3483bb9b9, 0xb1c6f22b5e6f48c2, + 0xb4ecd5f01a4aa828, 0x1e38aeb6360b1af3, + 
0xe2280b6c20dd5232, 0x25c6da63c38de1b0, + 0x8d590723948a535f, 0x579c487e5a38ad0e, + 0xb0af48ec79ace837, 0x2d835a9df0c6d851, + 0xdcdb1b2798182244, 0xf8e431456cf88e65, + 0x8a08f0f8bf0f156b, 0x1b8e9ecb641b58ff, + 0xac8b2d36eed2dac5, 0xe272467e3d222f3f, + 0xd7adf884aa879177, 0x5b0ed81dcc6abb0f, + 0x86ccbb52ea94baea, 0x98e947129fc2b4e9, + 0xa87fea27a539e9a5, 0x3f2398d747b36224, + 0xd29fe4b18e88640e, 0x8eec7f0d19a03aad, + 0x83a3eeeef9153e89, 0x1953cf68300424ac, + 0xa48ceaaab75a8e2b, 0x5fa8c3423c052dd7, + 0xcdb02555653131b6, 0x3792f412cb06794d, + 0x808e17555f3ebf11, 0xe2bbd88bbee40bd0, + 0xa0b19d2ab70e6ed6, 0x5b6aceaeae9d0ec4, + 0xc8de047564d20a8b, 0xf245825a5a445275, + 0xfb158592be068d2e, 0xeed6e2f0f0d56712, + 0x9ced737bb6c4183d, 0x55464dd69685606b, + 0xc428d05aa4751e4c, 0xaa97e14c3c26b886, + 0xf53304714d9265df, 0xd53dd99f4b3066a8, + 0x993fe2c6d07b7fab, 0xe546a8038efe4029, + 0xbf8fdb78849a5f96, 0xde98520472bdd033, + 0xef73d256a5c0f77c, 0x963e66858f6d4440, + 0x95a8637627989aad, 0xdde7001379a44aa8, + 0xbb127c53b17ec159, 0x5560c018580d5d52, + 0xe9d71b689dde71af, 0xaab8f01e6e10b4a6, + 0x9226712162ab070d, 0xcab3961304ca70e8, + 0xb6b00d69bb55c8d1, 0x3d607b97c5fd0d22, + 0xe45c10c42a2b3b05, 0x8cb89a7db77c506a, + 0x8eb98a7a9a5b04e3, 0x77f3608e92adb242, + 0xb267ed1940f1c61c, 0x55f038b237591ed3, + 0xdf01e85f912e37a3, 0x6b6c46dec52f6688, + 0x8b61313bbabce2c6, 0x2323ac4b3b3da015, + 0xae397d8aa96c1b77, 0xabec975e0a0d081a, + 0xd9c7dced53c72255, 0x96e7bd358c904a21, + 0x881cea14545c7575, 0x7e50d64177da2e54, + 0xaa242499697392d2, 0xdde50bd1d5d0b9e9, + 0xd4ad2dbfc3d07787, 0x955e4ec64b44e864, + 0x84ec3c97da624ab4, 0xbd5af13bef0b113e, + 0xa6274bbdd0fadd61, 0xecb1ad8aeacdd58e, + 0xcfb11ead453994ba, 0x67de18eda5814af2, + 0x81ceb32c4b43fcf4, 0x80eacf948770ced7, + 0xa2425ff75e14fc31, 0xa1258379a94d028d, + 0xcad2f7f5359a3b3e, 0x96ee45813a04330, + 0xfd87b5f28300ca0d, 0x8bca9d6e188853fc, + 0x9e74d1b791e07e48, 0x775ea264cf55347e, + 0xc612062576589dda, 0x95364afe032a819e, + 0xf79687aed3eec551, 
0x3a83ddbd83f52205, + 0x9abe14cd44753b52, 0xc4926a9672793543, + 0xc16d9a0095928a27, 0x75b7053c0f178294, + 0xf1c90080baf72cb1, 0x5324c68b12dd6339, + 0x971da05074da7bee, 0xd3f6fc16ebca5e04, + 0xbce5086492111aea, 0x88f4bb1ca6bcf585, + 0xec1e4a7db69561a5, 0x2b31e9e3d06c32e6, + 0x9392ee8e921d5d07, 0x3aff322e62439fd0, + 0xb877aa3236a4b449, 0x9befeb9fad487c3, + 0xe69594bec44de15b, 0x4c2ebe687989a9b4, + 0x901d7cf73ab0acd9, 0xf9d37014bf60a11, + 0xb424dc35095cd80f, 0x538484c19ef38c95, + 0xe12e13424bb40e13, 0x2865a5f206b06fba, + 0x8cbccc096f5088cb, 0xf93f87b7442e45d4, + 0xafebff0bcb24aafe, 0xf78f69a51539d749, + 0xdbe6fecebdedd5be, 0xb573440e5a884d1c, + 0x89705f4136b4a597, 0x31680a88f8953031, + 0xabcc77118461cefc, 0xfdc20d2b36ba7c3e, + 0xd6bf94d5e57a42bc, 0x3d32907604691b4d, + 0x8637bd05af6c69b5, 0xa63f9a49c2c1b110, + 0xa7c5ac471b478423, 0xfcf80dc33721d54, + 0xd1b71758e219652b, 0xd3c36113404ea4a9, + 0x83126e978d4fdf3b, 0x645a1cac083126ea, + 0xa3d70a3d70a3d70a, 0x3d70a3d70a3d70a4, + 0xcccccccccccccccc, 0xcccccccccccccccd, + 0x8000000000000000, 0x0, + 0xa000000000000000, 0x0, + 0xc800000000000000, 0x0, + 0xfa00000000000000, 0x0, + 0x9c40000000000000, 0x0, + 0xc350000000000000, 0x0, + 0xf424000000000000, 0x0, + 0x9896800000000000, 0x0, + 0xbebc200000000000, 0x0, + 0xee6b280000000000, 0x0, + 0x9502f90000000000, 0x0, + 0xba43b74000000000, 0x0, + 0xe8d4a51000000000, 0x0, + 0x9184e72a00000000, 0x0, + 0xb5e620f480000000, 0x0, + 0xe35fa931a0000000, 0x0, + 0x8e1bc9bf04000000, 0x0, + 0xb1a2bc2ec5000000, 0x0, + 0xde0b6b3a76400000, 0x0, + 0x8ac7230489e80000, 0x0, + 0xad78ebc5ac620000, 0x0, + 0xd8d726b7177a8000, 0x0, + 0x878678326eac9000, 0x0, + 0xa968163f0a57b400, 0x0, + 0xd3c21bcecceda100, 0x0, + 0x84595161401484a0, 0x0, + 0xa56fa5b99019a5c8, 0x0, + 0xcecb8f27f4200f3a, 0x0, + 0x813f3978f8940984, 0x4000000000000000, + 0xa18f07d736b90be5, 0x5000000000000000, + 0xc9f2c9cd04674ede, 0xa400000000000000, + 0xfc6f7c4045812296, 0x4d00000000000000, + 0x9dc5ada82b70b59d, 0xf020000000000000, + 
0xc5371912364ce305, 0x6c28000000000000, + 0xf684df56c3e01bc6, 0xc732000000000000, + 0x9a130b963a6c115c, 0x3c7f400000000000, + 0xc097ce7bc90715b3, 0x4b9f100000000000, + 0xf0bdc21abb48db20, 0x1e86d40000000000, + 0x96769950b50d88f4, 0x1314448000000000, + 0xbc143fa4e250eb31, 0x17d955a000000000, + 0xeb194f8e1ae525fd, 0x5dcfab0800000000, + 0x92efd1b8d0cf37be, 0x5aa1cae500000000, + 0xb7abc627050305ad, 0xf14a3d9e40000000, + 0xe596b7b0c643c719, 0x6d9ccd05d0000000, + 0x8f7e32ce7bea5c6f, 0xe4820023a2000000, + 0xb35dbf821ae4f38b, 0xdda2802c8a800000, + 0xe0352f62a19e306e, 0xd50b2037ad200000, + 0x8c213d9da502de45, 0x4526f422cc340000, + 0xaf298d050e4395d6, 0x9670b12b7f410000, + 0xdaf3f04651d47b4c, 0x3c0cdd765f114000, + 0x88d8762bf324cd0f, 0xa5880a69fb6ac800, + 0xab0e93b6efee0053, 0x8eea0d047a457a00, + 0xd5d238a4abe98068, 0x72a4904598d6d880, + 0x85a36366eb71f041, 0x47a6da2b7f864750, + 0xa70c3c40a64e6c51, 0x999090b65f67d924, + 0xd0cf4b50cfe20765, 0xfff4b4e3f741cf6d, + 0x82818f1281ed449f, 0xbff8f10e7a8921a4, + 0xa321f2d7226895c7, 0xaff72d52192b6a0d, + 0xcbea6f8ceb02bb39, 0x9bf4f8a69f764490, + 0xfee50b7025c36a08, 0x2f236d04753d5b4, + 0x9f4f2726179a2245, 0x1d762422c946590, + 0xc722f0ef9d80aad6, 0x424d3ad2b7b97ef5, + 0xf8ebad2b84e0d58b, 0xd2e0898765a7deb2, + 0x9b934c3b330c8577, 0x63cc55f49f88eb2f, + 0xc2781f49ffcfa6d5, 0x3cbf6b71c76b25fb, + 0xf316271c7fc3908a, 0x8bef464e3945ef7a, + 0x97edd871cfda3a56, 0x97758bf0e3cbb5ac, + 0xbde94e8e43d0c8ec, 0x3d52eeed1cbea317, + 0xed63a231d4c4fb27, 0x4ca7aaa863ee4bdd, + 0x945e455f24fb1cf8, 0x8fe8caa93e74ef6a, + 0xb975d6b6ee39e436, 0xb3e2fd538e122b44, + 0xe7d34c64a9c85d44, 0x60dbbca87196b616, + 0x90e40fbeea1d3a4a, 0xbc8955e946fe31cd, + 0xb51d13aea4a488dd, 0x6babab6398bdbe41, + 0xe264589a4dcdab14, 0xc696963c7eed2dd1, + 0x8d7eb76070a08aec, 0xfc1e1de5cf543ca2, + 0xb0de65388cc8ada8, 0x3b25a55f43294bcb, + 0xdd15fe86affad912, 0x49ef0eb713f39ebe, + 0x8a2dbf142dfcc7ab, 0x6e3569326c784337, + 0xacb92ed9397bf996, 0x49c2c37f07965404, + 0xd7e77a8f87daf7fb, 
0xdc33745ec97be906, + 0x86f0ac99b4e8dafd, 0x69a028bb3ded71a3, + 0xa8acd7c0222311bc, 0xc40832ea0d68ce0c, + 0xd2d80db02aabd62b, 0xf50a3fa490c30190, + 0x83c7088e1aab65db, 0x792667c6da79e0fa, + 0xa4b8cab1a1563f52, 0x577001b891185938, + 0xcde6fd5e09abcf26, 0xed4c0226b55e6f86, + 0x80b05e5ac60b6178, 0x544f8158315b05b4, + 0xa0dc75f1778e39d6, 0x696361ae3db1c721, + 0xc913936dd571c84c, 0x3bc3a19cd1e38e9, + 0xfb5878494ace3a5f, 0x4ab48a04065c723, + 0x9d174b2dcec0e47b, 0x62eb0d64283f9c76, + 0xc45d1df942711d9a, 0x3ba5d0bd324f8394, + 0xf5746577930d6500, 0xca8f44ec7ee36479, + 0x9968bf6abbe85f20, 0x7e998b13cf4e1ecb, + 0xbfc2ef456ae276e8, 0x9e3fedd8c321a67e, + 0xefb3ab16c59b14a2, 0xc5cfe94ef3ea101e, + 0x95d04aee3b80ece5, 0xbba1f1d158724a12, + 0xbb445da9ca61281f, 0x2a8a6e45ae8edc97, + 0xea1575143cf97226, 0xf52d09d71a3293bd, + 0x924d692ca61be758, 0x593c2626705f9c56, + 0xb6e0c377cfa2e12e, 0x6f8b2fb00c77836c, + 0xe498f455c38b997a, 0xb6dfb9c0f956447, + 0x8edf98b59a373fec, 0x4724bd4189bd5eac, + 0xb2977ee300c50fe7, 0x58edec91ec2cb657, + 0xdf3d5e9bc0f653e1, 0x2f2967b66737e3ed, + 0x8b865b215899f46c, 0xbd79e0d20082ee74, + 0xae67f1e9aec07187, 0xecd8590680a3aa11, + 0xda01ee641a708de9, 0xe80e6f4820cc9495, + 0x884134fe908658b2, 0x3109058d147fdcdd, + 0xaa51823e34a7eede, 0xbd4b46f0599fd415, + 0xd4e5e2cdc1d1ea96, 0x6c9e18ac7007c91a, + 0x850fadc09923329e, 0x3e2cf6bc604ddb0, + 0xa6539930bf6bff45, 0x84db8346b786151c, + 0xcfe87f7cef46ff16, 0xe612641865679a63, + 0x81f14fae158c5f6e, 0x4fcb7e8f3f60c07e, + 0xa26da3999aef7749, 0xe3be5e330f38f09d, + 0xcb090c8001ab551c, 0x5cadf5bfd3072cc5, + 0xfdcb4fa002162a63, 0x73d9732fc7c8f7f6, + 0x9e9f11c4014dda7e, 0x2867e7fddcdd9afa, + 0xc646d63501a1511d, 0xb281e1fd541501b8, + 0xf7d88bc24209a565, 0x1f225a7ca91a4226, + 0x9ae757596946075f, 0x3375788de9b06958, + 0xc1a12d2fc3978937, 0x52d6b1641c83ae, + 0xf209787bb47d6b84, 0xc0678c5dbd23a49a, + 0x9745eb4d50ce6332, 0xf840b7ba963646e0, + 0xbd176620a501fbff, 0xb650e5a93bc3d898, + 0xec5d3fa8ce427aff, 0xa3e51f138ab4cebe, + 
0x93ba47c980e98cdf, 0xc66f336c36b10137, + 0xb8a8d9bbe123f017, 0xb80b0047445d4184, + 0xe6d3102ad96cec1d, 0xa60dc059157491e5, + 0x9043ea1ac7e41392, 0x87c89837ad68db2f, + 0xb454e4a179dd1877, 0x29babe4598c311fb, + 0xe16a1dc9d8545e94, 0xf4296dd6fef3d67a, + 0x8ce2529e2734bb1d, 0x1899e4a65f58660c, + 0xb01ae745b101e9e4, 0x5ec05dcff72e7f8f, + 0xdc21a1171d42645d, 0x76707543f4fa1f73, + 0x899504ae72497eba, 0x6a06494a791c53a8, + 0xabfa45da0edbde69, 0x487db9d17636892, + 0xd6f8d7509292d603, 0x45a9d2845d3c42b6, + 0x865b86925b9bc5c2, 0xb8a2392ba45a9b2, + 0xa7f26836f282b732, 0x8e6cac7768d7141e, + 0xd1ef0244af2364ff, 0x3207d795430cd926, + 0x8335616aed761f1f, 0x7f44e6bd49e807b8, + 0xa402b9c5a8d3a6e7, 0x5f16206c9c6209a6, + 0xcd036837130890a1, 0x36dba887c37a8c0f, + 0x802221226be55a64, 0xc2494954da2c9789, + 0xa02aa96b06deb0fd, 0xf2db9baa10b7bd6c, + 0xc83553c5c8965d3d, 0x6f92829494e5acc7, + 0xfa42a8b73abbf48c, 0xcb772339ba1f17f9, + 0x9c69a97284b578d7, 0xff2a760414536efb, + 0xc38413cf25e2d70d, 0xfef5138519684aba, + 0xf46518c2ef5b8cd1, 0x7eb258665fc25d69, + 0x98bf2f79d5993802, 0xef2f773ffbd97a61, + 0xbeeefb584aff8603, 0xaafb550ffacfd8fa, + 0xeeaaba2e5dbf6784, 0x95ba2a53f983cf38, + 0x952ab45cfa97a0b2, 0xdd945a747bf26183, + 0xba756174393d88df, 0x94f971119aeef9e4, + 0xe912b9d1478ceb17, 0x7a37cd5601aab85d, + 0x91abb422ccb812ee, 0xac62e055c10ab33a, + 0xb616a12b7fe617aa, 0x577b986b314d6009, + 0xe39c49765fdf9d94, 0xed5a7e85fda0b80b, + 0x8e41ade9fbebc27d, 0x14588f13be847307, + 0xb1d219647ae6b31c, 0x596eb2d8ae258fc8, + 0xde469fbd99a05fe3, 0x6fca5f8ed9aef3bb, + 0x8aec23d680043bee, 0x25de7bb9480d5854, + 0xada72ccc20054ae9, 0xaf561aa79a10ae6a, + 0xd910f7ff28069da4, 0x1b2ba1518094da04, + 0x87aa9aff79042286, 0x90fb44d2f05d0842, + 0xa99541bf57452b28, 0x353a1607ac744a53, + 0xd3fa922f2d1675f2, 0x42889b8997915ce8, + 0x847c9b5d7c2e09b7, 0x69956135febada11, + 0xa59bc234db398c25, 0x43fab9837e699095, + 0xcf02b2c21207ef2e, 0x94f967e45e03f4bb, + 0x8161afb94b44f57d, 0x1d1be0eebac278f5, + 0xa1ba1ba79e1632dc, 
0x6462d92a69731732, + 0xca28a291859bbf93, 0x7d7b8f7503cfdcfe, + 0xfcb2cb35e702af78, 0x5cda735244c3d43e, + 0x9defbf01b061adab, 0x3a0888136afa64a7, + 0xc56baec21c7a1916, 0x88aaa1845b8fdd0, + 0xf6c69a72a3989f5b, 0x8aad549e57273d45, + 0x9a3c2087a63f6399, 0x36ac54e2f678864b, + 0xc0cb28a98fcf3c7f, 0x84576a1bb416a7dd, + 0xf0fdf2d3f3c30b9f, 0x656d44a2a11c51d5, + 0x969eb7c47859e743, 0x9f644ae5a4b1b325, + 0xbc4665b596706114, 0x873d5d9f0dde1fee, + 0xeb57ff22fc0c7959, 0xa90cb506d155a7ea, + 0x9316ff75dd87cbd8, 0x9a7f12442d588f2, + 0xb7dcbf5354e9bece, 0xc11ed6d538aeb2f, + 0xe5d3ef282a242e81, 0x8f1668c8a86da5fa, + 0x8fa475791a569d10, 0xf96e017d694487bc, + 0xb38d92d760ec4455, 0x37c981dcc395a9ac, + 0xe070f78d3927556a, 0x85bbe253f47b1417, + 0x8c469ab843b89562, 0x93956d7478ccec8e, + 0xaf58416654a6babb, 0x387ac8d1970027b2, + 0xdb2e51bfe9d0696a, 0x6997b05fcc0319e, + 0x88fcf317f22241e2, 0x441fece3bdf81f03, + 0xab3c2fddeeaad25a, 0xd527e81cad7626c3, + 0xd60b3bd56a5586f1, 0x8a71e223d8d3b074, + 0x85c7056562757456, 0xf6872d5667844e49, + 0xa738c6bebb12d16c, 0xb428f8ac016561db, + 0xd106f86e69d785c7, 0xe13336d701beba52, + 0x82a45b450226b39c, 0xecc0024661173473, + 0xa34d721642b06084, 0x27f002d7f95d0190, + 0xcc20ce9bd35c78a5, 0x31ec038df7b441f4, + 0xff290242c83396ce, 0x7e67047175a15271, + 0x9f79a169bd203e41, 0xf0062c6e984d386, + 0xc75809c42c684dd1, 0x52c07b78a3e60868, + 0xf92e0c3537826145, 0xa7709a56ccdf8a82, + 0x9bbcc7a142b17ccb, 0x88a66076400bb691, + 0xc2abf989935ddbfe, 0x6acff893d00ea435, + 0xf356f7ebf83552fe, 0x583f6b8c4124d43, + 0x98165af37b2153de, 0xc3727a337a8b704a, + 0xbe1bf1b059e9a8d6, 0x744f18c0592e4c5c, + 0xeda2ee1c7064130c, 0x1162def06f79df73, + 0x9485d4d1c63e8be7, 0x8addcb5645ac2ba8, + 0xb9a74a0637ce2ee1, 0x6d953e2bd7173692, + 0xe8111c87c5c1ba99, 0xc8fa8db6ccdd0437, + 0x910ab1d4db9914a0, 0x1d9c9892400a22a2, + 0xb54d5e4a127f59c8, 0x2503beb6d00cab4b, + 0xe2a0b5dc971f303a, 0x2e44ae64840fd61d, + 0x8da471a9de737e24, 0x5ceaecfed289e5d2, + 0xb10d8e1456105dad, 0x7425a83e872c5f47, + 
0xdd50f1996b947518, 0xd12f124e28f77719, + 0x8a5296ffe33cc92f, 0x82bd6b70d99aaa6f, + 0xace73cbfdc0bfb7b, 0x636cc64d1001550b, + 0xd8210befd30efa5a, 0x3c47f7e05401aa4e, + 0x8714a775e3e95c78, 0x65acfaec34810a71, + 0xa8d9d1535ce3b396, 0x7f1839a741a14d0d, + 0xd31045a8341ca07c, 0x1ede48111209a050, + 0x83ea2b892091e44d, 0x934aed0aab460432, + 0xa4e4b66b68b65d60, 0xf81da84d5617853f, + 0xce1de40642e3f4b9, 0x36251260ab9d668e, + 0x80d2ae83e9ce78f3, 0xc1d72b7c6b426019, + 0xa1075a24e4421730, 0xb24cf65b8612f81f, + 0xc94930ae1d529cfc, 0xdee033f26797b627, + 0xfb9b7cd9a4a7443c, 0x169840ef017da3b1, + 0x9d412e0806e88aa5, 0x8e1f289560ee864e, + 0xc491798a08a2ad4e, 0xf1a6f2bab92a27e2, + 0xf5b5d7ec8acb58a2, 0xae10af696774b1db, + 0x9991a6f3d6bf1765, 0xacca6da1e0a8ef29, + 0xbff610b0cc6edd3f, 0x17fd090a58d32af3, + 0xeff394dcff8a948e, 0xddfc4b4cef07f5b0, + 0x95f83d0a1fb69cd9, 0x4abdaf101564f98e, + 0xbb764c4ca7a4440f, 0x9d6d1ad41abe37f1, + 0xea53df5fd18d5513, 0x84c86189216dc5ed, + 0x92746b9be2f8552c, 0x32fd3cf5b4e49bb4, + 0xb7118682dbb66a77, 0x3fbc8c33221dc2a1, + 0xe4d5e82392a40515, 0xfabaf3feaa5334a, + 0x8f05b1163ba6832d, 0x29cb4d87f2a7400e, + 0xb2c71d5bca9023f8, 0x743e20e9ef511012, + 0xdf78e4b2bd342cf6, 0x914da9246b255416, + 0x8bab8eefb6409c1a, 0x1ad089b6c2f7548e, + 0xae9672aba3d0c320, 0xa184ac2473b529b1, + 0xda3c0f568cc4f3e8, 0xc9e5d72d90a2741e, + 0x8865899617fb1871, 0x7e2fa67c7a658892, + 0xaa7eebfb9df9de8d, 0xddbb901b98feeab7, + 0xd51ea6fa85785631, 0x552a74227f3ea565, + 0x8533285c936b35de, 0xd53a88958f87275f, + 0xa67ff273b8460356, 0x8a892abaf368f137, + 0xd01fef10a657842c, 0x2d2b7569b0432d85, + 0x8213f56a67f6b29b, 0x9c3b29620e29fc73, + 0xa298f2c501f45f42, 0x8349f3ba91b47b8f, + 0xcb3f2f7642717713, 0x241c70a936219a73, + 0xfe0efb53d30dd4d7, 0xed238cd383aa0110, + 0x9ec95d1463e8a506, 0xf4363804324a40aa, + 0xc67bb4597ce2ce48, 0xb143c6053edcd0d5, + 0xf81aa16fdc1b81da, 0xdd94b7868e94050a, + 0x9b10a4e5e9913128, 0xca7cf2b4191c8326, + 0xc1d4ce1f63f57d72, 0xfd1c2f611f63a3f0, + 0xf24a01a73cf2dccf, 
0xbc633b39673c8cec, + 0x976e41088617ca01, 0xd5be0503e085d813, + 0xbd49d14aa79dbc82, 0x4b2d8644d8a74e18, + 0xec9c459d51852ba2, 0xddf8e7d60ed1219e, + 0x93e1ab8252f33b45, 0xcabb90e5c942b503, + 0xb8da1662e7b00a17, 0x3d6a751f3b936243, + 0xe7109bfba19c0c9d, 0xcc512670a783ad4, + 0x906a617d450187e2, 0x27fb2b80668b24c5, + 0xb484f9dc9641e9da, 0xb1f9f660802dedf6, + 0xe1a63853bbd26451, 0x5e7873f8a0396973, + 0x8d07e33455637eb2, 0xdb0b487b6423e1e8, + 0xb049dc016abc5e5f, 0x91ce1a9a3d2cda62, + 0xdc5c5301c56b75f7, 0x7641a140cc7810fb, + 0x89b9b3e11b6329ba, 0xa9e904c87fcb0a9d, + 0xac2820d9623bf429, 0x546345fa9fbdcd44, + 0xd732290fbacaf133, 0xa97c177947ad4095, + 0x867f59a9d4bed6c0, 0x49ed8eabcccc485d, + 0xa81f301449ee8c70, 0x5c68f256bfff5a74, + 0xd226fc195c6a2f8c, 0x73832eec6fff3111, + 0x83585d8fd9c25db7, 0xc831fd53c5ff7eab, + 0xa42e74f3d032f525, 0xba3e7ca8b77f5e55, + 0xcd3a1230c43fb26f, 0x28ce1bd2e55f35eb, + 0x80444b5e7aa7cf85, 0x7980d163cf5b81b3, + 0xa0555e361951c366, 0xd7e105bcc332621f, + 0xc86ab5c39fa63440, 0x8dd9472bf3fefaa7, + 0xfa856334878fc150, 0xb14f98f6f0feb951, + 0x9c935e00d4b9d8d2, 0x6ed1bf9a569f33d3, + 0xc3b8358109e84f07, 0xa862f80ec4700c8, + 0xf4a642e14c6262c8, 0xcd27bb612758c0fa, + 0x98e7e9cccfbd7dbd, 0x8038d51cb897789c, + 0xbf21e44003acdd2c, 0xe0470a63e6bd56c3, + 0xeeea5d5004981478, 0x1858ccfce06cac74, + 0x95527a5202df0ccb, 0xf37801e0c43ebc8, + 0xbaa718e68396cffd, 0xd30560258f54e6ba, + 0xe950df20247c83fd, 0x47c6b82ef32a2069, + 0x91d28b7416cdd27e, 0x4cdc331d57fa5441, + 0xb6472e511c81471d, 0xe0133fe4adf8e952, + 0xe3d8f9e563a198e5, 0x58180fddd97723a6, + 0x8e679c2f5e44ff8f, 0x570f09eaa7ea7648, + }; +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template +constexpr uint64_t + powers_template::power_of_five_128[number_of_entries]; + +#endif + +using powers = powers_template<>; + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_DECIMAL_TO_BINARY_H +#define FASTFLOAT_DECIMAL_TO_BINARY_H + +#include +#include +#include +#include +#include +#include + 
+namespace fast_float { + +// This will compute or rather approximate w * 5**q and return a pair of 64-bit +// words approximating the result, with the "high" part corresponding to the +// most significant bits and the low part corresponding to the least significant +// bits. +// +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 value128 +compute_product_approximation(int64_t q, uint64_t w) { + const int index = 2 * int(q - powers::smallest_power_of_five); + // For small values of q, e.g., q in [0,27], the answer is always exact + // because The line value128 firstproduct = full_multiplication(w, + // power_of_five_128[index]); gives the exact answer. + value128 firstproduct = + full_multiplication(w, powers::power_of_five_128[index]); + static_assert((bit_precision >= 0) && (bit_precision <= 64), + " precision should be in (0,64]"); + constexpr uint64_t precision_mask = + (bit_precision < 64) ? (uint64_t(0xFFFFFFFFFFFFFFFF) >> bit_precision) + : uint64_t(0xFFFFFFFFFFFFFFFF); + if ((firstproduct.high & precision_mask) == + precision_mask) { // could further guard with (lower + w < lower) + // regarding the second product, we only need secondproduct.high, but our + // expectation is that the compiler will optimize this extra work away if + // needed. 
+ value128 secondproduct = + full_multiplication(w, powers::power_of_five_128[index + 1]); + firstproduct.low += secondproduct.high; + if (secondproduct.high > firstproduct.low) { + firstproduct.high++; + } + } + return firstproduct; +} + +namespace detail { +/** + * For q in (0,350), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * floor(p) + q + * where + * p = log(5**q)/log(2) = q * log(5)/log(2) + * + * For negative values of q in (-400,0), we have that + * f = (((152170 + 65536) * q ) >> 16); + * is equal to + * -ceil(p) + q + * where + * p = log(5**-q)/log(2) = -q * log(5)/log(2) + */ +constexpr fastfloat_really_inline int32_t power(int32_t q) noexcept { + return (((152170 + 65536) * q) >> 16) + 63; +} +} // namespace detail + +// create an adjusted mantissa, biased by the invalid power2 +// for significant digits already multiplied by 10 ** q. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 adjusted_mantissa +compute_error_scaled(int64_t q, uint64_t w, int lz) noexcept { + int hilz = int(w >> 63) ^ 1; + adjusted_mantissa answer; + answer.mantissa = w << hilz; + int bias = binary::mantissa_explicit_bits() - binary::minimum_exponent(); + answer.power2 = int32_t(detail::power(int32_t(q)) + bias - hilz - lz - 62 + + invalid_am_bias); + return answer; +} + +// w * 10 ** q, without rounding the representation up. +// the power2 in the exponent will be adjusted by invalid_am_bias. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +compute_error(int64_t q, uint64_t w) noexcept { + int lz = leading_zeroes(w); + w <<= lz; + value128 product = + compute_product_approximation(q, w); + return compute_error_scaled(q, product.high, lz); +} + +// w * 10 ** q +// The returned value should be a valid ieee64 number that simply need to be +// packed. However, in some very rare cases, the computation will fail. 
In such +// cases, we return an adjusted_mantissa with a negative power of 2: the caller +// should recompute in such cases. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +compute_float(int64_t q, uint64_t w) noexcept { + adjusted_mantissa answer; + if ((w == 0) || (q < binary::smallest_power_of_ten())) { + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + if (q > binary::largest_power_of_ten()) { + // we want to get infinity: + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + return answer; + } + // At this point in time q is in [powers::smallest_power_of_five, + // powers::largest_power_of_five]. + + // We want the most significant bit of i to be 1. Shift if needed. + int lz = leading_zeroes(w); + w <<= lz; + + // The required precision is binary::mantissa_explicit_bits() + 3 because + // 1. We need the implicit bit + // 2. We need an extra bit for rounding purposes + // 3. We might lose a bit due to the "upperbit" routine (result too small, + // requiring a shift) + + value128 product = + compute_product_approximation(q, w); + // The computed 'product' is always sufficient. + // Mathematical proof: + // Noble Mushtak and Daniel Lemire, Fast Number Parsing Without Fallback (to + // appear) See script/mushtak_lemire.py + + // The "compute_product_approximation" function can be slightly slower than a + // branchless approach: value128 product = compute_product(q, w); but in + // practice, we can win big with the compute_product_approximation if its + // additional branch is easily predicted. Which is best is data specific. + int upperbit = int(product.high >> 63); + int shift = upperbit + 64 - binary::mantissa_explicit_bits() - 3; + + answer.mantissa = product.high >> shift; + + answer.power2 = int32_t(detail::power(int32_t(q)) + upperbit - lz - + binary::minimum_exponent()); + if (answer.power2 <= 0) { // we have a subnormal? 
+ // Here have that answer.power2 <= 0 so -answer.power2 >= 0 + if (-answer.power2 + 1 >= + 64) { // if we have more than 64 bits below the minimum exponent, you + // have a zero for sure. + answer.power2 = 0; + answer.mantissa = 0; + // result should be zero + return answer; + } + // next line is safe because -answer.power2 + 1 < 64 + answer.mantissa >>= -answer.power2 + 1; + // Thankfully, we can't have both "round-to-even" and subnormals because + // "round-to-even" only occurs for powers close to 0. + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + // There is a weird scenario where we don't have a subnormal but just. + // Suppose we start with 2.2250738585072013e-308, we end up + // with 0x3fffffffffffff x 2^-1023-53 which is technically subnormal + // whereas 0x40000000000000 x 2^-1023-53 is normal. Now, we need to round + // up 0x3fffffffffffff x 2^-1023-53 and once we do, we are no longer + // subnormal, but we can only know this after rounding. + // So we only declare a subnormal if we are smaller than the threshold. + answer.power2 = + (answer.mantissa < (uint64_t(1) << binary::mantissa_explicit_bits())) + ? 0 + : 1; + return answer; + } + + // usually, we round *up*, but if we fall right in between and and we have an + // even basis, we need to round down + // We are only concerned with the cases where 5**q fits in single 64-bit word. + if ((product.low <= 1) && (q >= binary::min_exponent_round_to_even()) && + (q <= binary::max_exponent_round_to_even()) && + ((answer.mantissa & 3) == 1)) { // we may fall between two floats! + // To be in-between two floats we need that in doing + // answer.mantissa = product.high >> (upperbit + 64 - + // binary::mantissa_explicit_bits() - 3); + // ... we dropped out only zeroes. But if this happened, then we can go + // back!!! 
+ if ((answer.mantissa << shift) == product.high) { + answer.mantissa &= ~uint64_t(1); // flip it so that we do not round up + } + } + + answer.mantissa += (answer.mantissa & 1); // round up + answer.mantissa >>= 1; + if (answer.mantissa >= (uint64_t(2) << binary::mantissa_explicit_bits())) { + answer.mantissa = (uint64_t(1) << binary::mantissa_explicit_bits()); + answer.power2++; // undo previous addition + } + + answer.mantissa &= ~(uint64_t(1) << binary::mantissa_explicit_bits()); + if (answer.power2 >= binary::infinite_power()) { // infinity + answer.power2 = binary::infinite_power(); + answer.mantissa = 0; + } + return answer; +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_BIGINT_H +#define FASTFLOAT_BIGINT_H + +#include +#include +#include +#include + + +namespace fast_float { + +// the limb width: we want efficient multiplication of double the bits in +// limb, or for 64-bit limbs, at least 64-bit multiplication where we can +// extract the high and low parts efficiently. this is every 64-bit +// architecture except for sparc, which emulates 128-bit multiplication. +// we might have platforms where `CHAR_BIT` is not 8, so let's avoid +// doing `8 * sizeof(limb)`. +#if defined(FASTFLOAT_64BIT) && !defined(__sparc) +#define FASTFLOAT_64BIT_LIMB 1 +typedef uint64_t limb; +constexpr size_t limb_bits = 64; +#else +#define FASTFLOAT_32BIT_LIMB +typedef uint32_t limb; +constexpr size_t limb_bits = 32; +#endif + +typedef span limb_span; + +// number of bits in a bigint. this needs to be at least the number +// of bits required to store the largest bigint, which is +// `log2(10**(digits + max_exp))`, or `log2(10**(767 + 342))`, or +// ~3600 bits, so we round to 4000. +constexpr size_t bigint_bits = 4000; +constexpr size_t bigint_limbs = bigint_bits / limb_bits; + +// vector-like type that is allocated on the stack. the entire +// buffer is pre-allocated, and only the length changes. 
+template struct stackvec { + limb data[size]; + // we never need more than 150 limbs + uint16_t length{0}; + + stackvec() = default; + stackvec(const stackvec &) = delete; + stackvec &operator=(const stackvec &) = delete; + stackvec(stackvec &&) = delete; + stackvec &operator=(stackvec &&other) = delete; + + // create stack vector from existing limb span. + FASTFLOAT_CONSTEXPR20 stackvec(limb_span s) { + FASTFLOAT_ASSERT(try_extend(s)); + } + + FASTFLOAT_CONSTEXPR14 limb &operator[](size_t index) noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + FASTFLOAT_CONSTEXPR14 const limb &operator[](size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + return data[index]; + } + // index from the end of the container + FASTFLOAT_CONSTEXPR14 const limb &rindex(size_t index) const noexcept { + FASTFLOAT_DEBUG_ASSERT(index < length); + size_t rindex = length - index - 1; + return data[rindex]; + } + + // set the length, without bounds checking. + FASTFLOAT_CONSTEXPR14 void set_len(size_t len) noexcept { + length = uint16_t(len); + } + constexpr size_t len() const noexcept { return length; } + constexpr bool is_empty() const noexcept { return length == 0; } + constexpr size_t capacity() const noexcept { return size; } + // append item to vector, without bounds checking + FASTFLOAT_CONSTEXPR14 void push_unchecked(limb value) noexcept { + data[length] = value; + length++; + } + // append item to vector, returning if item was added + FASTFLOAT_CONSTEXPR14 bool try_push(limb value) noexcept { + if (len() < capacity()) { + push_unchecked(value); + return true; + } else { + return false; + } + } + // add items to the vector, from a span, without bounds checking + FASTFLOAT_CONSTEXPR20 void extend_unchecked(limb_span s) noexcept { + limb *ptr = data + length; + std::copy_n(s.ptr, s.len(), ptr); + set_len(len() + s.len()); + } + // try to add items to the vector, returning if items were added + FASTFLOAT_CONSTEXPR20 bool 
try_extend(limb_span s) noexcept { + if (len() + s.len() <= capacity()) { + extend_unchecked(s); + return true; + } else { + return false; + } + } + // resize the vector, without bounds checking + // if the new size is longer than the vector, assign value to each + // appended item. + FASTFLOAT_CONSTEXPR20 + void resize_unchecked(size_t new_len, limb value) noexcept { + if (new_len > len()) { + size_t count = new_len - len(); + limb *first = data + len(); + limb *last = first + count; + ::std::fill(first, last, value); + set_len(new_len); + } else { + set_len(new_len); + } + } + // try to resize the vector, returning if the vector was resized. + FASTFLOAT_CONSTEXPR20 bool try_resize(size_t new_len, limb value) noexcept { + if (new_len > capacity()) { + return false; + } else { + resize_unchecked(new_len, value); + return true; + } + } + // check if any limbs are non-zero after the given index. + // this needs to be done in reverse order, since the index + // is relative to the most significant limbs. + FASTFLOAT_CONSTEXPR14 bool nonzero(size_t index) const noexcept { + while (index < len()) { + if (rindex(index) != 0) { + return true; + } + index++; + } + return false; + } + // normalize the big integer, so most-significant zero limbs are removed. 
+ FASTFLOAT_CONSTEXPR14 void normalize() noexcept { + while (len() > 0 && rindex(0) == 0) { + length--; + } + } +}; + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 uint64_t +empty_hi64(bool &truncated) noexcept { + truncated = false; + return 0; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint64_hi64(uint64_t r0, bool &truncated) noexcept { + truncated = false; + int shl = leading_zeroes(r0); + return r0 << shl; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint64_hi64(uint64_t r0, uint64_t r1, bool &truncated) noexcept { + int shl = leading_zeroes(r0); + if (shl == 0) { + truncated = r1 != 0; + return r0; + } else { + int shr = 64 - shl; + truncated = (r1 << shl) != 0; + return (r0 << shl) | (r1 >> shr); + } +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint32_hi64(uint32_t r0, bool &truncated) noexcept { + return uint64_hi64(r0, truncated); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint32_hi64(uint32_t r0, uint32_t r1, bool &truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + return uint64_hi64((x0 << 32) | x1, truncated); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 uint64_t +uint32_hi64(uint32_t r0, uint32_t r1, uint32_t r2, bool &truncated) noexcept { + uint64_t x0 = r0; + uint64_t x1 = r1; + uint64_t x2 = r2; + return uint64_hi64(x0, (x1 << 32) | x2, truncated); +} + +// add two small integers, checking for overflow. +// we want an efficient operation. for msvc, where +// we don't have built-in intrinsics, this is still +// pretty fast. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb +scalar_add(limb x, limb y, bool &overflow) noexcept { + limb z; +// gcc and clang +#if defined(__has_builtin) +#if __has_builtin(__builtin_add_overflow) + if (!cpp20_and_in_constexpr()) { + overflow = __builtin_add_overflow(x, y, &z); + return z; + } +#endif +#endif + + // generic, this still optimizes correctly on MSVC. 
+ z = x + y; + overflow = z < x; + return z; +} + +// multiply two small integers, getting both the high and low bits. +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 limb +scalar_mul(limb x, limb y, limb &carry) noexcept { +#ifdef FASTFLOAT_64BIT_LIMB +#if defined(__SIZEOF_INT128__) + // GCC and clang both define it as an extension. + __uint128_t z = __uint128_t(x) * __uint128_t(y) + __uint128_t(carry); + carry = limb(z >> limb_bits); + return limb(z); +#else + // fallback, no native 128-bit integer multiplication with carry. + // on msvc, this optimizes identically, somehow. + value128 z = full_multiplication(x, y); + bool overflow; + z.low = scalar_add(z.low, carry, overflow); + z.high += uint64_t(overflow); // cannot overflow + carry = z.high; + return z.low; +#endif +#else + uint64_t z = uint64_t(x) * uint64_t(y) + uint64_t(carry); + carry = limb(z >> limb_bits); + return limb(z); +#endif +} + +// add scalar value to bigint starting from offset. +// used in grade school multiplication +template +inline FASTFLOAT_CONSTEXPR20 bool small_add_from(stackvec &vec, limb y, + size_t start) noexcept { + size_t index = start; + limb carry = y; + bool overflow; + while (carry != 0 && index < vec.len()) { + vec[index] = scalar_add(vec[index], carry, overflow); + carry = limb(overflow); + index += 1; + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add scalar value to bigint. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +small_add(stackvec &vec, limb y) noexcept { + return small_add_from(vec, y, 0); +} + +// multiply bigint by scalar value. +template +inline FASTFLOAT_CONSTEXPR20 bool small_mul(stackvec &vec, + limb y) noexcept { + limb carry = 0; + for (size_t index = 0; index < vec.len(); index++) { + vec[index] = scalar_mul(vec[index], y, carry); + } + if (carry != 0) { + FASTFLOAT_TRY(vec.try_push(carry)); + } + return true; +} + +// add bigint to bigint starting from index. 
+// used in grade school multiplication +template +FASTFLOAT_CONSTEXPR20 bool large_add_from(stackvec &x, limb_span y, + size_t start) noexcept { + // the effective x buffer is from `xstart..x.len()`, so exit early + // if we can't get that current range. + if (x.len() < start || y.len() > x.len() - start) { + FASTFLOAT_TRY(x.try_resize(y.len() + start, 0)); + } + + bool carry = false; + for (size_t index = 0; index < y.len(); index++) { + limb xi = x[index + start]; + limb yi = y[index]; + bool c1 = false; + bool c2 = false; + xi = scalar_add(xi, yi, c1); + if (carry) { + xi = scalar_add(xi, 1, c2); + } + x[index + start] = xi; + carry = c1 | c2; + } + + // handle overflow + if (carry) { + FASTFLOAT_TRY(small_add_from(x, 1, y.len() + start)); + } + return true; +} + +// add bigint to bigint. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +large_add_from(stackvec &x, limb_span y) noexcept { + return large_add_from(x, y, 0); +} + +// grade-school multiplication algorithm +template +FASTFLOAT_CONSTEXPR20 bool long_mul(stackvec &x, limb_span y) noexcept { + limb_span xs = limb_span(x.data, x.len()); + stackvec z(xs); + limb_span zs = limb_span(z.data, z.len()); + + if (y.len() != 0) { + limb y0 = y[0]; + FASTFLOAT_TRY(small_mul(x, y0)); + for (size_t index = 1; index < y.len(); index++) { + limb yi = y[index]; + stackvec zi; + if (yi != 0) { + // re-use the same buffer throughout + zi.set_len(0); + FASTFLOAT_TRY(zi.try_extend(zs)); + FASTFLOAT_TRY(small_mul(zi, yi)); + limb_span zis = limb_span(zi.data, zi.len()); + FASTFLOAT_TRY(large_add_from(x, zis, index)); + } + } + } + + x.normalize(); + return true; +} + +// grade-school multiplication algorithm +template +FASTFLOAT_CONSTEXPR20 bool large_mul(stackvec &x, limb_span y) noexcept { + if (y.len() == 1) { + FASTFLOAT_TRY(small_mul(x, y[0])); + } else { + FASTFLOAT_TRY(long_mul(x, y)); + } + return true; +} + +template struct pow5_tables { + static constexpr uint32_t large_step = 135; + static 
constexpr uint64_t small_power_of_5[] = { + 1UL, + 5UL, + 25UL, + 125UL, + 625UL, + 3125UL, + 15625UL, + 78125UL, + 390625UL, + 1953125UL, + 9765625UL, + 48828125UL, + 244140625UL, + 1220703125UL, + 6103515625UL, + 30517578125UL, + 152587890625UL, + 762939453125UL, + 3814697265625UL, + 19073486328125UL, + 95367431640625UL, + 476837158203125UL, + 2384185791015625UL, + 11920928955078125UL, + 59604644775390625UL, + 298023223876953125UL, + 1490116119384765625UL, + 7450580596923828125UL, + }; +#ifdef FASTFLOAT_64BIT_LIMB + constexpr static limb large_power_of_5[] = { + 1414648277510068013UL, 9180637584431281687UL, 4539964771860779200UL, + 10482974169319127550UL, 198276706040285095UL}; +#else + constexpr static limb large_power_of_5[] = { + 4279965485U, 329373468U, 4020270615U, 2137533757U, 4287402176U, + 1057042919U, 1071430142U, 2440757623U, 381945767U, 46164893U}; +#endif +}; + +#if FASTFLOAT_DETAIL_MUST_DEFINE_CONSTEXPR_VARIABLE + +template constexpr uint32_t pow5_tables::large_step; + +template constexpr uint64_t pow5_tables::small_power_of_5[]; + +template constexpr limb pow5_tables::large_power_of_5[]; + +#endif + +// big integer type. implements a small subset of big integer +// arithmetic, using simple algorithms since asymptotically +// faster algorithms are slower for a small number of limbs. +// all operations assume the big-integer is normalized. +struct bigint : pow5_tables<> { + // storage of the limbs, in little-endian order. + stackvec vec; + + FASTFLOAT_CONSTEXPR20 bigint() : vec() {} + bigint(const bigint &) = delete; + bigint &operator=(const bigint &) = delete; + bigint(bigint &&) = delete; + bigint &operator=(bigint &&other) = delete; + + FASTFLOAT_CONSTEXPR20 bigint(uint64_t value) : vec() { +#ifdef FASTFLOAT_64BIT_LIMB + vec.push_unchecked(value); +#else + vec.push_unchecked(uint32_t(value)); + vec.push_unchecked(uint32_t(value >> 32)); +#endif + vec.normalize(); + } + + // get the high 64 bits from the vector, and if bits were truncated. 
+ // this is to get the significant digits for the float. + FASTFLOAT_CONSTEXPR20 uint64_t hi64(bool &truncated) const noexcept { +#ifdef FASTFLOAT_64BIT_LIMB + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint64_hi64(vec.rindex(0), truncated); + } else { + uint64_t result = uint64_hi64(vec.rindex(0), vec.rindex(1), truncated); + truncated |= vec.nonzero(2); + return result; + } +#else + if (vec.len() == 0) { + return empty_hi64(truncated); + } else if (vec.len() == 1) { + return uint32_hi64(vec.rindex(0), truncated); + } else if (vec.len() == 2) { + return uint32_hi64(vec.rindex(0), vec.rindex(1), truncated); + } else { + uint64_t result = + uint32_hi64(vec.rindex(0), vec.rindex(1), vec.rindex(2), truncated); + truncated |= vec.nonzero(3); + return result; + } +#endif + } + + // compare two big integers, returning the large value. + // assumes both are normalized. if the return value is + // negative, other is larger, if the return value is + // positive, this is larger, otherwise they are equal. + // the limbs are stored in little-endian order, so we + // must compare the limbs in ever order. + FASTFLOAT_CONSTEXPR20 int compare(const bigint &other) const noexcept { + if (vec.len() > other.vec.len()) { + return 1; + } else if (vec.len() < other.vec.len()) { + return -1; + } else { + for (size_t index = vec.len(); index > 0; index--) { + limb xi = vec[index - 1]; + limb yi = other.vec[index - 1]; + if (xi > yi) { + return 1; + } else if (xi < yi) { + return -1; + } + } + return 0; + } + } + + // shift left each limb n bits, carrying over to the new limb + // returns true if we were able to shift all the digits. + FASTFLOAT_CONSTEXPR20 bool shl_bits(size_t n) noexcept { + // Internally, for each item, we shift left by n, and add the previous + // right shifted limb-bits. 
+ // For example, we transform (for u8) shifted left 2, to: + // b10100100 b01000010 + // b10 b10010001 b00001000 + FASTFLOAT_DEBUG_ASSERT(n != 0); + FASTFLOAT_DEBUG_ASSERT(n < sizeof(limb) * 8); + + size_t shl = n; + size_t shr = limb_bits - shl; + limb prev = 0; + for (size_t index = 0; index < vec.len(); index++) { + limb xi = vec[index]; + vec[index] = (xi << shl) | (prev >> shr); + prev = xi; + } + + limb carry = prev >> shr; + if (carry != 0) { + return vec.try_push(carry); + } + return true; + } + + // move the limbs left by `n` limbs. + FASTFLOAT_CONSTEXPR20 bool shl_limbs(size_t n) noexcept { + FASTFLOAT_DEBUG_ASSERT(n != 0); + if (n + vec.len() > vec.capacity()) { + return false; + } else if (!vec.is_empty()) { + // move limbs + limb *dst = vec.data + n; + const limb *src = vec.data; + std::copy_backward(src, src + vec.len(), dst + vec.len()); + // fill in empty limbs + limb *first = vec.data; + limb *last = first + n; + ::std::fill(first, last, 0); + vec.set_len(n + vec.len()); + return true; + } else { + return true; + } + } + + // move the limbs left by `n` bits. + FASTFLOAT_CONSTEXPR20 bool shl(size_t n) noexcept { + size_t rem = n % limb_bits; + size_t div = n / limb_bits; + if (rem != 0) { + FASTFLOAT_TRY(shl_bits(rem)); + } + if (div != 0) { + FASTFLOAT_TRY(shl_limbs(div)); + } + return true; + } + + // get the number of leading zeros in the bigint. + FASTFLOAT_CONSTEXPR20 int ctlz() const noexcept { + if (vec.is_empty()) { + return 0; + } else { +#ifdef FASTFLOAT_64BIT_LIMB + return leading_zeroes(vec.rindex(0)); +#else + // no use defining a specialized leading_zeroes for a 32-bit type. + uint64_t r0 = vec.rindex(0); + return leading_zeroes(r0 << 32); +#endif + } + } + + // get the number of bits in the bigint. 
+ FASTFLOAT_CONSTEXPR20 int bit_length() const noexcept { + int lz = ctlz(); + return int(limb_bits * vec.len()) - lz; + } + + FASTFLOAT_CONSTEXPR20 bool mul(limb y) noexcept { return small_mul(vec, y); } + + FASTFLOAT_CONSTEXPR20 bool add(limb y) noexcept { return small_add(vec, y); } + + // multiply as if by 2 raised to a power. + FASTFLOAT_CONSTEXPR20 bool pow2(uint32_t exp) noexcept { return shl(exp); } + + // multiply as if by 5 raised to a power. + FASTFLOAT_CONSTEXPR20 bool pow5(uint32_t exp) noexcept { + // multiply by a power of 5 + size_t large_length = sizeof(large_power_of_5) / sizeof(limb); + limb_span large = limb_span(large_power_of_5, large_length); + while (exp >= large_step) { + FASTFLOAT_TRY(large_mul(vec, large)); + exp -= large_step; + } +#ifdef FASTFLOAT_64BIT_LIMB + uint32_t small_step = 27; + limb max_native = 7450580596923828125UL; +#else + uint32_t small_step = 13; + limb max_native = 1220703125U; +#endif + while (exp >= small_step) { + FASTFLOAT_TRY(small_mul(vec, max_native)); + exp -= small_step; + } + if (exp != 0) { + // Work around clang bug https://godbolt.org/z/zedh7rrhc + // This is similar to https://github.com/llvm/llvm-project/issues/47746, + // except the workaround described there don't work here + FASTFLOAT_TRY(small_mul( + vec, limb(((void)small_power_of_5[0], small_power_of_5[exp])))); + } + + return true; + } + + // multiply as if by 10 raised to a power. 
+ FASTFLOAT_CONSTEXPR20 bool pow10(uint32_t exp) noexcept { + FASTFLOAT_TRY(pow5(exp)); + return pow2(exp); + } +}; + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_DIGIT_COMPARISON_H +#define FASTFLOAT_DIGIT_COMPARISON_H + +#include +#include +#include +#include + + +namespace fast_float { + +// 1e0 to 1e19 +constexpr static uint64_t powers_of_ten_uint64[] = {1UL, + 10UL, + 100UL, + 1000UL, + 10000UL, + 100000UL, + 1000000UL, + 10000000UL, + 100000000UL, + 1000000000UL, + 10000000000UL, + 100000000000UL, + 1000000000000UL, + 10000000000000UL, + 100000000000000UL, + 1000000000000000UL, + 10000000000000000UL, + 100000000000000000UL, + 1000000000000000000UL, + 10000000000000000000UL}; + +// calculate the exponent, in scientific notation, of the number. +// this algorithm is not even close to optimized, but it has no practical +// effect on performance: in order to have a faster algorithm, we'd need +// to slow down performance for faster algorithms, and this is still fast. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int32_t +scientific_exponent(parsed_number_string_t &num) noexcept { + uint64_t mantissa = num.mantissa; + int32_t exponent = int32_t(num.exponent); + while (mantissa >= 10000) { + mantissa /= 10000; + exponent += 4; + } + while (mantissa >= 100) { + mantissa /= 100; + exponent += 2; + } + while (mantissa >= 10) { + mantissa /= 10; + exponent += 1; + } + return exponent; +} + +// this converts a native floating-point number to an extended-precision float. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +to_extended(T value) noexcept { + using equiv_uint = typename binary_format::equiv_uint; + constexpr equiv_uint exponent_mask = binary_format::exponent_mask(); + constexpr equiv_uint mantissa_mask = binary_format::mantissa_mask(); + constexpr equiv_uint hidden_bit_mask = binary_format::hidden_bit_mask(); + + adjusted_mantissa am; + int32_t bias = binary_format::mantissa_explicit_bits() - + binary_format::minimum_exponent(); + equiv_uint bits; +#if FASTFLOAT_HAS_BIT_CAST + bits = std::bit_cast(value); +#else + ::memcpy(&bits, &value, sizeof(T)); +#endif + if ((bits & exponent_mask) == 0) { + // denormal + am.power2 = 1 - bias; + am.mantissa = bits & mantissa_mask; + } else { + // normal + am.power2 = int32_t((bits & exponent_mask) >> + binary_format::mantissa_explicit_bits()); + am.power2 -= bias; + am.mantissa = (bits & mantissa_mask) | hidden_bit_mask; + } + + return am; +} + +// get the extended precision value of the halfway point between b and b+u. +// we are given a native float that represents b, so we need to adjust it +// halfway between b and b+u. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +to_extended_halfway(T value) noexcept { + adjusted_mantissa am = to_extended(value); + am.mantissa <<= 1; + am.mantissa += 1; + am.power2 -= 1; + return am; +} + +// round an extended-precision float to the nearest machine float. +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void round(adjusted_mantissa &am, + callback cb) noexcept { + int32_t mantissa_shift = 64 - binary_format::mantissa_explicit_bits() - 1; + if (-am.power2 >= mantissa_shift) { + // have a denormal float + int32_t shift = -am.power2 + 1; + cb(am, std::min(shift, 64)); + // check for round-up: if rounding-nearest carried us to the hidden bit. + am.power2 = (am.mantissa < + (uint64_t(1) << binary_format::mantissa_explicit_bits())) + ? 
0 + : 1; + return; + } + + // have a normal float, use the default shift. + cb(am, mantissa_shift); + + // check for carry + if (am.mantissa >= + (uint64_t(2) << binary_format::mantissa_explicit_bits())) { + am.mantissa = (uint64_t(1) << binary_format::mantissa_explicit_bits()); + am.power2++; + } + + // check for infinite: we could have carried to an infinite power + am.mantissa &= ~(uint64_t(1) << binary_format::mantissa_explicit_bits()); + if (am.power2 >= binary_format::infinite_power()) { + am.power2 = binary_format::infinite_power(); + am.mantissa = 0; + } +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void +round_nearest_tie_even(adjusted_mantissa &am, int32_t shift, + callback cb) noexcept { + const uint64_t mask = (shift == 64) ? UINT64_MAX : (uint64_t(1) << shift) - 1; + const uint64_t halfway = (shift == 0) ? 0 : uint64_t(1) << (shift - 1); + uint64_t truncated_bits = am.mantissa & mask; + bool is_above = truncated_bits > halfway; + bool is_halfway = truncated_bits == halfway; + + // shift digits into position + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; + + bool is_odd = (am.mantissa & 1) == 1; + am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above)); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void +round_down(adjusted_mantissa &am, int32_t shift) noexcept { + if (shift == 64) { + am.mantissa = 0; + } else { + am.mantissa >>= shift; + } + am.power2 += shift; +} +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +skip_zeros(UC const *&first, UC const *last) noexcept { + uint64_t val; + while (!cpp20_and_in_constexpr() && + std::distance(first, last) >= int_cmp_len()) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != int_cmp_zeros()) { + break; + } + first += int_cmp_len(); + } + while (first != last) { + if (*first != UC('0')) { + break; + } + first++; + } +} + +// determine if any non-zero digits were truncated. +// all characters must be valid digits. 
+template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +is_truncated(UC const *first, UC const *last) noexcept { + // do 8-bit optimizations, can just compare to 8 literal 0s. + uint64_t val; + while (!cpp20_and_in_constexpr() && + std::distance(first, last) >= int_cmp_len()) { + ::memcpy(&val, first, sizeof(uint64_t)); + if (val != int_cmp_zeros()) { + return true; + } + first += int_cmp_len(); + } + while (first != last) { + if (*first != UC('0')) { + return true; + } + ++first; + } + return false; +} +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 bool +is_truncated(span s) noexcept { + return is_truncated(s.ptr, s.ptr + s.len()); +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +parse_eight_digits(const UC *&p, limb &value, size_t &counter, + size_t &count) noexcept { + value = value * 100000000 + parse_eight_digits_unrolled(p); + p += 8; + counter += 8; + count += 8; +} + +template +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 void +parse_one_digit(UC const *&p, limb &value, size_t &counter, + size_t &count) noexcept { + value = value * 10 + limb(*p - UC('0')); + p++; + counter++; + count++; +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +add_native(bigint &big, limb power, limb value) noexcept { + big.mul(power); + big.add(value); +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void +round_up_bigint(bigint &big, size_t &count) noexcept { + // need to round-up the digits, but need to avoid rounding + // ....9999 to ...10000, which could cause a false halfway point. + add_native(big, 10, 1); + count++; +} + +// parse the significant digits into a big integer +template +inline FASTFLOAT_CONSTEXPR20 void +parse_mantissa(bigint &result, parsed_number_string_t &num, + size_t max_digits, size_t &digits) noexcept { + // try to minimize the number of big integer and scalar multiplication. + // therefore, try to parse 8 digits at a time, and multiply by the largest + // scalar value (9 or 19 digits) for each step. 
+ size_t counter = 0; + digits = 0; + limb value = 0; +#ifdef FASTFLOAT_64BIT_LIMB + size_t step = 19; +#else + size_t step = 9; +#endif + + // process all integer digits. + UC const *p = num.integer.ptr; + UC const *pend = p + num.integer.len(); + skip_zeros(p, pend); + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && + (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (num.fraction.ptr != nullptr) { + truncated |= is_truncated(num.fraction); + } + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + + // add our fraction digits, if they're available. 
+ if (num.fraction.ptr != nullptr) { + p = num.fraction.ptr; + pend = p + num.fraction.len(); + if (digits == 0) { + skip_zeros(p, pend); + } + // process all digits, in increments of step per loop + while (p != pend) { + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && + (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); + } + while (counter < step && p != pend && digits < max_digits) { + parse_one_digit(p, value, counter, digits); + } + if (digits == max_digits) { + // add the temporary value, then check if we've truncated any digits + add_native(result, limb(powers_of_ten_uint64[counter]), value); + bool truncated = is_truncated(p, pend); + if (truncated) { + round_up_bigint(result, digits); + } + return; + } else { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + counter = 0; + value = 0; + } + } + } + + if (counter != 0) { + add_native(result, limb(powers_of_ten_uint64[counter]), value); + } +} + +template +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +positive_digit_comp(bigint &bigmant, int32_t exponent) noexcept { + FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent))); + adjusted_mantissa answer; + bool truncated; + answer.mantissa = bigmant.hi64(truncated); + int bias = binary_format::mantissa_explicit_bits() - + binary_format::minimum_exponent(); + answer.power2 = bigmant.bit_length() - 64 + bias; + + round(answer, [truncated](adjusted_mantissa &a, int32_t shift) { + round_nearest_tie_even( + a, shift, + [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool { + return is_above || (is_halfway && truncated) || + (is_odd && is_halfway); + }); + }); + + return answer; +} + +// the scaling here is quite simple: we have, for the real digits `m * 10^e`, +// and for the theoretical digits `n * 2^f`. Since `e` is always negative, +// to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`. 
+// we then need to scale by `2^(f- e)`, and then the two significant digits +// are of the same magnitude. +template +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa negative_digit_comp( + bigint &bigmant, adjusted_mantissa am, int32_t exponent) noexcept { + bigint &real_digits = bigmant; + int32_t real_exp = exponent; + + // get the value of `b`, rounded down, and get a bigint representation of b+h + adjusted_mantissa am_b = am; + // gcc7 buf: use a lambda to remove the noexcept qualifier bug with + // -Wnoexcept-type. + round(am_b, + [](adjusted_mantissa &a, int32_t shift) { round_down(a, shift); }); + T b; + to_float(false, am_b, b); + adjusted_mantissa theor = to_extended_halfway(b); + bigint theor_digits(theor.mantissa); + int32_t theor_exp = theor.power2; + + // scale real digits and theor digits to be same power. + int32_t pow2_exp = theor_exp - real_exp; + uint32_t pow5_exp = uint32_t(-real_exp); + if (pow5_exp != 0) { + FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp)); + } + if (pow2_exp > 0) { + FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp))); + } else if (pow2_exp < 0) { + FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp))); + } + + // compare digits, and use it to director rounding + int ord = real_digits.compare(theor_digits); + adjusted_mantissa answer = am; + round(answer, [ord](adjusted_mantissa &a, int32_t shift) { + round_nearest_tie_even( + a, shift, [ord](bool is_odd, bool _, bool __) -> bool { + (void)_; // not needed, since we've done our comparison + (void)__; // not needed, since we've done our comparison + if (ord > 0) { + return true; + } else if (ord < 0) { + return false; + } else { + return is_odd; + } + }); + }); + + return answer; +} + +// parse the significant digits as a big integer to unambiguously round the +// the significant digits. here, we are trying to determine how to round +// an extended float representation close to `b+h`, halfway between `b` +// (the float rounded-down) and `b+u`, the next positive float. 
this +// algorithm is always correct, and uses one of two approaches. when +// the exponent is positive relative to the significant digits (such as +// 1234), we create a big-integer representation, get the high 64-bits, +// determine if any lower bits are truncated, and use that to direct +// rounding. in case of a negative exponent relative to the significant +// digits (such as 1.2345), we create a theoretical representation of +// `b` as a big-integer type, scaled to the same binary exponent as +// the actual digits. we then compare the big integer representations +// of both, and use that to direct rounding. +template +inline FASTFLOAT_CONSTEXPR20 adjusted_mantissa +digit_comp(parsed_number_string_t &num, adjusted_mantissa am) noexcept { + // remove the invalid exponent bias + am.power2 -= invalid_am_bias; + + int32_t sci_exp = scientific_exponent(num); + size_t max_digits = binary_format::max_digits(); + size_t digits = 0; + bigint bigmant; + parse_mantissa(bigmant, num, max_digits, digits); + // can't underflow, since digits is at most max_digits. + int32_t exponent = sci_exp + 1 - int32_t(digits); + if (exponent >= 0) { + return positive_digit_comp(bigmant, exponent); + } else { + return negative_digit_comp(bigmant, am, exponent); + } +} + +} // namespace fast_float + +#endif + +#ifndef FASTFLOAT_PARSE_NUMBER_H +#define FASTFLOAT_PARSE_NUMBER_H + + +#include +#include +#include +#include +namespace fast_float { + +namespace detail { +/** + * Special case +inf, -inf, nan, infinity, -infinity. + * The case comparisons could be made much faster given that we know that the + * strings a null-free and fixed. 
+ **/ +template +from_chars_result_t FASTFLOAT_CONSTEXPR14 parse_infnan(UC const *first, + UC const *last, + T &value) noexcept { + from_chars_result_t answer{}; + answer.ptr = first; + answer.ec = std::errc(); // be optimistic + bool minusSign = false; + if (*first == + UC('-')) { // assume first < last, so dereference without checks; + // C++17 20.19.3.(7.1) explicitly forbids '+' here + minusSign = true; + ++first; + } +#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default + if (*first == UC('+')) { + ++first; + } +#endif + if (last - first >= 3) { + if (fastfloat_strncasecmp(first, str_const_nan(), 3)) { + answer.ptr = (first += 3); + value = minusSign ? -std::numeric_limits::quiet_NaN() + : std::numeric_limits::quiet_NaN(); + // Check for possible nan(n-char-seq-opt), C++17 20.19.3.7, + // C11 7.20.1.3.3. At least MSVC produces nan(ind) and nan(snan). + if (first != last && *first == UC('(')) { + for (UC const *ptr = first + 1; ptr != last; ++ptr) { + if (*ptr == UC(')')) { + answer.ptr = ptr + 1; // valid nan(n-char-seq-opt) + break; + } else if (!((UC('a') <= *ptr && *ptr <= UC('z')) || + (UC('A') <= *ptr && *ptr <= UC('Z')) || + (UC('0') <= *ptr && *ptr <= UC('9')) || *ptr == UC('_'))) + break; // forbidden char, not nan(n-char-seq-opt) + } + } + return answer; + } + if (fastfloat_strncasecmp(first, str_const_inf(), 3)) { + if ((last - first >= 8) && + fastfloat_strncasecmp(first + 3, str_const_inf() + 3, 5)) { + answer.ptr = first + 8; + } else { + answer.ptr = first + 3; + } + value = minusSign ? -std::numeric_limits::infinity() + : std::numeric_limits::infinity(); + return answer; + } + } + answer.ec = std::errc::invalid_argument; + return answer; +} + +/** + * Returns true if the floating-pointing rounding mode is to 'nearest'. + * It is the default on most system. This function is meant to be inexpensive. 
+ * Credit : @mwalcott3 + */ +fastfloat_really_inline bool rounds_to_nearest() noexcept { + // https://lemire.me/blog/2020/06/26/gcc-not-nearest/ +#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) + return false; +#endif + // See + // A fast function to check your floating-point rounding mode + // https://lemire.me/blog/2022/11/16/a-fast-function-to-check-your-floating-point-rounding-mode/ + // + // This function is meant to be equivalent to : + // prior: #include + // return fegetround() == FE_TONEAREST; + // However, it is expected to be much faster than the fegetround() + // function call. + // + // The volatile keywoard prevents the compiler from computing the function + // at compile-time. + // There might be other ways to prevent compile-time optimizations (e.g., + // asm). The value does not need to be std::numeric_limits::min(), any + // small value so that 1 + x should round to 1 would do (after accounting for + // excess precision, as in 387 instructions). + static volatile float fmin = std::numeric_limits::min(); + float fmini = fmin; // we copy it so that it gets loaded at most once. +// +// Explanation: +// Only when fegetround() == FE_TONEAREST do we have that +// fmin + 1.0f == 1.0f - fmin. +// +// FE_UPWARD: +// fmin + 1.0f > 1 +// 1.0f - fmin == 1 +// +// FE_DOWNWARD or FE_TOWARDZERO: +// fmin + 1.0f == 1 +// 1.0f - fmin < 1 +// +// Note: This may fail to be accurate if fast-math has been +// enabled, as rounding conventions may not apply. +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(push) +// todo: is there a VS warning? 
+// see +// https://stackoverflow.com/questions/46079446/is-there-a-warning-for-floating-point-equality-checking-in-visual-studio-2013 +#elif defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wfloat-equal" +#elif defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wfloat-equal" +#endif + return (fmini + 1.0f == 1.0f - fmini); +#ifdef FASTFLOAT_VISUAL_STUDIO +#pragma warning(pop) +#elif defined(__clang__) +#pragma clang diagnostic pop +#elif defined(__GNUC__) +#pragma GCC diagnostic pop +#endif +} + +} // namespace detail + +template struct from_chars_caller { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + return from_chars_advanced(first, last, value, options); + } +}; + +#if __STDCPP_FLOAT32_T__ == 1 +template <> struct from_chars_caller { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, std::float32_t &value, + parse_options_t options) noexcept { + // if std::float32_t is defined, and we are in C++23 mode; macro set for + // float32; set value to float due to equivalence between float and + // float32_t + float val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + +#if __STDCPP_FLOAT64_T__ == 1 +template <> struct from_chars_caller { + template + FASTFLOAT_CONSTEXPR20 static from_chars_result_t + call(UC const *first, UC const *last, std::float64_t &value, + parse_options_t options) noexcept { + // if std::float64_t is defined, and we are in C++23 mode; macro set for + // float64; set value as double due to equivalence between double and + // float64_t + double val; + auto ret = from_chars_advanced(first, last, val, options); + value = val; + return ret; + } +}; +#endif + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, + 
chars_format fmt /*= chars_format::general*/) noexcept { + return from_chars_caller::call(first, last, value, + parse_options_t(fmt)); +} + +/** + * This function overload takes parsed_number_string_t structure that is created + * and populated either by from_chars_advanced function taking chars range and + * parsing options or other parsing custom function implemented by user. + */ +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_advanced(parsed_number_string_t &pns, T &value) noexcept { + + static_assert(is_supported_float_type(), + "only some floating-point types are supported"); + static_assert(is_supported_char_type(), + "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; + + answer.ec = std::errc(); // be optimistic + answer.ptr = pns.lastmatch; + // The implementation of the Clinger's fast path is convoluted because + // we want round-to-nearest in all cases, irrespective of the rounding mode + // selected on the thread. + // We proceed optimistically, assuming that detail::rounds_to_nearest() + // returns true. + if (binary_format::min_exponent_fast_path() <= pns.exponent && + pns.exponent <= binary_format::max_exponent_fast_path() && + !pns.too_many_digits) { + // Unfortunately, the conventional Clinger's fast path is only possible + // when the system rounds to the nearest float. + // + // We expect the next branch to almost always be selected. + // We could check it first (before the previous branch), but + // there might be performance advantages at having the check + // be last. + if (!cpp20_and_in_constexpr() && detail::rounds_to_nearest()) { + // We have that fegetround() == FE_TONEAREST. + // Next is Clinger's fast path. 
+ if (pns.mantissa <= binary_format::max_mantissa_fast_path()) { + value = T(pns.mantissa); + if (pns.exponent < 0) { + value = value / binary_format::exact_power_of_ten(-pns.exponent); + } else { + value = value * binary_format::exact_power_of_ten(pns.exponent); + } + if (pns.negative) { + value = -value; + } + return answer; + } + } else { + // We do not have that fegetround() == FE_TONEAREST. + // Next is a modified Clinger's fast path, inspired by Jakub Jelínek's + // proposal + if (pns.exponent >= 0 && + pns.mantissa <= + binary_format::max_mantissa_fast_path(pns.exponent)) { +#if defined(__clang__) || defined(FASTFLOAT_32BIT) + // Clang may map 0 to -0.0 when fegetround() == FE_DOWNWARD + if (pns.mantissa == 0) { + value = pns.negative ? T(-0.) : T(0.); + return answer; + } +#endif + value = T(pns.mantissa) * + binary_format::exact_power_of_ten(pns.exponent); + if (pns.negative) { + value = -value; + } + return answer; + } + } + } + adjusted_mantissa am = + compute_float>(pns.exponent, pns.mantissa); + if (pns.too_many_digits && am.power2 >= 0) { + if (am != compute_float>(pns.exponent, pns.mantissa + 1)) { + am = compute_error>(pns.exponent, pns.mantissa); + } + } + // If we called compute_float>(pns.exponent, pns.mantissa) + // and we have an invalid power (am.power2 < 0), then we need to go the long + // way around again. This is very uncommon. + if (am.power2 < 0) { + am = digit_comp(pns, am); + } + to_float(pns.negative, am, value); + // Test for over/underflow. 
+ if ((pns.mantissa != 0 && am.mantissa == 0 && am.power2 == 0) || + am.power2 == binary_format::infinite_power()) { + answer.ec = std::errc::result_out_of_range; + } + return answer; +} + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars_advanced(UC const *first, UC const *last, T &value, + parse_options_t options) noexcept { + + static_assert(is_supported_float_type(), + "only some floating-point types are supported"); + static_assert(is_supported_char_type(), + "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + parsed_number_string_t pns = + parse_number_string(first, last, options); + if (!pns.valid) { + if (options.format & chars_format::no_infnan) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } else { + return detail::parse_infnan(first, last, value); + } + } + + // call overload that takes parsed_number_string_t directly. 
+ return from_chars_advanced(pns, value); +} + +template +FASTFLOAT_CONSTEXPR20 from_chars_result_t +from_chars(UC const *first, UC const *last, T &value, int base) noexcept { + static_assert(is_supported_char_type(), + "only char, wchar_t, char16_t and char32_t are supported"); + + from_chars_result_t answer; +#ifdef FASTFLOAT_SKIP_WHITE_SPACE // disabled by default + while ((first != last) && fast_float::is_space(uint8_t(*first))) { + first++; + } +#endif + if (first == last || base < 2 || base > 36) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } + return parse_int_string(first, last, value, base); +} + +} // namespace fast_float + +#endif diff --git a/deps/fast_float_c_interface/Makefile b/deps/fast_float_c_interface/Makefile new file mode 100644 index 0000000000..4db3efe2c3 --- /dev/null +++ b/deps/fast_float_c_interface/Makefile @@ -0,0 +1,37 @@ +CCCOLOR:="\033[34m" +SRCCOLOR:="\033[33m" +ENDCOLOR:="\033[0m" + +CXX?=c++ +# we need = instead of := so that $@ in QUIET_CXX gets evaluated in the rule and is assigned appropriate value. +TEMP:=$(CXX) +QUIET_CXX=@printf ' %b %b\n' $(CCCOLOR)C++$(ENDCOLOR) $(SRCCOLOR)$@$(ENDCOLOR) 1>&2; +CXX=$(QUIET_CXX)$(TEMP) + +WARN=-Wall -W -Wno-missing-field-initializers + +STD=-pedantic -std=c++11 + +OPT?=-O3 +CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1')) +ifeq ($(OPT),-O3) + ifeq (clang,$(CLANG)) + OPT+=-flto + else + OPT+=-flto=auto -ffat-lto-objects + endif +endif + +# 1) Today src/Makefile passes -m32 flag for explicit 32-bit build on 64-bit machine, via CFLAGS. For 32-bit build on +# 32-bit machine and 64-bit on 64-bit machine, CFLAGS are empty. No other flags are set that can conflict with C++, +# therefore let's use CFLAGS without changes for now. +# 2) FASTFLOAT_ALLOWS_LEADING_PLUS allows +inf to be parsed as inf, instead of error. 
+CXXFLAGS=$(STD) $(OPT) $(WARN) -static -fPIC -fno-exceptions $(CFLAGS) -D FASTFLOAT_ALLOWS_LEADING_PLUS + +.PHONY: all clean + +all: fast_float_strtod.o + +clean: + rm -f *.o || true; + diff --git a/deps/fast_float_c_interface/fast_float_strtod.cpp b/deps/fast_float_c_interface/fast_float_strtod.cpp new file mode 100644 index 0000000000..8e5d19470f --- /dev/null +++ b/deps/fast_float_c_interface/fast_float_strtod.cpp @@ -0,0 +1,24 @@ +/* + * Copyright Valkey Contributors. + * All rights reserved. + * SPDX-License-Identifier: BSD 3-Clause + */ + +#include "../fast_float/fast_float.h" +#include + +extern "C" +{ + double fast_float_strtod(const char *str, const char** endptr) + { + double temp = 0; + auto answer = fast_float::from_chars(str, str + strlen(str), temp); + if (answer.ec != std::errc()) { + errno = (answer.ec == std::errc::result_out_of_range) ? ERANGE : EINVAL; + } + if (endptr) { + *endptr = answer.ptr; + } + return temp; + } +} diff --git a/src/Makefile b/src/Makefile index f876f55dec..0cbf5763cb 100644 --- a/src/Makefile +++ b/src/Makefile @@ -424,6 +424,17 @@ ENGINE_TEST_OBJ:=$(sort $(patsubst unit/%.c,unit/%.o,$(ENGINE_TEST_FILES))) ENGINE_UNIT_TESTS:=$(ENGINE_NAME)-unit-tests$(PROG_SUFFIX) ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(ENGINE_SERVER_OBJ) $(ENGINE_CLI_OBJ) $(ENGINE_BENCHMARK_OBJ))) +USE_FAST_FLOAT?=no +ifeq ($(USE_FAST_FLOAT),yes) + # valkey_strtod.h uses this flag to switch valkey_strtod function to fast_float_strtod, + # therefore let's pass it to compiler for preprocessing. + FINAL_CFLAGS += -D USE_FAST_FLOAT + # next, let's build and add actual library containing fast_float_strtod function for linking. 
+ DEPENDENCY_TARGETS += fast_float_c_interface + FAST_FLOAT_STRTOD_OBJECT := ../deps/fast_float_c_interface/fast_float_strtod.o + FINAL_LIBS += $(FAST_FLOAT_STRTOD_OBJECT) +endif + all: $(SERVER_NAME) $(ENGINE_SENTINEL_NAME) $(ENGINE_CLI_NAME) $(ENGINE_BENCHMARK_NAME) $(ENGINE_CHECK_RDB_NAME) $(ENGINE_CHECK_AOF_NAME) $(TLS_MODULE) $(RDMA_MODULE) @echo "" @echo "Hint: It's a good idea to run 'make test' ;)" @@ -588,7 +599,7 @@ bench: $(ENGINE_BENCHMARK_NAME) 32bit: @echo "" - @echo "WARNING: if it fails under Linux you probably need to install libc6-dev-i386" + @echo "WARNING: if it fails under Linux you probably need to install libc6-dev-i386 and libstdc++-11-dev-i386-cross" @echo "" $(MAKE) all-with-unit-tests CFLAGS="-m32" LDFLAGS="-m32" diff --git a/src/debug.c b/src/debug.c index 082e20a3b6..38b66dacb5 100644 --- a/src/debug.c +++ b/src/debug.c @@ -46,6 +46,8 @@ #include #include +#include "valkey_strtod.h" + #ifdef HAVE_BACKTRACE #include #ifndef __OpenBSD__ @@ -846,7 +848,7 @@ void debugCommand(client *c) { "string|integer|double|bignum|null|array|set|map|attrib|push|verbatim|true|false"); } } else if (!strcasecmp(c->argv[1]->ptr, "sleep") && c->argc == 3) { - double dtime = strtod(c->argv[2]->ptr, NULL); + double dtime = valkey_strtod(c->argv[2]->ptr, NULL); long long utime = dtime * 1000000; struct timespec tv; diff --git a/src/resp_parser.c b/src/resp_parser.c index 950d2227b7..101e883d2f 100644 --- a/src/resp_parser.c +++ b/src/resp_parser.c @@ -58,6 +58,8 @@ #include "resp_parser.h" #include "server.h" +#include "valkey_strtod.h" + static int parseBulk(ReplyParser *parser, void *p_ctx) { const char *proto = parser->curr_location; char *p = strchr(proto + 1, '\r'); @@ -150,13 +152,11 @@ static int parseDouble(ReplyParser *parser, void *p_ctx) { parser->curr_location = p + 2; /* for \r\n */ char buf[MAX_LONG_DOUBLE_CHARS + 1]; size_t len = p - proto - 1; - double d; + double d = 0; if (len <= MAX_LONG_DOUBLE_CHARS) { memcpy(buf, proto + 1, len); buf[len] = 
'\0'; - d = strtod(buf, NULL); /* We expect a valid representation. */ - } else { - d = 0; + d = valkey_strtod(buf, NULL); /* We expect a valid representation. */ } parser->callbacks.double_callback(p_ctx, d, proto, parser->curr_location - proto); return C_OK; diff --git a/src/sort.c b/src/sort.c index 92777b068c..ad0496da79 100644 --- a/src/sort.c +++ b/src/sort.c @@ -34,6 +34,8 @@ #include /* isnan() */ #include "cluster.h" +#include "valkey_strtod.h" + zskiplistNode *zslGetElementByRank(zskiplist *zsl, unsigned long rank); serverSortOperation *createSortOperation(int type, robj *pattern) { @@ -479,9 +481,9 @@ void sortCommandGeneric(client *c, int readonly) { } else { if (sdsEncodedObject(byval)) { char *eptr; - - vector[j].u.score = strtod(byval->ptr, &eptr); - if (eptr[0] != '\0' || errno == ERANGE || isnan(vector[j].u.score)) { + errno = 0; + vector[j].u.score = valkey_strtod(byval->ptr, &eptr); + if (eptr[0] != '\0' || errno == ERANGE || errno == EINVAL || isnan(vector[j].u.score)) { int_conversion_error = 1; } } else if (byval->encoding == OBJ_ENCODING_INT) { diff --git a/src/t_zset.c b/src/t_zset.c index 069ab0924a..a1e71208cb 100644 --- a/src/t_zset.c +++ b/src/t_zset.c @@ -60,6 +60,8 @@ #include "intset.h" /* Compact integer set structure */ #include +#include "valkey_strtod.h" + /*----------------------------------------------------------------------------- * Skiplist implementation of the low level API *----------------------------------------------------------------------------*/ @@ -546,11 +548,11 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) { spec->min = (long)min->ptr; } else { if (((char *)min->ptr)[0] == '(') { - spec->min = strtod((char *)min->ptr + 1, &eptr); + spec->min = valkey_strtod((char *)min->ptr + 1, &eptr); if (eptr[0] != '\0' || isnan(spec->min)) return C_ERR; spec->minex = 1; } else { - spec->min = strtod((char *)min->ptr, &eptr); + spec->min = valkey_strtod((char *)min->ptr, &eptr); if (eptr[0] != '\0' || 
isnan(spec->min)) return C_ERR; } } @@ -558,11 +560,11 @@ static int zslParseRange(robj *min, robj *max, zrangespec *spec) { spec->max = (long)max->ptr; } else { if (((char *)max->ptr)[0] == '(') { - spec->max = strtod((char *)max->ptr + 1, &eptr); + spec->max = valkey_strtod((char *)max->ptr + 1, &eptr); if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR; spec->maxex = 1; } else { - spec->max = strtod((char *)max->ptr, &eptr); + spec->max = valkey_strtod((char *)max->ptr, &eptr); if (eptr[0] != '\0' || isnan(spec->max)) return C_ERR; } } @@ -757,7 +759,7 @@ double zzlStrtod(unsigned char *vstr, unsigned int vlen) { if (vlen > sizeof(buf) - 1) vlen = sizeof(buf) - 1; memcpy(buf, vstr, vlen); buf[vlen] = '\0'; - return strtod(buf, NULL); + return valkey_strtod(buf, NULL); } double zzlGetScore(unsigned char *sptr) { diff --git a/src/unit/test_files.h b/src/unit/test_files.h index 87bc031fb4..6ab7373007 100644 --- a/src/unit/test_files.h +++ b/src/unit/test_files.h @@ -166,6 +166,7 @@ int test_ld2string(int argc, char **argv, int flags); int test_fixedpoint_d2string(int argc, char **argv, int flags); int test_version2num(int argc, char **argv, int flags); int test_reclaimFilePageCache(int argc, char **argv, int flags); +int test_valkey_strtod(int argc, char **argv, int flags); int test_ziplistCreateIntList(int argc, char **argv, int flags); int test_ziplistPop(int argc, char **argv, int flags); int test_ziplistGetElementAtIndex3(int argc, char **argv, int flags); @@ -220,6 +221,7 @@ unitTest __test_rax_c[] = {{"test_raxRandomWalk", test_raxRandomWalk}, {"test_ra unitTest __test_sds_c[] = {{"test_sds", test_sds}, {"test_typesAndAllocSize", test_typesAndAllocSize}, {"test_sdsHeaderSizes", test_sdsHeaderSizes}, {"test_sdssplitargs", test_sdssplitargs}, {NULL, NULL}}; unitTest __test_sha1_c[] = {{"test_sha1", test_sha1}, {NULL, NULL}}; unitTest __test_util_c[] = {{"test_string2ll", test_string2ll}, {"test_string2l", test_string2l}, {"test_ll2string", test_ll2string}, 
{"test_ld2string", test_ld2string}, {"test_fixedpoint_d2string", test_fixedpoint_d2string}, {"test_version2num", test_version2num}, {"test_reclaimFilePageCache", test_reclaimFilePageCache}, {NULL, NULL}}; +unitTest __test_valkey_strtod_c[] = {{"test_valkey_strtod", test_valkey_strtod}, {NULL, NULL}}; unitTest __test_ziplist_c[] = {{"test_ziplistCreateIntList", test_ziplistCreateIntList}, {"test_ziplistPop", test_ziplistPop}, {"test_ziplistGetElementAtIndex3", test_ziplistGetElementAtIndex3}, {"test_ziplistGetElementOutOfRange", test_ziplistGetElementOutOfRange}, {"test_ziplistGetLastElement", test_ziplistGetLastElement}, {"test_ziplistGetFirstElement", test_ziplistGetFirstElement}, {"test_ziplistGetElementOutOfRangeReverse", test_ziplistGetElementOutOfRangeReverse}, {"test_ziplistIterateThroughFullList", test_ziplistIterateThroughFullList}, {"test_ziplistIterateThroughListFrom1ToEnd", test_ziplistIterateThroughListFrom1ToEnd}, {"test_ziplistIterateThroughListFrom2ToEnd", test_ziplistIterateThroughListFrom2ToEnd}, {"test_ziplistIterateThroughStartOutOfRange", test_ziplistIterateThroughStartOutOfRange}, {"test_ziplistIterateBackToFront", test_ziplistIterateBackToFront}, {"test_ziplistIterateBackToFrontDeletingAllItems", test_ziplistIterateBackToFrontDeletingAllItems}, {"test_ziplistDeleteInclusiveRange0To0", test_ziplistDeleteInclusiveRange0To0}, {"test_ziplistDeleteInclusiveRange0To1", test_ziplistDeleteInclusiveRange0To1}, {"test_ziplistDeleteInclusiveRange1To2", test_ziplistDeleteInclusiveRange1To2}, {"test_ziplistDeleteWithStartIndexOutOfRange", test_ziplistDeleteWithStartIndexOutOfRange}, {"test_ziplistDeleteWithNumOverflow", test_ziplistDeleteWithNumOverflow}, {"test_ziplistDeleteFooWhileIterating", test_ziplistDeleteFooWhileIterating}, {"test_ziplistReplaceWithSameSize", test_ziplistReplaceWithSameSize}, {"test_ziplistReplaceWithDifferentSize", test_ziplistReplaceWithDifferentSize}, {"test_ziplistRegressionTestForOver255ByteStrings", 
test_ziplistRegressionTestForOver255ByteStrings}, {"test_ziplistRegressionTestDeleteNextToLastEntries", test_ziplistRegressionTestDeleteNextToLastEntries}, {"test_ziplistCreateLongListAndCheckIndices", test_ziplistCreateLongListAndCheckIndices}, {"test_ziplistCompareStringWithZiplistEntries", test_ziplistCompareStringWithZiplistEntries}, {"test_ziplistMergeTest", test_ziplistMergeTest}, {"test_ziplistStressWithRandomPayloadsOfDifferentEncoding", test_ziplistStressWithRandomPayloadsOfDifferentEncoding}, {"test_ziplistCascadeUpdateEdgeCases", test_ziplistCascadeUpdateEdgeCases}, {"test_ziplistInsertEdgeCase", test_ziplistInsertEdgeCase}, {"test_ziplistStressWithVariableSize", test_ziplistStressWithVariableSize}, {"test_BenchmarkziplistFind", test_BenchmarkziplistFind}, {"test_BenchmarkziplistIndex", test_BenchmarkziplistIndex}, {"test_BenchmarkziplistValidateIntegrity", test_BenchmarkziplistValidateIntegrity}, {"test_BenchmarkziplistCompareWithString", test_BenchmarkziplistCompareWithString}, {"test_BenchmarkziplistCompareWithNumber", test_BenchmarkziplistCompareWithNumber}, {"test_ziplistStress__ziplistCascadeUpdate", test_ziplistStress__ziplistCascadeUpdate}, {NULL, NULL}}; unitTest __test_zipmap_c[] = {{"test_zipmapIterateWithLargeKey", test_zipmapIterateWithLargeKey}, {"test_zipmapIterateThroughElements", test_zipmapIterateThroughElements}, {NULL, NULL}}; unitTest __test_zmalloc_c[] = {{"test_zmallocInitialUsedMemory", test_zmallocInitialUsedMemory}, {"test_zmallocAllocReallocCallocAndFree", test_zmallocAllocReallocCallocAndFree}, {"test_zmallocAllocZeroByteAndFree", test_zmallocAllocZeroByteAndFree}, {NULL, NULL}}; @@ -240,6 +242,7 @@ struct unitTestSuite { {"test_sds.c", __test_sds_c}, {"test_sha1.c", __test_sha1_c}, {"test_util.c", __test_util_c}, + {"test_valkey_strtod.c", __test_valkey_strtod_c}, {"test_ziplist.c", __test_ziplist_c}, {"test_zipmap.c", __test_zipmap_c}, {"test_zmalloc.c", __test_zmalloc_c}, diff --git a/src/unit/test_valkey_strtod.c 
b/src/unit/test_valkey_strtod.c new file mode 100644 index 0000000000..4796d7a5b6 --- /dev/null +++ b/src/unit/test_valkey_strtod.c @@ -0,0 +1,36 @@ +/* + * Copyright Valkey Contributors. + * All rights reserved. + * SPDX-License-Identifier: BSD 3-Clause + */ + + +#include "../valkey_strtod.h" +#include "errno.h" +#include "math.h" +#include "test_help.h" + +int test_valkey_strtod(int argc, char **argv, int flags) { + UNUSED(argc); + UNUSED(argv); + UNUSED(flags); + + errno = 0; + double value = valkey_strtod("231.2341234", NULL); + TEST_ASSERT(value == 231.2341234); + TEST_ASSERT(errno == 0); + + value = valkey_strtod("+inf", NULL); + TEST_ASSERT(isinf(value)); + TEST_ASSERT(errno == 0); + + value = valkey_strtod("-inf", NULL); + TEST_ASSERT(isinf(value)); + TEST_ASSERT(errno == 0); + + value = valkey_strtod("inf", NULL); + TEST_ASSERT(isinf(value)); + TEST_ASSERT(errno == 0); + + return 0; +} diff --git a/src/util.c b/src/util.c index b1235c2822..0b7af2d3fa 100644 --- a/src/util.c +++ b/src/util.c @@ -51,6 +51,8 @@ #include "sha256.h" #include "config.h" +#include "valkey_strtod.h" + #define UNUSED(x) ((void)(x)) /* Glob-style pattern matching. 
*/ @@ -595,10 +597,12 @@ int string2ld(const char *s, size_t slen, long double *dp) { int string2d(const char *s, size_t slen, double *dp) { errno = 0; char *eptr; - *dp = strtod(s, &eptr); + *dp = valkey_strtod(s, &eptr); if (slen == 0 || isspace(((const char *)s)[0]) || (size_t)(eptr - (char *)s) != slen || - (errno == ERANGE && (*dp == HUGE_VAL || *dp == -HUGE_VAL || fpclassify(*dp) == FP_ZERO)) || isnan(*dp)) + (errno == ERANGE && (*dp == HUGE_VAL || *dp == -HUGE_VAL || fpclassify(*dp) == FP_ZERO)) || isnan(*dp) || errno == EINVAL) { + errno = 0; return 0; + } return 1; } diff --git a/src/valkey-cli.c b/src/valkey-cli.c index dc31981483..4416e09431 100644 --- a/src/valkey-cli.c +++ b/src/valkey-cli.c @@ -65,6 +65,8 @@ #include "mt19937-64.h" #include "cli_commands.h" +#include "valkey_strtod.h" + #define UNUSED(V) ((void)V) #define OUTPUT_STANDARD 0 @@ -2537,9 +2539,10 @@ static int parseOptions(int argc, char **argv) { exit(1); } } else if (!strcmp(argv[i], "-t") && !lastarg) { + errno = 0; char *eptr; - double seconds = strtod(argv[++i], &eptr); - if (eptr[0] != '\0' || isnan(seconds) || seconds < 0.0) { + double seconds = valkey_strtod(argv[++i], &eptr); + if (eptr[0] != '\0' || isnan(seconds) || seconds < 0.0 || errno == EINVAL || errno == ERANGE) { fprintf(stderr, "Invalid connection timeout for -t.\n"); exit(1); } diff --git a/src/valkey_strtod.h b/src/valkey_strtod.h new file mode 100644 index 0000000000..037a3f3cec --- /dev/null +++ b/src/valkey_strtod.h @@ -0,0 +1,42 @@ +#ifndef FAST_FLOAT_STRTOD_H +#define FAST_FLOAT_STRTOD_H + +#ifdef USE_FAST_FLOAT + +#include "errno.h" + +/** + * Converts a null-terminated byte string to a double using the fast_float library. + * + * This function provides a C-compatible wrapper around the fast_float library's string-to-double + * conversion functionality. It aims to offer a faster alternative to the standard strtod function. + * + * str: A pointer to the null-terminated byte string to be converted. 
+ * eptr: On success, stores char pointer pointing to '\0' at the end of the string. + * On failure, stores char pointer pointing to first invalid character in the string. + * returns: On success, the function returns the converted double value. + * On failure, it returns 0.0 and stores error code in errno to ERANGE or EINVAL. + * + * note: This function uses the fast_float library (https://github.com/fastfloat/fast_float) for + * the actual conversion, which can be significantly faster than standard library functions. + * Refer to "../deps/fast_float_c_interface" for more details. + * Refer to https://github.com/fastfloat/fast_float for more information on the underlying library. + */ +double fast_float_strtod(const char *str, char **endptr); + +static inline double valkey_strtod(const char *str, char **endptr) { + errno = 0; + return fast_float_strtod(str, endptr); +} + +#else + +#include + +static inline double valkey_strtod(const char *str, char **endptr) { + return strtod(str, endptr); +} + +#endif + +#endif // FAST_FLOAT_STRTOD_H diff --git a/tests/test_helper.tcl b/tests/test_helper.tcl index 7c15413806..1f0658071a 100644 --- a/tests/test_helper.tcl +++ b/tests/test_helper.tcl @@ -35,12 +35,12 @@ foreach test_dir $test_dirs { set cluster_test_dir unit/cluster foreach file [glob -nocomplain $dir/tests/$cluster_test_dir/*.tcl] { - lappend ::cluster_all_tests $cluster_test_dir/[file root [file tail $file]] + lappend ::cluster_all_tests $cluster_test_dir/[file root [file tail $file]] } set moduleapi_test_dir unit/moduleapi foreach file [glob -nocomplain $dir/tests/$moduleapi_test_dir/*.tcl] { - lappend ::module_api_all_tests $moduleapi_test_dir/[file root [file tail $file]] + lappend ::module_api_all_tests $moduleapi_test_dir/[file root [file tail $file]] } # Index to the next test to run in the ::all_tests list. 
@@ -654,7 +654,7 @@ for {set j 0} {$j < [llength $argv]} {incr j} { } } elseif {$opt eq {--quiet}} { set ::quiet 1 - } elseif {$opt eq {--io-threads}} { + } elseif {$opt eq {--io-threads}} { set ::io_threads 1 } elseif {$opt eq {--tls} || $opt eq {--tls-module}} { package require tls 1.6 From cf1a1e0931bd2db77c23b0058d8660461873dc8f Mon Sep 17 00:00:00 2001 From: Ray Cao Date: Mon, 25 Nov 2024 23:16:46 +0800 Subject: [PATCH 45/60] Optimize sdscatrepr by batch processing printable characters (#1342) Optimize sdscatrepr by reducing realloc calls, furthermore, we can reduce memcpy calls by batch processing of consecutive printable characters. Signed-off-by: Ray Cao Co-authored-by: Ray Cao --- src/sds.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/sds.c b/src/sds.c index 4dd7d709aa..ee7a2c0f97 100644 --- a/src/sds.c +++ b/src/sds.c @@ -954,23 +954,30 @@ void sdsfreesplitres(sds *tokens, int count) { sds sdscatrepr(sds s, const char *p, size_t len) { s = sdsMakeRoomFor(s, len + 2); s = sdscatlen(s, "\"", 1); - while (len--) { - switch (*p) { - case '\\': - case '"': s = sdscatprintf(s, "\\%c", *p); break; - case '\n': s = sdscatlen(s, "\\n", 2); break; - case '\r': s = sdscatlen(s, "\\r", 2); break; - case '\t': s = sdscatlen(s, "\\t", 2); break; - case '\a': s = sdscatlen(s, "\\a", 2); break; - case '\b': s = sdscatlen(s, "\\b", 2); break; - default: - if (isprint(*p)) - s = sdscatlen(s, p, 1); - else + while (len) { + if (isprint(*p)) { + const char *start = p; + while (len && isprint(*p)) { + len--; + p++; + } + s = sdscatlen(s, start, p - start); + } else { + switch (*p) { + case '\\': + case '"': s = sdscatprintf(s, "\\%c", *p); break; + case '\n': s = sdscatlen(s, "\\n", 2); break; + case '\r': s = sdscatlen(s, "\\r", 2); break; + case '\t': s = sdscatlen(s, "\\t", 2); break; + case '\a': s = sdscatlen(s, "\\a", 2); break; + case '\b': s = sdscatlen(s, "\\b", 2); break; + default: s = sdscatprintf(s, 
"\\x%02x", (unsigned char)*p); - break; + break; + } + p++; + len--; } - p++; } return sdscatlen(s, "\"", 1); } From 2d48a39c2781e72200b2e360ef250009c6701711 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 25 Nov 2024 23:56:51 +0800 Subject: [PATCH 46/60] Save open's errno when opening temp rdb fails to prevent it from being modified (#1347) Apparently on Mac, sleep will modify errno to ETIMEDOUT, and then it prints the misleading message: Operation timed out. Signed-off-by: Binbin --- src/replication.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/replication.c b/src/replication.c index dcf7ee3f8c..92dcc3a105 100644 --- a/src/replication.c +++ b/src/replication.c @@ -3688,7 +3688,11 @@ void syncWithPrimary(connection *conn) { snprintf(tmpfile, 256, "temp-%d.%ld.rdb", (int)server.unixtime, (long int)getpid()); dfd = open(tmpfile, O_CREAT | O_WRONLY | O_EXCL, 0644); if (dfd != -1) break; + /* We save the errno of open to prevent some systems from modifying it after + * the sleep call. For example, sleep in Mac will change errno to ETIMEDOUT. */ + int saved_errno = errno; sleep(1); + errno = saved_errno; } if (dfd == -1) { serverLog(LL_WARNING, "Opening the temp file needed for PRIMARY <-> REPLICA synchronization: %s", From 469d41fb37d7c88d508b0a8c7ac495a8f00c717f Mon Sep 17 00:00:00 2001 From: Binbin Date: Tue, 26 Nov 2024 00:00:47 +0800 Subject: [PATCH 47/60] Avoid double close on repl_transfer_fd (#1349) The code is ok before 2de544cfcc6d1aa7cf6d0c75a6116f7fc27b6fd6, but now we will set server.repl_transfer_fd right after dfd was initiated, and in here we have a double close error since dfd and server.repl_transfer_fd are the same fd. Also move the declaration of dfd/maxtries to a small scope to avoid the confusion since they are only used in this code. 
Signed-off-by: Binbin --- src/replication.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/replication.c b/src/replication.c index 92dcc3a105..97aa10dfab 100644 --- a/src/replication.c +++ b/src/replication.c @@ -3414,7 +3414,6 @@ void dualChannelSetupMainConnForPsync(connection *conn) { * establish a connection with the primary. */ void syncWithPrimary(connection *conn) { char tmpfile[256], *err = NULL; - int dfd = -1, maxtries = 5; int psync_result; /* If this event fired after the user turned the instance into a primary @@ -3684,6 +3683,7 @@ void syncWithPrimary(connection *conn) { /* Prepare a suitable temp file for bulk transfer */ if (!useDisklessLoad()) { + int dfd = -1, maxtries = 5; while (maxtries--) { snprintf(tmpfile, 256, "temp-%d.%ld.rdb", (int)server.unixtime, (long int)getpid()); dfd = open(tmpfile, O_CREAT | O_WRONLY | O_EXCL, 0644); @@ -3744,7 +3744,6 @@ void syncWithPrimary(connection *conn) { /* Fall through to regular error handling */ error: - if (dfd != -1) close(dfd); connClose(conn); server.repl_transfer_s = NULL; if (server.repl_rdb_transfer_s) { From 9305b49145172da781b8af2b5b96f9643e4367ec Mon Sep 17 00:00:00 2001 From: Amit Nagler <58042354+naglera@users.noreply.github.com> Date: Tue, 26 Nov 2024 16:51:52 +0200 Subject: [PATCH 48/60] Add tag for dual-channel logs (#999) This PR introduces a consistent tagging system for dual-channel logs. The goal is to improve log readability and filterability, making it easier for operators to manage and analyze log entries. 
Resolves https://github.com/valkey-io/valkey/issues/986 --------- Signed-off-by: naglera --- src/networking.c | 21 ++-- src/replication.c | 102 +++++++++--------- src/server.h | 5 + .../integration/dual-channel-replication.tcl | 2 +- 4 files changed, 70 insertions(+), 60 deletions(-) diff --git a/src/networking.c b/src/networking.c index 93aa9d00ae..9c51efc537 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1713,10 +1713,10 @@ void freeClient(client *c) { /* Log link disconnection with replica */ if (getClientType(c) == CLIENT_TYPE_REPLICA) { - serverLog(LL_NOTICE, - c->flag.repl_rdb_channel ? "Replica %s rdb channel disconnected." - : "Connection with replica %s lost.", - replicationGetReplicaName(c)); + if (c->flag.repl_rdb_channel) + dualChannelServerLog(LL_NOTICE, "Replica %s rdb channel disconnected.", replicationGetReplicaName(c)); + else + serverLog(LL_NOTICE, "Connection with replica %s lost.", replicationGetReplicaName(c)); } /* Free the query buffer */ @@ -1963,14 +1963,15 @@ int freeClientsInAsyncFreeQueue(void) { if (!c->rdb_client_disconnect_time) { if (c->conn) connSetReadHandler(c->conn, NULL); c->rdb_client_disconnect_time = server.unixtime; - serverLog(LL_VERBOSE, "Postpone RDB client id=%llu (%s) free for %d seconds", (unsigned long long)c->id, - replicationGetReplicaName(c), server.wait_before_rdb_client_free); + dualChannelServerLog(LL_VERBOSE, "Postpone RDB client id=%llu (%s) free for %d seconds", + (unsigned long long)c->id, replicationGetReplicaName(c), server.wait_before_rdb_client_free); } if (server.unixtime - c->rdb_client_disconnect_time <= server.wait_before_rdb_client_free) continue; - serverLog(LL_NOTICE, - "Replica main channel failed to establish PSYNC within the grace period (%ld seconds). 
" - "Freeing RDB client %llu.", - (long int)(server.unixtime - c->rdb_client_disconnect_time), (unsigned long long)c->id); + dualChannelServerLog( + LL_NOTICE, + "Replica main channel failed to establish PSYNC within the grace period (%ld seconds). " + "Freeing RDB client %llu.", + (long int)(server.unixtime - c->rdb_client_disconnect_time), (unsigned long long)c->id); c->flag.protected_rdb_channel = 0; } diff --git a/src/replication.c b/src/replication.c index 97aa10dfab..260da1cd6e 100644 --- a/src/replication.c +++ b/src/replication.c @@ -227,9 +227,9 @@ void addRdbReplicaToPsyncWait(client *replica_rdb_client) { tail->refcount++; } } - serverLog(LL_DEBUG, "Add rdb replica %s to waiting psync, with cid %llu, %s ", - replicationGetReplicaName(replica_rdb_client), (unsigned long long)replica_rdb_client->id, - tail ? "tracking repl-backlog tail" : "no repl-backlog to track"); + dualChannelServerLog(LL_DEBUG, "Add rdb replica %s to waiting psync, with cid %llu, %s ", + replicationGetReplicaName(replica_rdb_client), (unsigned long long)replica_rdb_client->id, + tail ? "tracking repl-backlog tail" : "no repl-backlog to track"); replica_rdb_client->ref_repl_buf_node = tail ? ln : NULL; /* Prevent rdb client from being freed before psync is established. 
*/ replica_rdb_client->flag.protected_rdb_channel = 1; @@ -252,8 +252,8 @@ void backfillRdbReplicasToPsyncWait(void) { if (replica_rdb_client->ref_repl_buf_node) continue; replica_rdb_client->ref_repl_buf_node = ln; head->refcount++; - serverLog(LL_DEBUG, "Attach replica rdb client %llu to repl buf block", - (long long unsigned int)replica_rdb_client->id); + dualChannelServerLog(LL_DEBUG, "Attach replica rdb client %llu to repl buf block", + (long long unsigned int)replica_rdb_client->id); } raxStop(&iter); } @@ -271,10 +271,10 @@ void removeReplicaFromPsyncWait(client *replica_main_client) { } replica_rdb_client->ref_repl_buf_node = NULL; replica_rdb_client->flag.protected_rdb_channel = 0; - serverLog(LL_DEBUG, "Remove psync waiting replica %s with cid %llu, repl buffer block %s", - replicationGetReplicaName(replica_main_client), - (long long unsigned int)replica_main_client->associated_rdb_client_id, - o ? "ref count decreased" : "doesn't exist"); + dualChannelServerLog(LL_DEBUG, "Remove psync waiting replica %s with cid %llu, repl buffer block %s", + replicationGetReplicaName(replica_main_client), + (long long unsigned int)replica_main_client->associated_rdb_client_id, + o ? 
"ref count decreased" : "doesn't exist"); uint64_t id = htonu64(replica_rdb_client->id); raxRemove(server.replicas_waiting_psync, (unsigned char *)&id, sizeof(id), NULL); } @@ -391,8 +391,8 @@ void freeReplicaReferencedReplBuffer(client *replica) { if (replica->flag.repl_rdb_channel) { uint64_t rdb_cid = htonu64(replica->id); if (raxRemove(server.replicas_waiting_psync, (unsigned char *)&rdb_cid, sizeof(rdb_cid), NULL)) { - serverLog(LL_DEBUG, "Remove psync waiting replica %s with cid %llu from replicas rax.", - replicationGetReplicaName(replica), (long long unsigned int)replica->id); + dualChannelServerLog(LL_DEBUG, "Remove psync waiting replica %s with cid %llu from replicas rax.", + replicationGetReplicaName(replica), (long long unsigned int)replica->id); } } if (replica->ref_repl_buf_node != NULL) { @@ -1121,10 +1121,11 @@ void syncCommand(client *c) { * resync. */ if (primary_replid[0] != '?') server.stat_sync_partial_err++; if (c->replica_capa & REPLICA_CAPA_DUAL_CHANNEL) { - serverLog(LL_NOTICE, - "Replica %s is capable of dual channel synchronization, and partial sync isn't possible. " - "Full sync will continue with dedicated RDB channel.", - replicationGetReplicaName(c)); + dualChannelServerLog(LL_NOTICE, + "Replica %s is capable of dual channel synchronization, and partial sync " + "isn't possible. " + "Full sync will continue with dedicated RDB channel.", + replicationGetReplicaName(c)); const char *buf = "+DUALCHANNELSYNC\r\n"; if (connWrite(c->conn, buf, strlen(buf)) != (int)strlen(buf)) { freeClientAsync(c); @@ -2565,7 +2566,7 @@ void freePendingReplDataBuf(void) { * provisional primary struct, and free local replication buffer. 
*/ void replicationAbortDualChannelSyncTransfer(void) { serverAssert(server.repl_rdb_channel_state != REPL_DUAL_CHANNEL_STATE_NONE); - serverLog(LL_NOTICE, "Aborting dual channel sync"); + dualChannelServerLog(LL_NOTICE, "Aborting dual channel sync"); if (server.repl_rdb_transfer_s) { connClose(server.repl_rdb_transfer_s); server.repl_rdb_transfer_s = NULL; @@ -2594,8 +2595,9 @@ int sendCurrentOffsetToReplica(client *replica) { int buflen; buflen = snprintf(buf, sizeof(buf), "$ENDOFF:%lld %s %d %llu\r\n", server.primary_repl_offset, server.replid, server.db->id, (long long unsigned int)replica->id); - serverLog(LL_NOTICE, "Sending to replica %s RDB end offset %lld and client-id %llu", - replicationGetReplicaName(replica), server.primary_repl_offset, (long long unsigned int)replica->id); + dualChannelServerLog(LL_NOTICE, "Sending to replica %s RDB end offset %lld and client-id %llu", + replicationGetReplicaName(replica), server.primary_repl_offset, + (long long unsigned int)replica->id); if (connSyncWrite(replica->conn, buf, buflen, server.repl_syncio_timeout * 1000) != buflen) { freeClientAsync(replica); return C_ERR; @@ -2604,7 +2606,7 @@ int sendCurrentOffsetToReplica(client *replica) { } static int dualChannelReplHandleHandshake(connection *conn, sds *err) { - serverLog(LL_DEBUG, "Received first reply from primary using rdb connection."); + dualChannelServerLog(LL_DEBUG, "Received first reply from primary using rdb connection."); /* AUTH with the primary if required. 
*/ if (server.primary_auth) { char *args[] = {"AUTH", NULL, NULL}; @@ -2620,7 +2622,7 @@ static int dualChannelReplHandleHandshake(connection *conn, sds *err) { argc++; *err = sendCommandArgv(conn, argc, args, lens); if (*err) { - serverLog(LL_WARNING, "Sending command to primary in dual channel replication handshake: %s", *err); + dualChannelServerLog(LL_WARNING, "Sending command to primary in dual channel replication handshake: %s", *err); return C_ERR; } } @@ -2630,14 +2632,14 @@ static int dualChannelReplHandleHandshake(connection *conn, sds *err) { NULL); sdsfree(portstr); if (*err) { - serverLog(LL_WARNING, "Sending command to primary in dual channel replication handshake: %s", *err); + dualChannelServerLog(LL_WARNING, "Sending command to primary in dual channel replication handshake: %s", *err); return C_ERR; } if (connSetReadHandler(conn, dualChannelFullSyncWithPrimary) == C_ERR) { char conninfo[CONN_INFO_LEN]; - serverLog(LL_WARNING, "Can't create readable event for SYNC: %s (%s)", strerror(errno), - connGetInfo(conn, conninfo, sizeof(conninfo))); + dualChannelServerLog(LL_WARNING, "Can't create readable event for SYNC: %s (%s)", strerror(errno), + connGetInfo(conn, conninfo, sizeof(conninfo))); return C_ERR; } return C_OK; @@ -2646,11 +2648,11 @@ static int dualChannelReplHandleHandshake(connection *conn, sds *err) { static int dualChannelReplHandleAuthReply(connection *conn, sds *err) { *err = receiveSynchronousResponse(conn); if (*err == NULL) { - serverLog(LL_WARNING, "Primary did not respond to auth command during SYNC handshake"); + dualChannelServerLog(LL_WARNING, "Primary did not respond to auth command during SYNC handshake"); return C_ERR; } if ((*err)[0] == '-') { - serverLog(LL_WARNING, "Unable to AUTH to Primary: %s", *err); + dualChannelServerLog(LL_WARNING, "Unable to AUTH to Primary: %s", *err); return C_ERR; } server.repl_rdb_channel_state = REPL_DUAL_CHANNEL_RECEIVE_REPLCONF_REPLY; @@ -2660,17 +2662,17 @@ static int 
dualChannelReplHandleAuthReply(connection *conn, sds *err) { static int dualChannelReplHandleReplconfReply(connection *conn, sds *err) { *err = receiveSynchronousResponse(conn); if (*err == NULL) { - serverLog(LL_WARNING, "Primary did not respond to replconf command during SYNC handshake"); + dualChannelServerLog(LL_WARNING, "Primary did not respond to replconf command during SYNC handshake"); return C_ERR; } if (*err[0] == '-') { - serverLog(LL_NOTICE, "Server does not support sync with offset, dual channel sync approach cannot be used: %s", - *err); + dualChannelServerLog(LL_NOTICE, "Server does not support sync with offset, dual channel sync approach cannot be used: %s", + *err); return C_ERR; } if (connSyncWrite(conn, "SYNC\r\n", 6, server.repl_syncio_timeout * 1000) == -1) { - serverLog(LL_WARNING, "I/O error writing to Primary: %s", connGetLastError(conn)); + dualChannelServerLog(LL_WARNING, "I/O error writing to Primary: %s", connGetLastError(conn)); return C_ERR; } return C_OK; @@ -2684,7 +2686,7 @@ static int dualChannelReplHandleEndOffsetResponse(connection *conn, sds *err) { } if (*err[0] == '\0') { /* Retry again later */ - serverLog(LL_DEBUG, "Received empty $ENDOFF response"); + dualChannelServerLog(LL_DEBUG, "Received empty $ENDOFF response"); return C_RETRY; } long long reploffset; @@ -2693,7 +2695,7 @@ static int dualChannelReplHandleEndOffsetResponse(connection *conn, sds *err) { /* Parse end offset response */ char *endoff_format = "$ENDOFF:%lld %40s %d %llu"; if (sscanf(*err, endoff_format, &reploffset, primary_replid, &dbid, &rdb_client_id) != 4) { - serverLog(LL_WARNING, "Received unexpected $ENDOFF response: %s", *err); + dualChannelServerLog(LL_WARNING, "Received unexpected $ENDOFF response: %s", *err); return C_ERR; } server.rdb_client_id = rdb_client_id; @@ -2741,7 +2743,8 @@ static void dualChannelFullSyncWithPrimary(connection *conn) { /* Check for errors in the socket: after a non blocking connect() we * may find that the socket is in 
error state. */ if (connGetState(conn) != CONN_STATE_CONNECTED) { - serverLog(LL_WARNING, "Error condition on socket for dual channel replication: %s", connGetLastError(conn)); + dualChannelServerLog(LL_WARNING, "Error condition on socket for dual channel replication: %s", + connGetLastError(conn)); goto error; } switch (server.repl_rdb_channel_state) { @@ -2830,13 +2833,13 @@ int readIntoReplDataBlock(connection *conn, replDataBufBlock *data_block, size_t int nread = connRead(conn, data_block->buf + data_block->used, read); if (nread == -1) { if (connGetState(conn) != CONN_STATE_CONNECTED) { - serverLog(LL_NOTICE, "Error reading from primary: %s", connGetLastError(conn)); + dualChannelServerLog(LL_NOTICE, "Error reading from primary: %s", connGetLastError(conn)); cancelReplicationHandshake(1); } return C_ERR; } if (nread == 0) { - serverLog(LL_VERBOSE, "Provisional primary closed connection"); + dualChannelServerLog(LL_VERBOSE, "Provisional primary closed connection"); cancelReplicationHandshake(1); return C_ERR; } @@ -2865,7 +2868,7 @@ void bufferReplData(connection *conn) { if (readlen && remaining_bytes == 0) { if (server.client_obuf_limits[CLIENT_TYPE_REPLICA].hard_limit_bytes && server.pending_repl_data.len > server.client_obuf_limits[CLIENT_TYPE_REPLICA].hard_limit_bytes) { - serverLog(LL_NOTICE, "Replication buffer limit reached, stopping buffering."); + dualChannelServerLog(LL_NOTICE, "Replication buffer limit reached, stopping buffering."); /* Stop accumulating primary commands. */ connSetReadHandler(conn, NULL); break; @@ -2938,7 +2941,7 @@ void dualChannelSyncSuccess(void) { /* Wait for the accumulated buffer to be processed before reading any more replication updates */ if (server.pending_repl_data.blocks && streamReplDataBufToDb(server.primary) == C_ERR) { /* Sync session aborted during repl data streaming. 
*/ - serverLog(LL_WARNING, "Failed to stream local replication buffer into memory"); + dualChannelServerLog(LL_WARNING, "Failed to stream local replication buffer into memory"); /* Verify sync is still in progress */ if (server.repl_rdb_channel_state != REPL_DUAL_CHANNEL_STATE_NONE) { replicationAbortDualChannelSyncTransfer(); @@ -2947,7 +2950,7 @@ void dualChannelSyncSuccess(void) { return; } freePendingReplDataBuf(); - serverLog(LL_NOTICE, "Successfully streamed replication data into memory"); + dualChannelServerLog(LL_NOTICE, "Successfully streamed replication data into memory"); /* We can resume reading from the primary connection once the local replication buffer has been loaded. */ replicationSteadyStateInit(); replicationSendAck(); /* Send ACK to notify primary that replica is synced */ @@ -2963,7 +2966,7 @@ int dualChannelSyncHandlePsync(void) { if (server.repl_rdb_channel_state < REPL_DUAL_CHANNEL_RDB_LOADED) { /* RDB is still loading */ if (connSetReadHandler(server.repl_provisional_primary.conn, bufferReplData) == C_ERR) { - serverLog(LL_WARNING, "Error while setting readable handler: %s", strerror(errno)); + dualChannelServerLog(LL_WARNING, "Error while setting readable handler: %s", strerror(errno)); cancelReplicationHandshake(1); return C_ERR; } @@ -2972,7 +2975,7 @@ int dualChannelSyncHandlePsync(void) { } serverAssert(server.repl_rdb_channel_state == REPL_DUAL_CHANNEL_RDB_LOADED); /* RDB is loaded */ - serverLog(LL_DEBUG, "Dual channel sync - psync established after rdb load"); + dualChannelServerLog(LL_DEBUG, "Psync established after rdb load"); dualChannelSyncSuccess(); return C_OK; } @@ -3066,8 +3069,9 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) { /* While in dual channel replication, we should use our prepared repl id and offset. 
*/ psync_replid = server.repl_provisional_primary.replid; snprintf(psync_offset, sizeof(psync_offset), "%lld", server.repl_provisional_primary.reploff + 1); - serverLog(LL_NOTICE, "Trying a partial resynchronization using main channel (request %s:%s).", psync_replid, - psync_offset); + dualChannelServerLog(LL_NOTICE, + "Trying a partial resynchronization using main channel (request %s:%s).", + psync_replid, psync_offset); } else if (server.cached_primary) { psync_replid = server.cached_primary->replid; snprintf(psync_offset, sizeof(psync_offset), "%lld", server.cached_primary->reploff + 1); @@ -3214,7 +3218,7 @@ int replicaTryPartialResynchronization(connection *conn, int read_reply) { /* A response of +DUALCHANNELSYNC from the primary implies that partial * synchronization is not possible and that the primary supports full * sync using dedicated RDB channel. Full sync will continue that way. */ - serverLog(LL_NOTICE, "PSYNC is not possible, initialize RDB channel."); + dualChannelServerLog(LL_NOTICE, "PSYNC is not possible, initialize RDB channel."); sdsfree(reply); return PSYNC_FULLRESYNC_DUAL_CHANNEL; } @@ -3258,7 +3262,7 @@ int dualChannelReplMainConnRecvCapaReply(connection *conn, sds *err) { *err = receiveSynchronousResponse(conn); if (*err == NULL) return C_ERR; if ((*err)[0] == '-') { - serverLog(LL_NOTICE, "Primary does not understand REPLCONF identify: %s", *err); + dualChannelServerLog(LL_NOTICE, "Primary does not understand REPLCONF identify: %s", *err); return C_ERR; } return C_OK; @@ -3267,7 +3271,7 @@ int dualChannelReplMainConnRecvCapaReply(connection *conn, sds *err) { int dualChannelReplMainConnSendPsync(connection *conn, sds *err) { if (server.debug_pause_after_fork) debugPauseProcess(); if (replicaTryPartialResynchronization(conn, 0) == PSYNC_WRITE_ERROR) { - serverLog(LL_WARNING, "Aborting dual channel sync. Write error."); + dualChannelServerLog(LL_WARNING, "Aborting dual channel sync. 
Write error."); *err = sdsnew(connGetLastError(conn)); return C_ERR; } @@ -3279,8 +3283,8 @@ int dualChannelReplMainConnRecvPsyncReply(connection *conn, sds *err) { if (psync_result == PSYNC_WAIT_REPLY) return C_OK; /* Try again later... */ if (psync_result == PSYNC_CONTINUE) { - serverLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Primary accepted a Partial Resynchronization%s", - server.repl_rdb_transfer_s != NULL ? ", RDB load in background." : "."); + dualChannelServerLog(LL_NOTICE, "PRIMARY <-> REPLICA sync: Primary accepted a Partial Resynchronization%s", + server.repl_rdb_transfer_s != NULL ? ", RDB load in background." : "."); if (server.supervised_mode == SUPERVISED_SYSTEMD) { serverCommunicateSystemd("STATUS=PRIMARY <-> REPLICA sync: Partial Resynchronization accepted. Ready to " "accept connections in read-write mode.\n"); @@ -3328,7 +3332,7 @@ void dualChannelSetupMainConnForPsync(connection *conn) { } if (ret == C_ERR) { - serverLog(LL_WARNING, "Aborting dual channel sync. Main channel psync result %d %s", ret, err ? err : ""); + dualChannelServerLog(LL_WARNING, "Aborting dual channel sync. Main channel psync result %d %s", ret, err ? 
err : ""); cancelReplicationHandshake(1); } sdsfree(err); @@ -3717,8 +3721,8 @@ void syncWithPrimary(connection *conn) { } if (connSetReadHandler(conn, NULL) == C_ERR) { char conninfo[CONN_INFO_LEN]; - serverLog(LL_WARNING, "Can't clear main connection handler: %s (%s)", strerror(errno), - connGetInfo(conn, conninfo, sizeof(conninfo))); + dualChannelServerLog(LL_WARNING, "Can't clear main connection handler: %s (%s)", strerror(errno), + connGetInfo(conn, conninfo, sizeof(conninfo))); goto error; } server.repl_rdb_channel_state = REPL_DUAL_CHANNEL_SEND_HANDSHAKE; diff --git a/src/server.h b/src/server.h index 09b67b2670..0ec105a7ba 100644 --- a/src/server.h +++ b/src/server.h @@ -4044,6 +4044,11 @@ void debugPauseProcess(void); _serverLog(level, __VA_ARGS__); \ } while (0) +/* dualChannelServerLog - Log messages related to dual-channel operations + * This macro wraps the serverLog function, prepending "" + * to the log message. */ +#define dualChannelServerLog(level, ...) serverLog(level, " " __VA_ARGS__) + #define serverDebug(fmt, ...) printf("DEBUG %s:%d > " fmt "\n", __FILE__, __LINE__, __VA_ARGS__) #define serverDebugMark() printf("-- MARK %s:%d --\n", __FILE__, __LINE__) diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index 05bdc130c1..055ed670ab 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -485,7 +485,7 @@ start_server {tags {"dual-channel-replication external:skip"}} { } wait_for_value_to_propegate_to_replica $primary $replica "key1" # Confirm the occurrence of a race condition. 
- wait_for_log_messages -1 {"*Dual channel sync - psync established after rdb load*"} 0 2000 1 + wait_for_log_messages -1 {"* Psync established after rdb load*"} 0 2000 1 } } } From 66ae8b71352853ee90a0a9d4cddbbb406c189416 Mon Sep 17 00:00:00 2001 From: ranshid <88133677+ranshid@users.noreply.github.com> Date: Wed, 27 Nov 2024 07:34:02 +0200 Subject: [PATCH 49/60] change the container image to ubuntu:plucky (#1359) Our fortify workflow is running on an ubuntu lunar container that has been EOL since [January 25, 2024](https://lists.ubuntu.com/archives/ubuntu-announce/2024-January/000298.html). This causes the workflow to fail during update actions like: ``` apt-get update && apt-get install -y make gcc-13 update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-1[3](https://github.com/valkey-io/valkey/actions/runs/12021130026/job/33547460209#step:5:3) 100 make all-with-unit-tests CC=gcc OPT=-O3 SERVER_CFLAGS='-Werror -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3' shell: sh -e {0} Ign:1 http://security.ubuntu.com/ubuntu lunar-security InRelease Err:2 http://security.ubuntu.com/ubuntu lunar-security Release [4](https://github.com/valkey-io/valkey/actions/runs/12021130026/job/33547460209#step:5:4)04 Not Found [IP: 91.189.91.82 80] Ign:3 http://archive.ubuntu.com/ubuntu lunar InRelease Ign:4 http://archive.ubuntu.com/ubuntu lunar-updates InRelease Ign:[5](https://github.com/valkey-io/valkey/actions/runs/12021130026/job/33547460209#step:5:5) http://archive.ubuntu.com/ubuntu lunar-backports InRelease Err:[6](https://github.com/valkey-io/valkey/actions/runs/12021130026/job/33547460209#step:5:7) http://archive.ubuntu.com/ubuntu lunar Release 404 Not Found [IP: 185.125.190.81 80] Err:7 http://archive.ubuntu.com/ubuntu lunar-updates Release 404 Not Found [IP: 185.125.190.81 80] Err:8 http://archive.ubuntu.com/ubuntu lunar-backports Release 404 Not Found [IP: 185.125.190.81 80] Reading package lists...
E: The repository 'http://security.ubuntu.com/ubuntu lunar-security Release' does not have a Release file. E: The repository 'http://archive.ubuntu.com/ubuntu lunar Release' does not have a Release file. E: The repository 'http://archive.ubuntu.com/ubuntu lunar-updates Release' does not have a Release file. E: The repository 'http://archive.ubuntu.com/ubuntu lunar-backports Release' does not have a Release file. update-alternatives: error: alternative path /usr/bin/gcc-[13](https://github.com/valkey-io/valkey/actions/runs/12021130026/job/33547460209#step:5:14) doesn't exist Error: Process completed with exit code 2. ``` example: https://github.com/valkey-io/valkey/actions/runs/12021130026/job/33547460209 This pr uses the latest stable ubuntu image release [plucky](https://hub.docker.com/layers/library/ubuntu/plucky/images/sha256-dc4565c7636f006c26d54c988faae576465e825ea349fef6fd3af6bf5100e8b6?context=explore) Signed-off-by: Ran Shidlansik --- .github/workflows/daily.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/daily.yml b/.github/workflows/daily.yml index e39e672689..c06d73440d 100644 --- a/.github/workflows/daily.yml +++ b/.github/workflows/daily.yml @@ -86,7 +86,7 @@ jobs: (github.event_name == 'schedule' && github.repository == 'valkey-io/valkey') || (github.event_name == 'pull_request' && (contains(github.event.pull_request.labels.*.name, 'run-extra-tests') || github.event.pull_request.base.ref != 'unstable'))) && !contains(github.event.inputs.skipjobs, 'fortify') - container: ubuntu:lunar + container: ubuntu:plucky timeout-minutes: 14400 steps: - name: prep From 5d08149e726bb7d393d76401c7be683ceaf67b7b Mon Sep 17 00:00:00 2001 From: Binbin Date: Wed, 27 Nov 2024 18:02:07 +0800 Subject: [PATCH 50/60] Use fake client flag to replace not conn check (#1198) The fake client flag was introduced in #1063, we want this to replace all !conn fake client checks. 
Signed-off-by: Binbin --- src/module.c | 1 + src/networking.c | 12 ++++++++++-- src/server.c | 3 ++- src/server.h | 7 ++++--- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/module.c b/src/module.c index 1e98b36f30..794038beb4 100644 --- a/src/module.c +++ b/src/module.c @@ -681,6 +681,7 @@ void moduleReleaseTempClient(client *c) { c->bufpos = 0; c->raw_flag = 0; c->flag.module = 1; + c->flag.fake = 1; c->user = NULL; /* Root user */ c->cmd = c->lastcmd = c->realcmd = c->io_parsed_cmd = NULL; if (c->bstate.async_rm_call_handle) { diff --git a/src/networking.c b/src/networking.c index 9c51efc537..01aaa48148 100644 --- a/src/networking.c +++ b/src/networking.c @@ -314,7 +314,11 @@ int prepareClientToWrite(client *c) { * is set. */ if (c->flag.primary && !c->flag.primary_force_reply) return C_ERR; - if (!c->conn) return C_ERR; /* Fake client for AOF loading. */ + /* Skip the fake client, such as the fake client for AOF loading. + * But CLIENT_ID_CACHED_RESPONSE is allowed since it is a fake client + * but has a connection to cache the response. */ + if (c->flag.fake && c->id != CLIENT_ID_CACHED_RESPONSE) return C_ERR; + serverAssert(c->conn); /* Schedule the client to write the output buffers to the socket, unless * it should already be setup to do so (it has already pending data). */ @@ -348,6 +352,9 @@ sds aggregateClientOutputBuffer(client *c) { * It needs be paired with `deleteCachedResponseClient` function to stop caching. */ client *createCachedResponseClient(int resp) { struct client *recording_client = createClient(NULL); + /* It is a fake client but with a connection, setting a special client id, + * so we can identify it's a fake cached response client. */ + recording_client->id = CLIENT_ID_CACHED_RESPONSE; recording_client->resp = resp; /* Allocating the `conn` allows to prepare the caching client before adding * data to the clients output buffer by `prepareClientToWrite`. 
*/ @@ -4499,7 +4506,8 @@ int checkClientOutputBufferLimits(client *c) { * * Returns 1 if client was (flagged) closed. */ int closeClientOnOutputBufferLimitReached(client *c, int async) { - if (!c->conn) return 0; /* It is unsafe to free fake clients. */ + if (c->flag.fake) return 0; /* It is unsafe to free fake clients. */ + serverAssert(c->conn); serverAssert(c->reply_bytes < SIZE_MAX - (1024 * 64)); /* Note that c->reply_bytes is irrelevant for replica clients * (they use the global repl buffers). */ diff --git a/src/server.c b/src/server.c index 6d346ac74c..a83ef9096c 100644 --- a/src/server.c +++ b/src/server.c @@ -970,9 +970,10 @@ void updateClientMemoryUsage(client *c) { } int clientEvictionAllowed(client *c) { - if (server.maxmemory_clients == 0 || c->flag.no_evict || !c->conn) { + if (server.maxmemory_clients == 0 || c->flag.no_evict || c->flag.fake) { return 0; } + serverAssert(c->conn); int type = getClientType(c); return (type == CLIENT_TYPE_NORMAL || type == CLIENT_TYPE_PUBSUB); } diff --git a/src/server.h b/src/server.h index 0ec105a7ba..70bd3868c3 100644 --- a/src/server.h +++ b/src/server.h @@ -1094,9 +1094,10 @@ typedef struct { /* With multiplexing we need to take per-client state. * Clients are taken in a linked list. */ -#define CLIENT_ID_AOF (UINT64_MAX) /* Reserved ID for the AOF client. If you \ - need more reserved IDs use UINT64_MAX-1, \ - -2, ... and so forth. */ +#define CLIENT_ID_AOF (UINT64_MAX) /* Reserved ID for the AOF client. If you \ + need more reserved IDs use UINT64_MAX-1, \ + -2, ... and so forth. */ +#define CLIENT_ID_CACHED_RESPONSE (UINT64_MAX - 1) /* Client for cached response, see createCachedResponseClient. */ /* Replication backlog is not a separate memory, it just is one consumer of * the global replication buffer. 
This structure records the reference of From db7b7396ff1cc98832396a57e8d3e76e0eebd5fa Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 28 Nov 2024 00:16:55 +0800 Subject: [PATCH 51/60] Make KEYS can visit expired key in import-source state (#1326) After #1185, a client in import-source state can visit expired key both in read commands and write commands, this commit handle keyIsExpired function to handle import-source state as well, so KEYS can visit the expired key. This is not particularly important, but it ensures the definition, also doing some cleanup around the test, verified that the client can indeed visit the expired key. Signed-off-by: Binbin --- src/db.c | 10 ++++++++-- src/networking.c | 2 +- tests/unit/expire.tcl | 23 ++++++++++++++--------- 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/src/db.c b/src/db.c index d3ef19027d..3f6452c44c 100644 --- a/src/db.c +++ b/src/db.c @@ -390,7 +390,7 @@ robj *dbRandomKey(serverDb *db) { if (allvolatile && (server.primary_host || server.import_mode) && --maxtries == 0) { /* If the DB is composed only of keys with an expire set, * it could happen that all the keys are already logically - * expired in the repilca, so the function cannot stop because + * expired in the replica, so the function cannot stop because * expireIfNeeded() is false, nor it can stop because * dictGetFairRandomKey() returns NULL (there are keys to return). * To prevent the infinite loop we do some tries, but if there @@ -1808,7 +1808,13 @@ int keyIsExpiredWithDictIndex(serverDb *db, robj *key, int dict_index) { /* Check if the key is expired. */ int keyIsExpired(serverDb *db, robj *key) { int dict_index = getKVStoreIndexForKey(key->ptr); - return keyIsExpiredWithDictIndex(db, key, dict_index); + if (!keyIsExpiredWithDictIndex(db, key, dict_index)) return 0; + + /* See expireIfNeededWithDictIndex for more details. 
*/ + if (server.primary_host == NULL && server.import_mode) { + if (server.current_client && server.current_client->flag.import_source) return 0; + } + return 1; } keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, int flags, int dict_index) { diff --git a/src/networking.c b/src/networking.c index 01aaa48148..97479967f6 100644 --- a/src/networking.c +++ b/src/networking.c @@ -3617,7 +3617,7 @@ void clientCommand(client *c) { "NO-TOUCH (ON|OFF)", " Will not touch LRU/LFU stats when this mode is on.", "IMPORT-SOURCE (ON|OFF)", - " Mark this connection as an import source if server.import_mode is true.", + " Mark this connection as an import source if import-mode is enabled.", " Sync tools can set their connections into 'import-source' state to visit", " expired keys.", NULL}; diff --git a/tests/unit/expire.tcl b/tests/unit/expire.tcl index fba425f62d..941acfad38 100644 --- a/tests/unit/expire.tcl +++ b/tests/unit/expire.tcl @@ -841,7 +841,7 @@ start_server {tags {"expire"}} { r set foo1 bar PX 1 r set foo2 bar PX 1 - after 100 + after 10 assert_equal [r dbsize] {2} @@ -879,22 +879,27 @@ start_server {tags {"expire"}} { assert_equal [r debug set-active-expire 1] {OK} } {} {needs:debug} - test {RANDOMKEY can return expired key in import mode} { + test {Client can visit expired key in import-source state} { r flushall r config set import-mode yes - assert_equal [r client import-source on] {OK} - r set foo1 bar PX 1 + r set foo1 1 PX 1 after 10 - set client [valkey [srv "host"] [srv "port"] 0 $::tls] - if {!$::singledb} { - $client select 9 - } - assert_equal [$client ttl foo1] {-2} + # Normal clients cannot visit expired key. + assert_equal [r get foo1] {} + assert_equal [r ttl foo1] {-2} + assert_equal [r dbsize] 1 + # Client can visit expired key when in import-source state. 
+ assert_equal [r client import-source on] {OK} + assert_equal [r ttl foo1] {0} + assert_equal [r get foo1] {1} + assert_equal [r incr foo1] {2} assert_equal [r randomkey] {foo1} + assert_equal [r scan 0 match * count 10000] {0 foo1} + assert_equal [r keys *] {foo1} assert_equal [r client import-source off] {OK} r config set import-mode no From a939cb88ee0c0512c003106be483b7c6968b3e7f Mon Sep 17 00:00:00 2001 From: Binbin Date: Thu, 28 Nov 2024 14:10:48 +0800 Subject: [PATCH 52/60] Handle keyIsExpiredWithDictIndex to make it check for import mode (#1368) In #1326 we make KEYS can visit expired key in import-source state by updating keyIsExpired to check for import mode. But after #1205, we now use keyIsExpiredWithDictIndex to optimize and remove the redundant dict_index, and keyIsExpiredWithDictIndex does not handle this logic. In this commit, we handle keyIsExpiredWithDictIndex to make it check for import mode as well so that KEYS can visit the expired key. Signed-off-by: Binbin --- src/db.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/db.c b/src/db.c index 3f6452c44c..3c3ccb4899 100644 --- a/src/db.c +++ b/src/db.c @@ -1789,7 +1789,7 @@ void propagateDeletion(serverDb *db, robj *key, int lazy) { decrRefCount(argv[1]); } -int keyIsExpiredWithDictIndex(serverDb *db, robj *key, int dict_index) { +static int keyIsExpiredWithDictIndexImpl(serverDb *db, robj *key, int dict_index) { /* Don't expire anything while loading. It will be done later. */ if (server.loading) return 0; @@ -1806,9 +1806,8 @@ int keyIsExpiredWithDictIndex(serverDb *db, robj *key, int dict_index) { } /* Check if the key is expired. 
*/ -int keyIsExpired(serverDb *db, robj *key) { - int dict_index = getKVStoreIndexForKey(key->ptr); - if (!keyIsExpiredWithDictIndex(db, key, dict_index)) return 0; +int keyIsExpiredWithDictIndex(serverDb *db, robj *key, int dict_index) { + if (!keyIsExpiredWithDictIndexImpl(db, key, dict_index)) return 0; /* See expireIfNeededWithDictIndex for more details. */ if (server.primary_host == NULL && server.import_mode) { @@ -1817,9 +1816,15 @@ int keyIsExpired(serverDb *db, robj *key) { return 1; } +/* Check if the key is expired. */ +int keyIsExpired(serverDb *db, robj *key) { + int dict_index = getKVStoreIndexForKey(key->ptr); + return keyIsExpiredWithDictIndex(db, key, dict_index); +} + keyStatus expireIfNeededWithDictIndex(serverDb *db, robj *key, int flags, int dict_index) { if (server.lazy_expire_disabled) return KEY_VALID; - if (!keyIsExpiredWithDictIndex(db, key, dict_index)) return KEY_VALID; + if (!keyIsExpiredWithDictIndexImpl(db, key, dict_index)) return KEY_VALID; /* If we are running in the context of a replica, instead of * evicting the expired key from the database, we return ASAP: From fd58f8d0585a3e558fbb837c2302ef51dc8d1810 Mon Sep 17 00:00:00 2001 From: zvi-code <54795925+zvi-code@users.noreply.github.com> Date: Thu, 28 Nov 2024 17:27:00 +0200 Subject: [PATCH 53/60] Disable lazy free in defrag test to fix 32bit daily failure (#1370) Signed-off-by: Zvi Schneider Co-authored-by: Zvi Schneider --- tests/unit/memefficiency.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/memefficiency.tcl b/tests/unit/memefficiency.tcl index d5a6a6efe2..67329f03f1 100644 --- a/tests/unit/memefficiency.tcl +++ b/tests/unit/memefficiency.tcl @@ -720,11 +720,11 @@ run_solo {defrag} { } } - start_cluster 1 0 {tags {"defrag external:skip cluster"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save ""}} { + start_cluster 1 0 {tags {"defrag external:skip cluster"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save "" 
lazyfree-lazy-user-del no}} { test_active_defrag "cluster" } - start_server {tags {"defrag external:skip standalone"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save ""}} { + start_server {tags {"defrag external:skip standalone"} overrides {appendonly yes auto-aof-rewrite-percentage 0 save "" lazyfree-lazy-user-del no}} { test_active_defrag "standalone" } } ;# run_solo From 4695d118dd6126b9b4f3e3415198df398e8bbb79 Mon Sep 17 00:00:00 2001 From: zhenwei pi Date: Fri, 29 Nov 2024 18:13:34 +0800 Subject: [PATCH 54/60] RDMA builtin support (#1209) There are several patches in this PR: * Abstract set/rewrite config bind option: `bind` option is a special config, `socket` and `tls` are using the same one. However RDMA uses the similar style but different one. Use a bit abstract work to make it flexible for both `socket` and `RDMA`. (Even for QUIC in the future.) * Introduce closeListener for connection type: closing socket by a simple syscall would be fine, RDMA has complex logic. Introduce connection type specific close listener method. * RDMA: Use valkey.conf style instead of module parameters: use `--rdma-bind` and `--rdma-port` style instead of module parameters. The module style config `rdma.bind` and `rdma.port` are removed. * RDMA: Support builtin: support `make BUILD_RDMA=yes`. module style is still kept for now. 
Signed-off-by: zhenwei pi --- README.md | 26 +++- cmake/Modules/SourceFiles.cmake | 1 + cmake/Modules/ValkeySetup.cmake | 29 +++-- src/CMakeLists.txt | 2 +- src/Makefile | 30 ++--- src/config.c | 114 +++++++++++++--- src/connection.c | 3 + src/connection.h | 10 ++ src/rdma.c | 222 ++++++++------------------------ src/server.c | 28 ++-- src/server.h | 13 +- src/socket.c | 14 ++ src/tls.c | 5 + src/unix.c | 5 + tests/rdma/run.py | 2 +- tests/unit/introspection.tcl | 4 + valkey.conf | 48 +++++++ 17 files changed, 314 insertions(+), 242 deletions(-) diff --git a/README.md b/README.md index a32ac255df..c447cc8d47 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,13 @@ To build TLS as Valkey module: Note that sentinel mode does not support TLS module. To build with experimental RDMA support you'll need RDMA development libraries -(e.g. librdmacm-dev and libibverbs-dev on Debian/Ubuntu). For now, Valkey only -supports RDMA as connection module mode. Run: +(e.g. librdmacm-dev and libibverbs-dev on Debian/Ubuntu). + +To build RDMA support as Valkey built-in: + + % make BUILD_RDMA=yes + +To build RDMA as Valkey module: % make BUILD_RDMA=module @@ -203,20 +208,27 @@ Note that Valkey Over RDMA is an experimental feature. It may be changed or removed in any minor or major version. Currently, it is only supported on Linux. 
-To manually run a Valkey server with RDMA mode: +* RDMA built-in mode: + ``` + ./src/valkey-server --protected-mode no \ + --rdma-bind 192.168.122.100 --rdma-port 6379 + ``` - % ./src/valkey-server --protected-mode no \ - --loadmodule src/valkey-rdma.so bind=192.168.122.100 port=6379 +* RDMA module mode: + ``` + ./src/valkey-server --protected-mode no \ + --loadmodule src/valkey-rdma.so --rdma-bind 192.168.122.100 --rdma-port 6379 + ``` It's possible to change bind address/port of RDMA by runtime command: - 192.168.122.100:6379> CONFIG SET rdma.port 6380 + 192.168.122.100:6379> CONFIG SET rdma-port 6380 It's also possible to have both RDMA and TCP available, and there is no conflict of TCP(6379) and RDMA(6379), Ex: % ./src/valkey-server --protected-mode no \ - --loadmodule src/valkey-rdma.so bind=192.168.122.100 port=6379 \ + --loadmodule src/valkey-rdma.so --rdma-bind 192.168.122.100 --rdma-port 6379 \ --port 6379 Note that the network card (192.168.122.100 of this example) should support diff --git a/cmake/Modules/SourceFiles.cmake b/cmake/Modules/SourceFiles.cmake index 873229d6f0..c34ae644a2 100644 --- a/cmake/Modules/SourceFiles.cmake +++ b/cmake/Modules/SourceFiles.cmake @@ -88,6 +88,7 @@ set(VALKEY_SERVER_SRCS ${CMAKE_SOURCE_DIR}/src/tracking.c ${CMAKE_SOURCE_DIR}/src/socket.c ${CMAKE_SOURCE_DIR}/src/tls.c + ${CMAKE_SOURCE_DIR}/src/rdma.c ${CMAKE_SOURCE_DIR}/src/sha256.c ${CMAKE_SOURCE_DIR}/src/timeout.c ${CMAKE_SOURCE_DIR}/src/setcpuaffinity.c diff --git a/cmake/Modules/ValkeySetup.cmake b/cmake/Modules/ValkeySetup.cmake index 4fafd07910..8a4d4da1c9 100644 --- a/cmake/Modules/ValkeySetup.cmake +++ b/cmake/Modules/ValkeySetup.cmake @@ -208,25 +208,30 @@ if (BUILD_RDMA) # RDMA support (Linux only) if (LINUX AND NOT APPLE) valkey_parse_build_option(${BUILD_RDMA} USE_RDMA) + find_package(PkgConfig REQUIRED) + # Locate librdmacm & libibverbs, fail if we can't find them + valkey_pkg_config(librdmacm RDMACM_LIBS) + valkey_pkg_config(libibverbs IBVERBS_LIBS) + 
message(STATUS "${RDMACM_LIBS};${IBVERBS_LIBS}") + list(APPEND RDMA_LIBS "${RDMACM_LIBS};${IBVERBS_LIBS}") + if (USE_RDMA EQUAL 2) # Module message(STATUS "Building RDMA as module") add_valkey_server_compiler_options("-DUSE_RDMA=2") - - # Locate librdmacm & libibverbs, fail if we can't find them - valkey_pkg_config(librdmacm RDMACM_LIBS) - valkey_pkg_config(libibverbs IBVERBS_LIBS) - - list(APPEND RDMA_LIBS "${RDMACM_LIBS};${IBVERBS_LIBS}") - set(BUILD_RDMA_MODULE 1) - elseif (USE_RDMA EQUAL 1) - # RDMA can only be built as a module. So disable it - message(WARNING "BUILD_RDMA can be one of: [NO | 0 | MODULE], but '${BUILD_RDMA}' was provided") - message(STATUS "RDMA build is disabled") - set(USE_RDMA 0) + set(BUILD_RDMA_MODULE 2) + elseif (USE_RDMA EQUAL 1) # Builtin + message(STATUS "Building RDMA as builtin") + add_valkey_server_compiler_options("-DUSE_RDMA=1") + add_valkey_server_compiler_options("-DBUILD_RDMA_MODULE=0") + list(APPEND SERVER_LIBS "${RDMA_LIBS}") endif () else () message(WARNING "RDMA is only supported on Linux platforms") endif () +else () + # By default, RDMA is disabled + message(STATUS "RDMA is disabled") + set(USE_RDMA 0) endif () set(BUILDING_ARM64 0) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 51e1b5a2e6..b87dff3db0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -55,7 +55,7 @@ if (BUILD_RDMA_MODULE) set(MODULE_NAME "valkey-rdma") message(STATUS "Building RDMA module") add_library(${MODULE_NAME} SHARED "${VALKEY_RDMA_MODULE_SRCS}") - target_compile_options(${MODULE_NAME} PRIVATE -DBUILD_RDMA_MODULE -DUSE_RDMA=1) + target_compile_options(${MODULE_NAME} PRIVATE -DBUILD_RDMA_MODULE=2 -DUSE_RDMA=1) target_link_libraries(${MODULE_NAME} "${RDMA_LIBS}") # remove the "lib" prefix from the module set_target_properties(${MODULE_NAME} PROPERTIES PREFIX "") diff --git a/src/Makefile b/src/Makefile index 0cbf5763cb..3b4ad0a2ef 100644 --- a/src/Makefile +++ b/src/Makefile @@ -325,26 +325,26 @@ ifeq ($(BUILD_TLS),module) 
TLS_MODULE_CFLAGS+=-DUSE_OPENSSL=$(BUILD_MODULE) $(OPENSSL_CFLAGS) -DBUILD_TLS_MODULE=$(BUILD_MODULE) endif -BUILD_RDMA:=no -RDMA_MODULE= -RDMA_MODULE_NAME:=valkey-rdma$(PROG_SUFFIX).so -RDMA_MODULE_CFLAGS:=$(FINAL_CFLAGS) -ifeq ($(BUILD_RDMA),module) - FINAL_CFLAGS+=-DUSE_RDMA=$(BUILD_MODULE) - RDMA_PKGCONFIG := $(shell $(PKG_CONFIG) --exists librdmacm libibverbs && echo $$?) +RDMA_LIBS= +RDMA_PKGCONFIG := $(shell $(PKG_CONFIG) --exists librdmacm libibverbs && echo $$?) ifeq ($(RDMA_PKGCONFIG),0) RDMA_LIBS=$(shell $(PKG_CONFIG) --libs librdmacm libibverbs) else RDMA_LIBS=-lrdmacm -libverbs endif - RDMA_MODULE=$(RDMA_MODULE_NAME) - RDMA_MODULE_CFLAGS+=-DUSE_RDMA=$(BUILD_YES) -DBUILD_RDMA_MODULE $(RDMA_LIBS) -else -ifeq ($(BUILD_RDMA),no) - # disable RDMA, do nothing -else - $(error "RDMA is only supported as module (BUILD_RDMA=module), or disabled (BUILD_RDMA=no)") + +ifeq ($(BUILD_RDMA),yes) + FINAL_CFLAGS+=-DUSE_RDMA=$(BUILD_YES) -DBUILD_RDMA_MODULE=$(BUILD_NO) + FINAL_LIBS += $(RDMA_LIBS) endif + +RDMA_MODULE= +RDMA_MODULE_NAME:=valkey-rdma$(PROG_SUFFIX).so +RDMA_MODULE_CFLAGS:=$(FINAL_CFLAGS) +ifeq ($(BUILD_RDMA),module) + FINAL_CFLAGS+=-DUSE_RDMA=$(BUILD_MODULE) + RDMA_MODULE=$(RDMA_MODULE_NAME) + RDMA_MODULE_CFLAGS+=-DUSE_RDMA=$(BUILD_MODULE) -DBUILD_RDMA_MODULE=$(BUILD_MODULE) $(RDMA_LIBS) endif ifndef V @@ -411,7 +411,7 @@ endif ENGINE_NAME=valkey SERVER_NAME=$(ENGINE_NAME)-server$(PROG_SUFFIX) ENGINE_SENTINEL_NAME=$(ENGINE_NAME)-sentinel$(PROG_SUFFIX) -ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o 
crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o +ENGINE_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o memory_prefetch.o io_threads.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o cluster_slot_stats.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o valkey-check-rdb.o valkey-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o allocator_defrag.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o rdma.o ENGINE_CLI_NAME=$(ENGINE_NAME)-cli$(PROG_SUFFIX) ENGINE_CLI_OBJ=anet.o adlist.o dict.o valkey-cli.o zmalloc.o release.o ae.o serverassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o ENGINE_BENCHMARK_NAME=$(ENGINE_NAME)-benchmark$(PROG_SUFFIX) diff --git a/src/config.c b/src/config.c index 
c4009adefa..7f0901c50a 100644 --- a/src/config.c +++ b/src/config.c @@ -1536,10 +1536,27 @@ void rewriteConfigOOMScoreAdjValuesOption(standardConfig *config, const char *na } /* Rewrite the bind option. */ -void rewriteConfigBindOption(standardConfig *config, const char *name, struct rewriteConfigState *state) { +static void rewriteConfigBindOption(standardConfig *config, const char *name, struct rewriteConfigState *state, char **bindaddr, int bindaddr_count) { UNUSED(config); int force = 1; sds line, addresses; + + /* Rewrite as bind ... */ + if (bindaddr_count > 0) + addresses = sdsjoin(bindaddr, bindaddr_count, " "); + else + addresses = sdsnew("\"\""); + line = sdsnew(name); + line = sdscatlen(line, " ", 1); + line = sdscatsds(line, addresses); + sdsfree(addresses); + + rewriteConfigRewriteLine(state, name, line, force); +} + +/* Rewrite the bind option. */ +static void rewriteConfigSocketBindOption(standardConfig *config, const char *name, struct rewriteConfigState *state) { + UNUSED(config); int is_default = 0; /* Compare server.bindaddr with CONFIG_DEFAULT_BINDADDR */ @@ -1559,17 +1576,7 @@ void rewriteConfigBindOption(standardConfig *config, const char *name, struct re return; } - /* Rewrite as bind ... */ - if (server.bindaddr_count > 0) - addresses = sdsjoin(server.bindaddr, server.bindaddr_count, " "); - else - addresses = sdsnew("\"\""); - line = sdsnew(name); - line = sdscatlen(line, " ", 1); - line = sdscatsds(line, addresses); - sdsfree(addresses); - - rewriteConfigRewriteLine(state, name, line, force); + rewriteConfigBindOption(config, name, state, server.bindaddr, server.bindaddr_count); } /* Rewrite the loadmodule option. 
*/ @@ -2637,7 +2644,7 @@ static int applyBind(const char **err) { tcp_listener->ct = connectionByType(CONN_TYPE_SOCKET); if (changeListener(tcp_listener) == C_ERR) { *err = "Failed to bind to specified addresses."; - if (tls_listener) closeListener(tls_listener); /* failed with TLS together */ + if (tls_listener) connCloseListener(tls_listener); /* failed with TLS together */ return 0; } @@ -2649,7 +2656,7 @@ static int applyBind(const char **err) { tls_listener->ct = connectionByType(CONN_TYPE_TLS); if (changeListener(tls_listener) == C_ERR) { *err = "Failed to bind to specified addresses."; - closeListener(tcp_listener); /* failed with TCP together */ + connCloseListener(tcp_listener); /* failed with TCP together */ return 0; } } @@ -2922,8 +2929,9 @@ static sds getConfigNotifyKeyspaceEventsOption(standardConfig *config) { return keyspaceEventsFlagsToString(server.notify_keyspace_events); } -static int setConfigBindOption(standardConfig *config, sds *argv, int argc, const char **err) { +static int setConfigBindOption(standardConfig *config, sds *argv, int argc, const char **err, char **bindaddr, int *bindaddr_count) { UNUSED(config); + int orig_bindaddr_count = *bindaddr_count; int j; if (argc > CONFIG_BINDADDR_MAX) { @@ -2935,11 +2943,73 @@ static int setConfigBindOption(standardConfig *config, sds *argv, int argc, cons if (argc == 1 && sdslen(argv[0]) == 0) argc = 0; /* Free old bind addresses */ - for (j = 0; j < server.bindaddr_count; j++) { - zfree(server.bindaddr[j]); + for (j = 0; j < orig_bindaddr_count; j++) zfree(bindaddr[j]); + for (j = 0; j < argc; j++) bindaddr[j] = zstrdup(argv[j]); + *bindaddr_count = argc; + + return 1; +} + +static int setConfigSocketBindOption(standardConfig *config, sds *argv, int argc, const char **err) { + UNUSED(config); + return setConfigBindOption(config, argv, argc, err, server.bindaddr, &server.bindaddr_count); +} + +static int setConfigRdmaBindOption(standardConfig *config, sds *argv, int argc, const char **err) { + 
UNUSED(config); + return setConfigBindOption(config, argv, argc, err, server.rdma_ctx_config.bindaddr, &server.rdma_ctx_config.bindaddr_count); +} + +static sds getConfigRdmaBindOption(standardConfig *config) { + UNUSED(config); + return sdsjoin(server.rdma_ctx_config.bindaddr, server.rdma_ctx_config.bindaddr_count, " "); +} + +static void rewriteConfigRdmaBindOption(standardConfig *config, const char *name, struct rewriteConfigState *state) { + UNUSED(config); + + if (server.rdma_ctx_config.bindaddr_count) { + rewriteConfigBindOption(config, name, state, server.rdma_ctx_config.bindaddr, + server.rdma_ctx_config.bindaddr_count); + } +} + +static int applyRdmaBind(const char **err) { + connListener *rdma_listener = listenerByType(CONN_TYPE_RDMA); + + if (!rdma_listener) { + *err = "No RDMA building support."; + return 0; + } + + rdma_listener->bindaddr = server.rdma_ctx_config.bindaddr; + rdma_listener->bindaddr_count = server.rdma_ctx_config.bindaddr_count; + rdma_listener->port = server.rdma_ctx_config.port; + rdma_listener->ct = connectionByType(CONN_TYPE_RDMA); + if (changeListener(rdma_listener) == C_ERR) { + *err = "Failed to bind to specified addresses for RDMA."; + return 0; + } + + return 1; +} + +static int updateRdmaPort(const char **err) { + connListener *listener = listenerByType(CONN_TYPE_RDMA); + + if (listener == NULL) { + *err = "No RDMA building support."; + return 0; + } + + listener->bindaddr = server.rdma_ctx_config.bindaddr; + listener->bindaddr_count = server.rdma_ctx_config.bindaddr_count; + listener->port = server.rdma_ctx_config.port; + listener->ct = connectionByType(CONN_TYPE_RDMA); + if (changeListener(listener) == C_ERR) { + *err = "Unable to listen on this port for RDMA. 
Check server logs."; + return 0; } - for (j = 0; j < argc; j++) server.bindaddr[j] = zstrdup(argv[j]); - server.bindaddr_count = argc; return 1; } @@ -3237,6 +3307,9 @@ standardConfig static_configs[] = { createIntConfig("watchdog-period", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, INT_MAX, server.watchdog_period, 0, INTEGER_CONFIG, NULL, updateWatchdogPeriod), createIntConfig("shutdown-timeout", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.shutdown_timeout, 10, INTEGER_CONFIG, NULL, NULL), createIntConfig("repl-diskless-sync-max-replicas", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.repl_diskless_sync_max_replicas, 0, INTEGER_CONFIG, NULL, NULL), + createIntConfig("rdma-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.rdma_ctx_config.port, 0, INTEGER_CONFIG, NULL, updateRdmaPort), + createIntConfig("rdma-rx-size", NULL, IMMUTABLE_CONFIG, 64 * 1024, 16 * 1024 * 1024, server.rdma_ctx_config.rx_size, 1024 * 1024, INTEGER_CONFIG, NULL, NULL), + createIntConfig("rdma-completion-vector", NULL, IMMUTABLE_CONFIG, -1, 1024, server.rdma_ctx_config.completion_vector, -1, INTEGER_CONFIG, NULL, NULL), /* Unsigned int configs */ createUIntConfig("maxclients", NULL, MODIFIABLE_CONFIG, 1, UINT_MAX, server.maxclients, 10000, INTEGER_CONFIG, NULL, updateMaxclients), @@ -3316,7 +3389,8 @@ standardConfig static_configs[] = { createSpecialConfig("client-output-buffer-limit", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigClientOutputBufferLimitOption, getConfigClientOutputBufferLimitOption, rewriteConfigClientOutputBufferLimitOption, NULL), createSpecialConfig("oom-score-adj-values", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigOOMScoreAdjValuesOption, getConfigOOMScoreAdjValuesOption, rewriteConfigOOMScoreAdjValuesOption, updateOOMScoreAdj), createSpecialConfig("notify-keyspace-events", NULL, MODIFIABLE_CONFIG, setConfigNotifyKeyspaceEventsOption, getConfigNotifyKeyspaceEventsOption, rewriteConfigNotifyKeyspaceEventsOption, NULL), - createSpecialConfig("bind", NULL, 
MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigBindOption, getConfigBindOption, rewriteConfigBindOption, applyBind), + createSpecialConfig("bind", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigSocketBindOption, getConfigBindOption, rewriteConfigSocketBindOption, applyBind), + createSpecialConfig("rdma-bind", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigRdmaBindOption, getConfigRdmaBindOption, rewriteConfigRdmaBindOption, applyRdmaBind), createSpecialConfig("replicaof", "slaveof", IMMUTABLE_CONFIG | MULTI_ARG_CONFIG, setConfigReplicaOfOption, getConfigReplicaOfOption, rewriteConfigReplicaOfOption, NULL), createSpecialConfig("latency-tracking-info-percentiles", NULL, MODIFIABLE_CONFIG | MULTI_ARG_CONFIG, setConfigLatencyTrackingInfoPercentilesOutputOption, getConfigLatencyTrackingInfoPercentilesOutputOption, rewriteConfigLatencyTrackingInfoPercentilesOutputOption, NULL), diff --git a/src/connection.c b/src/connection.c index f0c1c2d364..8807541d77 100644 --- a/src/connection.c +++ b/src/connection.c @@ -66,6 +66,9 @@ int connTypeInitialize(void) { /* may fail if without BUILD_TLS=yes */ RedisRegisterConnectionTypeTLS(); + /* may fail if without BUILD_RDMA=yes */ + RegisterConnectionTypeRdma(); + return C_OK; } diff --git a/src/connection.h b/src/connection.h index 0762441732..8a2775ee34 100644 --- a/src/connection.h +++ b/src/connection.h @@ -60,6 +60,7 @@ typedef enum { #define CONN_TYPE_SOCKET "tcp" #define CONN_TYPE_UNIX "unix" #define CONN_TYPE_TLS "tls" +#define CONN_TYPE_RDMA "rdma" #define CONN_TYPE_MAX 8 /* 8 is enough to be extendable */ typedef void (*ConnectionCallbackFunc)(struct connection *conn); @@ -79,6 +80,7 @@ typedef struct ConnectionType { int (*addr)(connection *conn, char *ip, size_t ip_len, int *port, int remote); int (*is_local)(connection *conn); int (*listen)(connListener *listener); + void (*closeListener)(connListener *listener); /* create/shutdown/close connection */ connection *(*conn_create)(void); @@ -442,6 +444,13 @@ 
static inline int connListen(connListener *listener) { return listener->ct->listen(listener); } +/* Close a listened listener */ +static inline void connCloseListener(connListener *listener) { + if (listener->count) { + listener->ct->closeListener(listener); + } +} + /* Get accept_handler of a connection type */ static inline aeFileProc *connAcceptHandler(ConnectionType *ct) { if (ct) return ct->accept_handler; @@ -454,6 +463,7 @@ sds getListensInfoString(sds info); int RedisRegisterConnectionTypeSocket(void); int RedisRegisterConnectionTypeUnix(void); int RedisRegisterConnectionTypeTLS(void); +int RegisterConnectionTypeRdma(void); /* Return 1 if connection is using TLS protocol, 0 if otherwise. */ static inline int connIsTLS(connection *conn) { diff --git a/src/rdma.c b/src/rdma.c index 7cdcb24913..de7ea396a1 100644 --- a/src/rdma.c +++ b/src/rdma.c @@ -10,9 +10,10 @@ #define VALKEYMODULE_CORE_MODULE #include "server.h" - -#if defined USE_RDMA && defined __linux__ /* currently RDMA is only supported on Linux */ #include "connection.h" + +#if defined __linux__ /* currently RDMA is only supported on Linux */ +#if (USE_RDMA == 1 /* BUILD_YES */) || ((USE_RDMA == 2 /* BUILD_MODULE */) && (BUILD_RDMA_MODULE == 2)) #include "connhelpers.h" #include @@ -128,12 +129,10 @@ typedef struct rdma_listener { static list *pending_list; static rdma_listener *rdma_listeners; +static serverRdmaContextConfig *rdma_config; static ConnectionType CT_RDMA; -static int valkey_rdma_rx_size = VALKEY_RDMA_DEFAULT_RX_SIZE; -static int valkey_rdma_comp_vector = -1; /* -1 means a random one */ - static void serverRdmaError(char *err, const char *fmt, ...) 
{ va_list ap; @@ -272,7 +271,7 @@ static int rdmaSetupIoBuf(RdmaContext *ctx, struct rdma_cm_id *cm_id) { /* setup recv buf & MR */ access = IBV_ACCESS_LOCAL_WRITE | IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE; - length = valkey_rdma_rx_size; + length = rdma_config->rx_size; ctx->rx.addr = page_aligned_zalloc(length); ctx->rx.length = length; ctx->rx.mr = ibv_reg_mr(ctx->pd, ctx->rx.addr, length, access); @@ -295,6 +294,7 @@ static int rdmaCreateResource(RdmaContext *ctx, struct rdma_cm_id *cm_id) { struct ibv_comp_channel *comp_channel = NULL; struct ibv_cq *cq = NULL; struct ibv_pd *pd = NULL; + int comp_vector = rdma_config->completion_vector; if (ibv_query_device(cm_id->verbs, &device_attr)) { serverLog(LL_WARNING, "RDMA: ibv ibv query device failed"); @@ -317,8 +317,13 @@ static int rdmaCreateResource(RdmaContext *ctx, struct rdma_cm_id *cm_id) { ctx->comp_channel = comp_channel; + /* negative number means a random one */ + if (comp_vector < 0) { + comp_vector = abs((int)random()); + } + cq = ibv_create_cq(cm_id->verbs, VALKEY_RDMA_MAX_WQE * 2, NULL, comp_channel, - valkey_rdma_comp_vector % cm_id->verbs->num_comp_vectors); + comp_vector % cm_id->verbs->num_comp_vectors); if (!cq) { serverLog(LL_WARNING, "RDMA: ibv create cq failed"); return C_ERR; @@ -1610,9 +1615,28 @@ int connRdmaListen(connListener *listener) { rdma_listener++; } + rdma_config = listener->priv; return C_OK; } +static void connRdmaCloseListener(connListener *listener) { + /* Close old servers */ + for (int i = 0; i < listener->count; i++) { + if (listener->fd[i] == -1) continue; + + aeDeleteFileEvent(server.el, listener->fd[i], AE_READABLE); + listener->fd[i] = -1; + struct rdma_listener *rdma_listener = &rdma_listeners[i]; + rdma_destroy_id(rdma_listener->cm_id); + rdma_destroy_event_channel(rdma_listener->cm_channel); + } + + listener->count = 0; + zfree(rdma_listeners); + rdma_listeners = NULL; + rdma_config = NULL; +} + static int connRdmaAddr(connection *conn, char *ip, size_t 
ip_len, int *port, int remote) { rdma_connection *rdma_conn = (rdma_connection *)conn; struct rdma_cm_id *cm_id = rdma_conn->cm_id; @@ -1740,6 +1764,7 @@ static ConnectionType CT_RDMA = { //.cluster_accept_handler = NULL, .is_local = connRdmaIsLocal, .listen = connRdmaListen, + .closeListener = connRdmaCloseListener, .addr = connRdmaAddr, /* create/close connection */ @@ -1769,17 +1794,6 @@ static ConnectionType CT_RDMA = { .process_pending_data = rdmaProcessPendingData, }; -static struct connListener *rdmaListener(void) { - static struct connListener *listener = NULL; - - if (listener) return listener; - - listener = listenerByType(CONN_TYPE_RDMA); - serverAssert(listener != NULL); - - return listener; -} - ConnectionType *connectionTypeRdma(void) { static ConnectionType *ct_rdma = NULL; @@ -1791,133 +1805,28 @@ ConnectionType *connectionTypeRdma(void) { return ct_rdma; } -/* rdma listener has different create/close logic from TCP, we can't re-use 'int changeListener(connListener *listener)' - * directly */ -static int rdmaChangeListener(void) { - struct connListener *listener = rdmaListener(); - - /* Close old servers */ - for (int i = 0; i < listener->count; i++) { - if (listener->fd[i] == -1) continue; - - aeDeleteFileEvent(server.el, listener->fd[i], AE_READABLE); - listener->fd[i] = -1; - struct rdma_listener *rdma_listener = &rdma_listeners[i]; - rdma_destroy_id(rdma_listener->cm_id); - rdma_destroy_event_channel(rdma_listener->cm_channel); - } - - listener->count = 0; - zfree(rdma_listeners); - rdma_listeners = NULL; - - closeListener(listener); - - /* Just close the server if port disabled */ - if (listener->port == 0) { - if (server.set_proc_title) serverSetProcTitle(NULL); - return VALKEYMODULE_OK; - } - - /* Re-create listener */ - if (connListen(listener) != C_OK) { - return VALKEYMODULE_ERR; - } - - /* Create event handlers */ - if (createSocketAcceptHandler(listener, listener->ct->accept_handler) != C_OK) { - serverPanic("Unrecoverable error creating 
%s accept handler.", listener->ct->get_type(NULL)); - } - - if (server.set_proc_title) serverSetProcTitle(NULL); - - return VALKEYMODULE_OK; -} - -#ifdef BUILD_RDMA_MODULE - -#include "release.h" - -static long long rdmaGetPort(const char *name, void *privdata) { - UNUSED(name); - UNUSED(privdata); - struct connListener *listener = rdmaListener(); - - return listener->port; -} - -static int rdmaSetPort(const char *name, long long val, void *privdata, ValkeyModuleString **err) { - UNUSED(name); - UNUSED(privdata); - UNUSED(err); - struct connListener *listener = rdmaListener(); - listener->port = val; - - return VALKEYMODULE_OK; -} - -static ValkeyModuleString *rdma_bind; - -static void rdmaBuildBind(void *ctx) { - struct connListener *listener = rdmaListener(); - - if (rdma_bind) ValkeyModule_FreeString(NULL, rdma_bind); - - sds rdma_bind_str = sdsjoin(listener->bindaddr, listener->bindaddr_count, " "); - rdma_bind = ValkeyModule_CreateString(ctx, rdma_bind_str, sdslen(rdma_bind_str)); +int RegisterConnectionTypeRdma(void) { + return connTypeRegister(&CT_RDMA); } -static ValkeyModuleString *rdmaGetBind(const char *name, void *privdata) { - UNUSED(name); - UNUSED(privdata); +#else - return rdma_bind; +int RegisterConnectionTypeRdma(void) { + serverLog(LL_VERBOSE, "Connection type %s not builtin", CONN_TYPE_RDMA); + return C_ERR; } -static int rdmaSetBind(const char *name, ValkeyModuleString *val, void *privdata, ValkeyModuleString **err) { - UNUSED(name); - UNUSED(err); - struct connListener *listener = rdmaListener(); - const char *bind = ValkeyModule_StringPtrLen(val, NULL); - int nexts; - sds *exts = sdssplitlen(bind, strlen(bind), " ", 1, &nexts); - - if (nexts > CONFIG_BINDADDR_MAX) { - serverLog(LL_WARNING, "RDMA: Unsupported bind ( > %d)", CONFIG_BINDADDR_MAX); - return VALKEYMODULE_ERR; - } - - /* Free old bind addresses */ - for (int j = 0; j < listener->bindaddr_count; j++) { - zfree(listener->bindaddr[j]); - } - - for (int j = 0; j < nexts; j++) 
listener->bindaddr[j] = zstrdup(exts[j]); - listener->bindaddr_count = nexts; - - sdsfreesplitres(exts, nexts); - rdmaBuildBind(privdata); - - return VALKEYMODULE_OK; -} +#endif -static int rdmaApplyListener(ValkeyModuleCtx *ctx, void *privdata, ValkeyModuleString **err) { - UNUSED(ctx); - UNUSED(privdata); - UNUSED(err); +#if BUILD_RDMA_MODULE == 2 /* BUILD_MODULE */ - return rdmaChangeListener(); -} +#include "release.h" -static void rdmaListenerAddConfig(void *ctx) { - serverAssert(ValkeyModule_RegisterNumericConfig(ctx, "port", 0, VALKEYMODULE_CONFIG_DEFAULT, 0, 65535, rdmaGetPort, - rdmaSetPort, rdmaApplyListener, NULL) == VALKEYMODULE_OK); - serverAssert(ValkeyModule_RegisterStringConfig(ctx, "bind", "", VALKEYMODULE_CONFIG_DEFAULT, rdmaGetBind, - rdmaSetBind, rdmaApplyListener, ctx) == VALKEYMODULE_OK); - serverAssert(ValkeyModule_LoadConfigs(ctx) == VALKEYMODULE_OK); -} int ValkeyModule_OnLoad(void *ctx, ValkeyModuleString **argv, int argc) { + UNUSED(argv); + UNUSED(argc); + /* Connection modules MUST be part of the same build as valkey. 
*/ if (strcmp(REDIS_BUILD_ID_RAW, serverBuildIdRaw())) { serverLog(LL_NOTICE, "Connection type %s was not built together with the valkey-server used.", CONN_TYPE_RDMA); @@ -1936,40 +1845,6 @@ int ValkeyModule_OnLoad(void *ctx, ValkeyModuleString **argv, int argc) { if (connTypeRegister(&CT_RDMA) != C_OK) return VALKEYMODULE_ERR; - rdmaListenerAddConfig(ctx); - - struct connListener *listener = rdmaListener(); - listener->ct = connectionTypeRdma(); - listener->bindaddr = zcalloc_num(CONFIG_BINDADDR_MAX, sizeof(listener->bindaddr[0])); - - for (int i = 0; i < argc; i++) { - robj *str = (robj *)argv[i]; - int nexts; - sds *exts = sdssplitlen(str->ptr, strlen(str->ptr), "=", 1, &nexts); - if (nexts != 2) { - serverLog(LL_WARNING, "RDMA: Unsupported argument \"%s\"", (char *)str->ptr); - return VALKEYMODULE_ERR; - } - - if (!strcasecmp(exts[0], "bind")) { - listener->bindaddr[listener->bindaddr_count++] = zstrdup(exts[1]); - } else if (!strcasecmp(exts[0], "port")) { - listener->port = atoi(exts[1]); - } else if (!strcasecmp(exts[0], "rx-size")) { - valkey_rdma_rx_size = atoi(exts[1]); - } else if (!strcasecmp(exts[0], "comp-vector")) { - valkey_rdma_comp_vector = atoi(exts[1]); - } else { - serverLog(LL_WARNING, "RDMA: Unsupported argument \"%s\"", (char *)str->ptr); - return VALKEYMODULE_ERR; - } - - sdsfreesplitres(exts, nexts); - } - - rdmaBuildBind(ctx); - if (valkey_rdma_comp_vector == -1) valkey_rdma_comp_vector = abs((int)random()); - return VALKEYMODULE_OK; } @@ -1981,4 +1856,11 @@ int ValkeyModule_OnUnload(void *arg) { #endif /* BUILD_RDMA_MODULE */ -#endif /* USE_RDMA && __linux__ */ +#else /* __linux__ */ + +int RegisterConnectionTypeRdma(void) { + serverLog(LL_VERBOSE, "Connection type %s is supported on Linux only", CONN_TYPE_RDMA); + return C_ERR; +} + +#endif /* __linux__ */ diff --git a/src/server.c b/src/server.c index a83ef9096c..df57659715 100644 --- a/src/server.c +++ b/src/server.c @@ -2482,19 +2482,6 @@ void checkTcpBacklogSettings(void) { #endif 
} -void closeListener(connListener *sfd) { - int j; - - for (j = 0; j < sfd->count; j++) { - if (sfd->fd[j] == -1) continue; - - aeDeleteFileEvent(server.el, sfd->fd[j], AE_READABLE); - close(sfd->fd[j]); - } - - sfd->count = 0; -} - /* Create an event handler for accepting new connections in TCP or TLS domain sockets. * This works atomically for all socket fds */ int createSocketAcceptHandler(connListener *sfd, aeFileProc *accept_handler) { @@ -2558,7 +2545,7 @@ int listenToPort(connListener *sfd) { continue; /* Rollback successful listens before exiting */ - closeListener(sfd); + connCloseListener(sfd); return C_ERR; } if (server.socket_mark_id > 0) anetSetSockMarkId(NULL, sfd->fd[sfd->count], server.socket_mark_id); @@ -2899,6 +2886,17 @@ void initListeners(void) { listener->priv = &server.unix_ctx_config; /* Unix socket specified */ } + if (server.rdma_ctx_config.port != 0) { + conn_index = connectionIndexByType(CONN_TYPE_RDMA); + if (conn_index < 0) serverPanic("Failed finding connection listener of %s", CONN_TYPE_RDMA); + listener = &server.listeners[conn_index]; + listener->bindaddr = server.rdma_ctx_config.bindaddr; + listener->bindaddr_count = server.rdma_ctx_config.bindaddr_count; + listener->port = server.rdma_ctx_config.port; + listener->ct = connectionByType(CONN_TYPE_RDMA); + listener->priv = &server.rdma_ctx_config; + } + /* create all the configured listener, and add handler to start to accept */ int listen_fds = 0; for (int j = 0; j < CONN_TYPE_MAX; j++) { @@ -6297,7 +6295,7 @@ connListener *listenerByType(const char *typename) { /* Close original listener, re-create a new listener from the updated bind address & port */ int changeListener(connListener *listener) { /* Close old servers */ - closeListener(listener); + connCloseListener(listener); /* Just close the server if port disabled */ if (listener->port == 0) { diff --git a/src/server.h b/src/server.h index 70bd3868c3..b9e8be9479 100644 --- a/src/server.h +++ b/src/server.h @@ -1614,6 +1614,17 
@@ typedef struct serverUnixContextConfig { unsigned int perm; /* UNIX socket permission (see mode_t) */ } serverUnixContextConfig; +/*----------------------------------------------------------------------------- + * RDMA Context Configuration + *----------------------------------------------------------------------------*/ +typedef struct serverRdmaContextConfig { + char *bindaddr[CONFIG_BINDADDR_MAX]; + int bindaddr_count; + int port; + int rx_size; + int completion_vector; +} serverRdmaContextConfig; + /*----------------------------------------------------------------------------- * AOF manifest definition *----------------------------------------------------------------------------*/ @@ -2229,6 +2240,7 @@ struct valkeyServer { int tls_auth_clients; serverTLSContextConfig tls_ctx_config; serverUnixContextConfig unix_ctx_config; + serverRdmaContextConfig rdma_ctx_config; /* cpu affinity */ char *server_cpulist; /* cpu affinity list of server main/io thread. */ char *bio_cpulist; /* cpu affinity list of bio thread. 
*/ @@ -3293,7 +3305,6 @@ void setupSignalHandlers(void); int createSocketAcceptHandler(connListener *sfd, aeFileProc *accept_handler); connListener *listenerByType(const char *typename); int changeListener(connListener *listener); -void closeListener(connListener *listener); struct serverCommand *lookupSubcommand(struct serverCommand *container, sds sub_name); struct serverCommand *lookupCommand(robj **argv, int argc); struct serverCommand *lookupCommandBySdsLogic(dict *commands, sds s); diff --git a/src/socket.c b/src/socket.c index 7344d66ad8..d89e6c8767 100644 --- a/src/socket.c +++ b/src/socket.c @@ -339,6 +339,19 @@ static int connSocketListen(connListener *listener) { return listenToPort(listener); } +static void connSocketCloseListener(connListener *listener) { + int j; + + for (j = 0; j < listener->count; j++) { + if (listener->fd[j] == -1) continue; + + aeDeleteFileEvent(server.el, listener->fd[j], AE_READABLE); + close(listener->fd[j]); + } + + listener->count = 0; +} + static int connSocketBlockingConnect(connection *conn, const char *addr, int port, long long timeout) { int fd = anetTcpNonBlockConnect(NULL, addr, port); if (fd == -1) { @@ -395,6 +408,7 @@ static ConnectionType CT_Socket = { .addr = connSocketAddr, .is_local = connSocketIsLocal, .listen = connSocketListen, + .closeListener = connSocketCloseListener, /* create/shutdown/close connection */ .conn_create = connCreateSocket, diff --git a/src/tls.c b/src/tls.c index d1dd567354..48b75553de 100644 --- a/src/tls.c +++ b/src/tls.c @@ -805,6 +805,10 @@ static int connTLSListen(connListener *listener) { return listenToPort(listener); } +static void connTLSCloseListener(connListener *listener) { + connectionTypeTcp()->closeListener(listener); +} + static void connTLSShutdown(connection *conn_) { tls_connection *conn = (tls_connection *)conn_; @@ -1147,6 +1151,7 @@ static ConnectionType CT_TLS = { .addr = connTLSAddr, .is_local = connTLSIsLocal, .listen = connTLSListen, + .closeListener = 
connTLSCloseListener, /* create/shutdown/close connection */ .conn_create = connCreateTLS, diff --git a/src/unix.c b/src/unix.c index 35778779f9..86df05bd52 100644 --- a/src/unix.c +++ b/src/unix.c @@ -74,6 +74,10 @@ static int connUnixListen(connListener *listener) { return C_OK; } +static void connUnixCloseListener(connListener *listener) { + connectionTypeTcp()->closeListener(listener); +} + static connection *connCreateUnix(void) { connection *conn = zcalloc(sizeof(connection)); conn->type = &CT_Unix; @@ -174,6 +178,7 @@ static ConnectionType CT_Unix = { .addr = connUnixAddr, .is_local = connUnixIsLocal, .listen = connUnixListen, + .closeListener = connUnixCloseListener, /* create/shutdown/close connection */ .conn_create = connCreateUnix, diff --git a/tests/rdma/run.py b/tests/rdma/run.py index 0724c27adc..09168f368a 100755 --- a/tests/rdma/run.py +++ b/tests/rdma/run.py @@ -63,7 +63,7 @@ def test_rdma(ipaddr): rdmapath = valkeydir + "/src/valkey-rdma.so" svrcmd = [svrpath, "--port", "0", "--loglevel", "verbose", "--protected-mode", "yes", "--appendonly", "no", "--daemonize", "no", "--dir", valkeydir + "/tests/rdma/tmp", - "--loadmodule", rdmapath, "port=6379", "bind=" + ipaddr] + "--loadmodule", rdmapath, "--rdma-port", "6379", "--rdma-bind", ipaddr] svr = subprocess.Popen(svrcmd, shell=False, stdout=subprocess.PIPE) try: diff --git a/tests/unit/introspection.tcl b/tests/unit/introspection.tcl index 352f5f183e..d79bb1c7da 100644 --- a/tests/unit/introspection.tcl +++ b/tests/unit/introspection.tcl @@ -558,6 +558,10 @@ start_server {tags {"introspection"}} { req-res-logfile client-default-resp dual-channel-replication-enabled + rdma-completion-vector + rdma-rx-size + rdma-bind + rdma-port } if {!$::tls} { diff --git a/valkey.conf b/valkey.conf index bf82b01874..8d3e11c515 100644 --- a/valkey.conf +++ b/valkey.conf @@ -300,6 +300,54 @@ tcp-keepalive 300 # # tls-session-cache-timeout 60 +################################### RDMA 
###################################### + +# Valkey Over RDMA is experimental, it may be changed or be removed in any minor or major version. +# By default, RDMA is disabled. To enable it, the "rdma-port" configuration +# directive can be used to define RDMA-listening ports. +# +# rdma-port 6379 +# rdma-bind 192.168.1.100 + +# The RDMA receive transfer buffer is 1M by default. It can be set between 64K and 16M. +# Note that page size aligned size is preferred. +# +# rdma-rx-size 1048576 + +# The RDMA completion queue will use the completion vector to signal completion events +# via hardware interrupts. A large number of hardware interrupts can affect CPU performance. +# It is possible to tune the performance using rdma-completion-vector. +# +# Example 1. a) Pin hardware interrupt vectors [0, 3] to CPU [0, 3]. +# b) Set CPU affinity for valkey to CPU [4, X]. +# c) Any valkey server uses a random RDMA completion vector [-1]. +# All valkey servers will not affect each other and will be isolated from kernel interrupts. +# +# SYS SYS SYS SYS VALKEY VALKEY VALKEY +# | | | | | | | +# CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 ... CPUX +# | | | | +# INTR0 INTR1 INTR2 INTR3 +# +# Example 2. a) 1:1 pin hardware interrupt vectors [0, X] to CPU [0, X]. +# b) Set CPU affinity for valkey [M] to CPU [M]. +# c) Valkey server [M] uses RDMA completion vector [M]. +# A single CPU [M] handles hardware interrupts, the RDMA completion vector [M], +# and the valkey server [M] within its context only. +# This avoids overhead and function calls across multiple CPUs, fully isolating +# each valkey server from one another. +# +# VALKEY VALKEY VALKEY VALKEY VALKEY VALKEY VALKEY +# | | | | | | | +# CPU0 CPU1 CPU2 CPU3 CPU4 CPU5 ... CPUX +# | | | | | | | +# INTR0 INTR1 INTR2 INTR3 INTR4 INTR5 INTRX +# +# Use 0 and positive numbers to specify the RDMA completion vector, or specify -1 to allow +# the server to use a random vector for a new connection. The default vector is -1. 
+# +# rdma-completion-vector 0 + ################################# GENERAL ##################################### # By default the server does not run as a daemon. Use 'yes' if you need it. From c8ceb2ee255c899b0cb05b69f0511fc7dcf4ddca Mon Sep 17 00:00:00 2001 From: Stav Ben-Tov <90314138+stav-bentov@users.noreply.github.com> Date: Sun, 1 Dec 2024 13:24:18 +0200 Subject: [PATCH 55/60] Use zfree_with_size for client buffer (#1376) Replace occurrences of 'zfree' with 'zfree_with_size' to improve performance. 'zfree_with_size' function avoids calling 'zmalloc_size' to retrieve buffer size and uses previuos calculation of size for calling 'zfree_with_size'. This results in faster memory deallocation and reduces overhead. Signed-off-by: stav bentov Co-authored-by: stav bentov --- src/networking.c | 2 +- src/server.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/networking.c b/src/networking.c index 97479967f6..bbd684a3e5 100644 --- a/src/networking.c +++ b/src/networking.c @@ -1760,7 +1760,7 @@ void freeClient(client *c) { /* Free data structures. 
*/ listRelease(c->reply); c->reply = NULL; - zfree(c->buf); + zfree_with_size(c->buf, c->buf_usable_size); c->buf = NULL; freeReplicaReferencedReplBuffer(c); freeClientArgv(c); diff --git a/src/server.c b/src/server.c index df57659715..ef9f523145 100644 --- a/src/server.c +++ b/src/server.c @@ -889,9 +889,10 @@ int clientsCronResizeOutputBuffer(client *c, mstime_t now_ms) { if (new_buffer_size) { oldbuf = c->buf; + size_t oldbuf_size = c->buf_usable_size; c->buf = zmalloc_usable(new_buffer_size, &c->buf_usable_size); memcpy(c->buf, oldbuf, c->bufpos); - zfree(oldbuf); + zfree_with_size(oldbuf, oldbuf_size); } return 0; } From 9c48f567907087637e19bf30a5a137d8b50e0df3 Mon Sep 17 00:00:00 2001 From: Binbin Date: Sun, 1 Dec 2024 21:33:21 +0800 Subject: [PATCH 56/60] Reset repl_down_since to zero only on state change (#1149) We should reset repl_down_since only on state change, in the current code, if the rdb channel in the dual channel is normal, that is, rdb is loaded normally, but the psync channel is abnormal, we will set repl_down_since 0 here. If the primary is down at this time, the replica may be abnormal when calculating data_age in cluster failover, since repl_state != REPL_STATE_CONNECTED, this causes the replica to be unable to initiate an election due to the old data_age. In dualChannelSyncHandleRdbLoadCompletion, if the psync channel is not established, the function will return. We will set repl_state to REPL_STATE_CONNECTED and set repl_down_since to 0 in dualChannelSyncSuccess, that is, in establishPrimaryConnection. See also 677d10b2a8ff7f13033ccfe56ffcd246dbe70fb6 for more details. 
Signed-off-by: Binbin --- src/replication.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/replication.c b/src/replication.c index 260da1cd6e..d17199bfc3 100644 --- a/src/replication.c +++ b/src/replication.c @@ -2405,10 +2405,10 @@ void readSyncBulkPayload(connection *conn) { } else { replicationCreatePrimaryClient(server.repl_transfer_s, rsi.repl_stream_db); server.repl_state = REPL_STATE_CONNECTED; + server.repl_down_since = 0; /* Send the initial ACK immediately to put this replica in online state. */ replicationSendAck(); } - server.repl_down_since = 0; /* Fire the primary link modules event. */ moduleFireServerEvent(VALKEYMODULE_EVENT_PRIMARY_LINK_CHANGE, VALKEYMODULE_SUBEVENT_PRIMARY_LINK_UP, NULL); From 7043ef0bbb627b66bcaa75351b1b141c96852df8 Mon Sep 17 00:00:00 2001 From: Amit Nagler <58042354+naglera@users.noreply.github.com> Date: Sun, 1 Dec 2024 15:33:43 +0200 Subject: [PATCH 57/60] Split dual-channel COB overrun tests to separate servers (#1374) 1. The test isn't waiting long enough for the output buffer to overrun. This problem is happening because an error from the previous test is bleeding into the current test's logs. The simplest fix would be to split these tests. 2. Increased replication timeout to ensure sync fails due to output buffer overrun before a timeout occurs. 
Fixes #1367 Signed-off-by: naglera --- .../integration/dual-channel-replication.tcl | 33 ++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/integration/dual-channel-replication.tcl b/tests/integration/dual-channel-replication.tcl index 055ed670ab..e417dad6c9 100644 --- a/tests/integration/dual-channel-replication.tcl +++ b/tests/integration/dual-channel-replication.tcl @@ -775,7 +775,7 @@ start_server {tags {"dual-channel-replication external:skip"}} { $replica config set dual-channel-replication-enabled yes $replica config set loglevel debug - $replica config set repl-timeout 10 + $replica config set repl-timeout 60 $primary config set repl-backlog-size 1mb test "Test dual-channel-replication primary gets cob overrun before established psync" { @@ -815,6 +815,37 @@ start_server {tags {"dual-channel-replication external:skip"}} { } else { fail "Primary should abort sync" } + stop_write_load $load_handle0 + stop_write_load $load_handle1 + stop_write_load $load_handle2 + } +} + +start_server {tags {"dual-channel-replication external:skip"}} { + set primary [srv 0 client] + set primary_host [srv 0 host] + set primary_port [srv 0 port] + set loglines [count_log_lines 0] + + $primary config set repl-diskless-sync yes + $primary config set dual-channel-replication-enabled yes + $primary config set client-output-buffer-limit "replica 1100k 0 0" + $primary config set loglevel debug + start_server {} { + set replica [srv 0 client] + set replica_host [srv 0 host] + set replica_port [srv 0 port] + set replica_log [srv 0 stdout] + set replica_pid [srv 0 pid] + + set load_handle0 [start_write_load $primary_host $primary_port 60] + set load_handle1 [start_write_load $primary_host $primary_port 60] + set load_handle2 [start_write_load $primary_host $primary_port 60] + + $replica config set dual-channel-replication-enabled yes + $replica config set loglevel debug + $replica config set repl-timeout 60 + $primary config set repl-backlog-size 1mb 
$replica debug pause-after-fork 1 $primary debug populate 1000 primary 100000 From 90475af59429583182402ee3b408d7bcb36d56cd Mon Sep 17 00:00:00 2001 From: Vadym Khoptynets <1099644+poiuj@users.noreply.github.com> Date: Sun, 1 Dec 2024 17:12:27 +0200 Subject: [PATCH 58/60] Free strings during BGSAVE/BGAOFRW to reduce copy-on-write (#905) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Motivation** Copy-on-write (COW) amplification refers to the issue where writing to a small object leads to the entire page being cloned, resulting in inefficient memory usage. This issue arises during the BGSAVE process, which can be particularly problematic on instances with limited memory. If the BGSAVE process could release unneeded memory, it could reduce memory consumption. To address this, the BGSAVE process calls the `madvise` function to signal the operating system to reclaim the buffer. However, this approach does not work for buffers smaller than a page (usually 4KiB). Even after multiple such calls, where a full page may be free, the operating system will not reclaim it. To solve this issue, we can call `zfree` directly. This allows the allocator (jemalloc) to handle the bookkeeping and release pages when buffers are no longer needed. This approach reduces copy-on-write events. **Benchmarks** To understand how usage of `zfree` affects BGSAVE and the memory consumption I ran 45 benchmarks that compare my clone with the vanilla version. The benchmark has the following steps: 1. Start a new Valkey process 2. Fill the DB with data sequentially 3. Run a warmup to randomize the memory layout 4. Introduce fragmentation by deleting part of the keys 5. In parallel: 1. Trigger BGSAVE 2. Start 80/20 get/set load I played the following parameters to understand their influence: 1. Number of keys: 3M, 6M, and 12M. 2. Data size. While keys themselves are of fixed length ~30 bytes, the value size is 120, 250, 500, 1000, and 2000 bytes. 3. 
Fragmentation. I delete 5%, 10%, and 15% of the original key range. I'm attaching a graph of BGSAVE process memory consumption. Instead of all benchmarks, I show the most representative runs IMO. 3m-fixed For 2000 bytes values peak memory usage is ~53% compared to vanilla. The peak happens at 57% BGSAVE progress. For 500 bytes values the peak is ~80% compared to vanilla. And happens at ~80% progress. For 120 bytes the difference is under 5%, and the patched version could even use more memory. ![500b-fixed](https://github.com/user-attachments/assets/b09451d3-4bce-4f33-b3db-2b5df2178ed2) For 12M keys, the peak is ~85% of the vanilla’s. Happens at ~70% mark. For 6M keys, the peak is ~87% of the vanilla’s. Happens at ~77% mark. For 3M keys, the peak is ~87% of the vanilla’s. Happens at ~80% mark. **Changes** The PR contains 2 changes: 1. Static buffer for RDB compression. RDB compression leads to COW events even without any write load if we use `zfree`. It happens because the compression function allocates a new buffer for each object. Together with freeing objects with `zfree` it leads to reusing of the memory shared with the main process. To deal with this problem, we use a pre-allocated constant 8K buffer for compression. If the object size is too big for this buffer, then we fall back to the ad hoc allocation behavior. 2. Freeing string objects instead of dismissing them Call to `zfree` is more expensive than direct call to `madvise`. But with #453 strings use the fast path – `zfree_with_size`. As a possible next step we can optimize `zfree` for other data types as well. 
--------- Signed-off-by: Vadym Khoptynets Signed-off-by: ranshid <88133677+ranshid@users.noreply.github.com> Co-authored-by: ranshid <88133677+ranshid@users.noreply.github.com> Co-authored-by: Viktor Söderqvist --- src/object.c | 9 +++++++-- src/rdb.c | 19 ++++++++++++------- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/src/object.c b/src/object.c index 8c1cf64892..035198ad89 100644 --- a/src/object.c +++ b/src/object.c @@ -398,9 +398,14 @@ void decrRefCount(robj *o) { } } -/* See dismissObject() */ +/* See dismissObject(). sds is an exception, because the allocation + * size is known. Instead of dismissing it with madvise(MADV_DONTNEED) + * we free it via the allocator, which has minimal overhead when the + * size is known. This has advantage that it allows the allocator to + * accumulate free buffers to free whole pages, while madvise is nop + * if the buffer is less than a page. */ void dismissSds(sds s) { - dismissMemory(sdsAllocPtr(s), sdsAllocSize(s)); + sdsfree(s); } /* See dismissObject() */ diff --git a/src/rdb.c b/src/rdb.c index 1c200e54f5..ca904f7f98 100644 --- a/src/rdb.c +++ b/src/rdb.c @@ -49,6 +49,9 @@ #include #include +/* Size of the static buffer used for rdbcompression */ +#define LZF_STATIC_BUFFER_SIZE (8 * 1024) + /* This macro is called when the internal RDB structure is corrupt */ #define rdbReportCorruptRDB(...) 
rdbReportError(1, __LINE__, __VA_ARGS__) /* This macro is called when RDB read failed (possibly a short read) */ @@ -388,18 +391,20 @@ ssize_t rdbSaveLzfBlob(rio *rdb, void *data, size_t compress_len, size_t origina ssize_t rdbSaveLzfStringObject(rio *rdb, unsigned char *s, size_t len) { size_t comprlen, outlen; void *out; + static void *buffer = NULL; /* We require at least four bytes compression for this to be worth it */ if (len <= 4) return 0; outlen = len - 4; - if ((out = zmalloc(outlen + 1)) == NULL) return 0; - comprlen = lzf_compress(s, len, out, outlen); - if (comprlen == 0) { - zfree(out); - return 0; + if (outlen < LZF_STATIC_BUFFER_SIZE) { + if (!buffer) buffer = zmalloc(LZF_STATIC_BUFFER_SIZE); + out = buffer; + } else { + if ((out = zmalloc(outlen + 1)) == NULL) return 0; } - ssize_t nwritten = rdbSaveLzfBlob(rdb, out, comprlen, len); - zfree(out); + comprlen = lzf_compress(s, len, out, outlen); + ssize_t nwritten = comprlen ? rdbSaveLzfBlob(rdb, out, comprlen, len) : 0; + if (out != buffer) zfree(out); return nwritten; } From fbbfe5d3d3833c74d86c324ca9ffee8b97856724 Mon Sep 17 00:00:00 2001 From: Binbin Date: Mon, 2 Dec 2024 15:55:24 +0800 Subject: [PATCH 59/60] Print logs when the cluster state changes to fail or the fail reason changes (#1188) This log allows us to easily distinguish between full coverage and minority partition when the cluster fails. Sometimes it is not easy to see the minority partition in healthy shards (both primary and replicas). And we decided not to add a cluster_fail_reason field to cluster info. Given that there are only two reasons and both are well-known and if we ended up adding more down the road we can add it in the future. 
Signed-off-by: Binbin --- src/cluster.h | 6 ++++++ src/cluster_legacy.c | 39 +++++++++++++++++++++++++++++++++++-- src/cluster_legacy.h | 1 + tests/unit/cluster/info.tcl | 23 ++++++++++++++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/src/cluster.h b/src/cluster.h index 65eadf4c65..142f2d70b3 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -12,6 +12,12 @@ #define CLUSTER_FAIL 1 /* The cluster can't work */ #define CLUSTER_NAMELEN 40 /* sha1 hex length */ +/* Reason why the cluster state changes to fail. When adding new reasons, + * make sure to update clusterLogFailReason. */ +#define CLUSTER_FAIL_NONE 0 +#define CLUSTER_FAIL_NOT_FULL_COVERAGE 1 +#define CLUSTER_FAIL_MINORITY_PARTITION 2 + /* Redirection errors returned by getNodeByQuery(). */ #define CLUSTER_REDIR_NONE 0 /* Node can serve the request. */ #define CLUSTER_REDIR_CROSS_SLOT 1 /* -CROSSSLOT request. */ diff --git a/src/cluster_legacy.c b/src/cluster_legacy.c index e4b25e265d..6ea8eb2e67 100644 --- a/src/cluster_legacy.c +++ b/src/cluster_legacy.c @@ -1082,6 +1082,7 @@ void clusterInit(void) { server.cluster->myself = NULL; server.cluster->currentEpoch = 0; server.cluster->state = CLUSTER_FAIL; + server.cluster->fail_reason = CLUSTER_FAIL_NONE; server.cluster->size = 0; server.cluster->todo_before_sleep = 0; server.cluster->nodes = dictCreate(&clusterNodesDictType); @@ -4493,7 +4494,7 @@ void clusterLogCantFailover(int reason) { case CLUSTER_CANT_FAILOVER_WAITING_DELAY: msg = "Waiting the delay before I can start a new failover."; break; case CLUSTER_CANT_FAILOVER_EXPIRED: msg = "Failover attempt expired."; break; case CLUSTER_CANT_FAILOVER_WAITING_VOTES: msg = "Waiting for votes, but majority still not reached."; break; - default: msg = "Unknown reason code."; break; + default: serverPanic("Unknown cant failover reason code."); } lastlog_time = time(NULL); serverLog(LL_NOTICE, "Currently unable to failover: %s", msg); @@ -5362,6 +5363,23 @@ void clusterCloseAllSlots(void) { 
* Cluster state evaluation function * -------------------------------------------------------------------------- */ +void clusterLogFailReason(int reason) { + if (reason == CLUSTER_FAIL_NONE) return; + + char *msg; + switch (reason) { + case CLUSTER_FAIL_NOT_FULL_COVERAGE: + msg = "At least one hash slot is not served by any available node. " + "Please check the 'cluster-require-full-coverage' configuration."; + break; + case CLUSTER_FAIL_MINORITY_PARTITION: + msg = "I am part of a minority partition."; + break; + default: serverPanic("Unknown fail reason code."); + } + serverLog(LL_WARNING, "Cluster is currently down: %s", msg); +} + /* The following are defines that are only used in the evaluation function * and are based on heuristics. Actually the main point about the rejoin and * writable delay is that they should be a few orders of magnitude larger @@ -5371,7 +5389,7 @@ void clusterCloseAllSlots(void) { #define CLUSTER_WRITABLE_DELAY 2000 void clusterUpdateState(void) { - int j, new_state; + int j, new_state, new_reason; int reachable_primaries = 0; static mstime_t among_minority_time; static mstime_t first_call_time = 0; @@ -5392,12 +5410,14 @@ void clusterUpdateState(void) { /* Start assuming the state is OK. We'll turn it into FAIL if there * are the right conditions. */ new_state = CLUSTER_OK; + new_reason = CLUSTER_FAIL_NONE; /* Check if all the slots are covered. */ if (server.cluster_require_full_coverage) { for (j = 0; j < CLUSTER_SLOTS; j++) { if (server.cluster->slots[j] == NULL || server.cluster->slots[j]->flags & (CLUSTER_NODE_FAIL)) { new_state = CLUSTER_FAIL; + new_reason = CLUSTER_FAIL_NOT_FULL_COVERAGE; break; } } @@ -5432,6 +5452,7 @@ void clusterUpdateState(void) { if (reachable_primaries < needed_quorum) { new_state = CLUSTER_FAIL; + new_reason = CLUSTER_FAIL_MINORITY_PARTITION; among_minority_time = mstime(); } } @@ -5455,7 +5476,21 @@ void clusterUpdateState(void) { serverLog(new_state == CLUSTER_OK ? 
LL_NOTICE : LL_WARNING, "Cluster state changed: %s", new_state == CLUSTER_OK ? "ok" : "fail"); server.cluster->state = new_state; + + /* Cluster state changes from ok to fail, print a log. */ + if (new_state == CLUSTER_FAIL) { + clusterLogFailReason(new_reason); + server.cluster->fail_reason = new_reason; + } } + + /* Cluster state is still fail, but the reason has changed, print a log. */ + if (new_state == CLUSTER_FAIL && new_reason != server.cluster->fail_reason) { + clusterLogFailReason(new_reason); + server.cluster->fail_reason = new_reason; + } + + if (new_state == CLUSTER_OK) server.cluster->fail_reason = CLUSTER_FAIL_NONE; } /* This function is called after the node startup in order to verify that data diff --git a/src/cluster_legacy.h b/src/cluster_legacy.h index 39148c748d..5595402a4d 100644 --- a/src/cluster_legacy.h +++ b/src/cluster_legacy.h @@ -370,6 +370,7 @@ struct clusterState { clusterNode *myself; /* This node */ uint64_t currentEpoch; int state; /* CLUSTER_OK, CLUSTER_FAIL, ... */ + int fail_reason; /* Why the cluster state changes to fail. */ int size; /* Num of primary nodes with at least one slot */ dict *nodes; /* Hash table of name -> clusterNode structures */ dict *shards; /* Hash table of shard_id -> list (of nodes) structures */ diff --git a/tests/unit/cluster/info.tcl b/tests/unit/cluster/info.tcl index 0d7b249899..f882378172 100644 --- a/tests/unit/cluster/info.tcl +++ b/tests/unit/cluster/info.tcl @@ -41,3 +41,26 @@ test "errorstats: rejected call due to MOVED Redirection" { } } ;# start_cluster + +start_cluster 3 0 {tags {external:skip cluster} overrides {cluster-node-timeout 1000}} { + test "fail reason changed" { + # Kill one primary, so the cluster fail with not-full-coverage. 
+ pause_process [srv 0 pid] + wait_for_condition 1000 50 { + [CI 1 cluster_state] eq {fail} && + [CI 2 cluster_state] eq {fail} + } else { + fail "Cluster doesn't fail" + } + verify_log_message -1 "*At least one hash slot is not served by any available node*" 0 + verify_log_message -2 "*At least one hash slot is not served by any available node*" 0 + + # Kill one more primary, so the cluster fail with minority-partition. + pause_process [srv -1 pid] + wait_for_log_messages -2 {"*minority partition*"} 0 1000 50 + + resume_process [srv 0 pid] + resume_process [srv -1 pid] + wait_for_cluster_state ok + } +} From 3df609ef06f71c37a45049ec1df9611b9f763d55 Mon Sep 17 00:00:00 2001 From: Nugine Date: Tue, 3 Dec 2024 02:40:38 +0800 Subject: [PATCH 60/60] Optimize PFCOUNT, PFMERGE command by SIMD acceleration (#1293) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR optimizes the performance of HyperLogLog commands (PFCOUNT, PFMERGE) by adding AVX2 fast paths. Two AVX2 functions are added for conversion between raw representation and dense representation. They are 15 ~ 30 times faster than scalar implementation. Note that sparse representation is not accelerated. AVX2 fast paths are enabled when the CPU supports AVX2 (checked at runtime) and the hyperloglog configuration is default (HLL_REGISTERS == 16384 && HLL_BITS == 6). `PFDEBUG SIMD (ON|OFF)` subcommand is added for unit tests. A new TCL unit test checks that the results produced by non-AVX2 and AVX2 implementations are exactly equal. When merging 3 dense hll structures, the benchmark shows a 12x speedup compared to the scalar version. ``` pfcount key1 key2 key3 pfmerge keyall key1 key2 key3 ``` ``` ====================================================================================================== Type Ops/sec Avg. 
Latency p50 Latency p99 Latency p99.9 Latency KB/sec ------------------------------------------------------------------------------------------------------ PFCOUNT-scalar 5665.56 35.29839 32.25500 63.99900 67.58300 608.60 PFCOUNT-avx2 72377.83 2.75834 2.67100 5.34300 6.81500 7774.96 ------------------------------------------------------------------------------------------------------ PFMERGE-scalar 9851.29 20.28806 20.09500 36.86300 39.16700 615.71 PFMERGE-avx2 125621.89 1.59126 1.55100 3.11900 4.70300 15702.74 ------------------------------------------------------------------------------------------------------ scalar: valkey:unstable 2df56d87c0ebe802f38e8922bb2ea1e4ca9cfa76 avx2: Nugine:hll-simd 8f9adc34021080d96e60bd0abe06b043f3ed0275 CPU: 13th Gen Intel® Core™ i9-13900H × 20 Memory: 32.0 GiB OS: Ubuntu 22.04.5 LTS ``` Experiment repo: https://github.com/Nugine/redis-hyperloglog Benchmark script: https://github.com/Nugine/redis-hyperloglog/blob/main/scripts/memtier.sh Algorithm: https://github.com/Nugine/redis-hyperloglog/blob/main/cpp/bench.cpp --------- Signed-off-by: Xuyang Wang --- src/config.h | 13 ++ src/hyperloglog.c | 303 +++++++++++++++++++++++++++++++++++-- tests/unit/hyperloglog.tcl | 40 +++++ 3 files changed, 345 insertions(+), 11 deletions(-) diff --git a/src/config.h b/src/config.h index 3b79c5c681..a2e9f353dc 100644 --- a/src/config.h +++ b/src/config.h @@ -364,4 +364,17 @@ void setcpuaffinity(const char *cpulist); #define valkey_prefetch(addr) ((void)(addr)) #endif +/* Check if we can compile AVX2 code */ +#if defined(__x86_64__) && ((defined(__GNUC__) && __GNUC__ >= 5) || (defined(__clang__) && __clang_major__ >= 4)) +#if defined(__has_attribute) && __has_attribute(target) +#define HAVE_AVX2 +#endif +#endif + +#if defined(HAVE_AVX2) +#define ATTRIBUTE_TARGET_AVX2 __attribute__((target("avx2"))) +#else +#define ATTRIBUTE_TARGET_AVX2 +#endif + #endif diff --git a/src/hyperloglog.c b/src/hyperloglog.c index 563c5e7941..9a48c821ab 100644 --- 
a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -35,6 +35,10 @@ #include #include +#ifdef HAVE_AVX2 +#include +#endif + /* The HyperLogLog implementation is based on the following ideas: * * * The use of a 64 bit hash function as proposed in [1], in order to estimate @@ -208,6 +212,13 @@ struct hllhdr { static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected"; +#ifdef HAVE_AVX2 +static int simd_enabled = 1; +#define HLL_USE_AVX2 (simd_enabled && __builtin_cpu_supports("avx2")) +#else +#define HLL_USE_AVX2 0 +#endif + /* =========================== Low level bit macros ========================= */ /* Macros to access the dense representation. @@ -1064,6 +1075,136 @@ int hllAdd(robj *o, unsigned char *ele, size_t elesize) { } } +#ifdef HAVE_AVX2 +/* A specialized version of hllMergeDense, optimized for default configurations. + * + * Requirements: + * 1) HLL_REGISTERS == 16384 && HLL_BITS == 6 + * 2) The CPU supports AVX2 (checked at runtime in hllMergeDense) + * + * reg_raw: pointer to the raw representation array (16384 bytes, one byte per register) + * reg_dense: pointer to the dense representation array (12288 bytes, 6 bits per register) + */ +ATTRIBUTE_TARGET_AVX2 +void hllMergeDenseAVX2(uint8_t *reg_raw, const uint8_t *reg_dense) { + /* Shuffle indices for unpacking bytes of dense registers + * From: {XXXX|AAAB|BBCC|CDDD|EEEF|FFGG|GHHH|XXXX} + * To: {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0} + */ + const __m256i shuffle = _mm256_setr_epi8( // + 4, 5, 6, -1, // + 7, 8, 9, -1, // + 10, 11, 12, -1, // + 13, 14, 15, -1, // + 0, 1, 2, -1, // + 3, 4, 5, -1, // + 6, 7, 8, -1, // + 9, 10, 11, -1 // + ); + + /* Merge the first 8 registers (6 bytes) normally + * as the AVX2 algorithm needs 4 padding bytes at the start */ + uint8_t val; + for (int i = 0; i < 8; i++) { + HLL_DENSE_GET_REGISTER(val, reg_dense, i); + if (val > reg_raw[i]) { + reg_raw[i] = val; + } + } + + /* Dense to Raw: + * + * 4 registers in 3 bytes: + * {bbaaaaaa|ccccbbbb|ddddddcc} + * + 
* LOAD 32 bytes (32 registers) per iteration: + * 4(padding) + 12(16 registers) + 12(16 registers) + 4(padding) + * {XXXX|AAAB|BBCC|CDDD|EEEF|FFGG|GHHH|XXXX} + * + * SHUFFLE to: + * {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0} + * {bbaaaaaa|ccccbbbb|ddddddcc|00000000} x8 + * + * AVX2 is little endian, each of the 8 groups is a little-endian int32. + * A group (int32) contains 3 valid bytes (4 registers) and a zero byte. + * + * extract registers in each group with AND and SHIFT: + * {00aaaaaa|00000000|00000000|00000000} x8 (<<0) + * {00000000|00bbbbbb|00000000|00000000} x8 (<<2) + * {00000000|00000000|00cccccc|00000000} x8 (<<4) + * {00000000|00000000|00000000|00dddddd} x8 (<<6) + * + * merge the extracted registers with OR: + * {00aaaaaa|00bbbbbb|00cccccc|00dddddd} x8 + * + * Finally, compute MAX(reg_raw, merged) and STORE it back to reg_raw + */ + + /* Skip 8 registers (6 bytes) */ + const uint8_t *r = reg_dense + 6 - 4; + uint8_t *t = reg_raw + 8; + + for (int i = 0; i < HLL_REGISTERS / 32 - 1; ++i) { + __m256i x0, x; + x0 = _mm256_loadu_si256((__m256i *)r); + x = _mm256_shuffle_epi8(x0, shuffle); + + __m256i a1, a2, a3, a4; + a1 = _mm256_and_si256(x, _mm256_set1_epi32(0x0000003f)); + a2 = _mm256_and_si256(x, _mm256_set1_epi32(0x00000fc0)); + a3 = _mm256_and_si256(x, _mm256_set1_epi32(0x0003f000)); + a4 = _mm256_and_si256(x, _mm256_set1_epi32(0x00fc0000)); + + a2 = _mm256_slli_epi32(a2, 2); + a3 = _mm256_slli_epi32(a3, 4); + a4 = _mm256_slli_epi32(a4, 6); + + __m256i y1, y2, y; + y1 = _mm256_or_si256(a1, a2); + y2 = _mm256_or_si256(a3, a4); + y = _mm256_or_si256(y1, y2); + + __m256i z = _mm256_loadu_si256((__m256i *)t); + + z = _mm256_max_epu8(z, y); + + _mm256_storeu_si256((__m256i *)t, z); + + r += 24; + t += 32; + } + + /* Merge the last 24 registers normally + * as the AVX2 algorithm needs 4 padding bytes at the end */ + for (int i = HLL_REGISTERS - 24; i < HLL_REGISTERS; i++) { + HLL_DENSE_GET_REGISTER(val, reg_dense, i); + if (val > reg_raw[i]) { + reg_raw[i] 
= val; + } + } +} +#endif + +/* Merge dense-encoded registers to raw registers array. */ +void hllMergeDense(uint8_t *reg_raw, const uint8_t *reg_dense) { +#ifdef HAVE_AVX2 + if (HLL_REGISTERS == 16384 && HLL_BITS == 6) { + if (HLL_USE_AVX2) { + hllMergeDenseAVX2(reg_raw, reg_dense); + return; + } + } +#endif + + uint8_t val; + for (int i = 0; i < HLL_REGISTERS; i++) { + HLL_DENSE_GET_REGISTER(val, reg_dense, i); + if (val > reg_raw[i]) { + reg_raw[i] = val; + } + } +} + /* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll' * with an array of uint8_t HLL_REGISTERS registers pointed by 'max'. * @@ -1077,12 +1218,7 @@ int hllMerge(uint8_t *max, robj *hll) { int i; if (hdr->encoding == HLL_DENSE) { - uint8_t val; - - for (i = 0; i < HLL_REGISTERS; i++) { - HLL_DENSE_GET_REGISTER(val, hdr->registers, i); - if (val > max[i]) max[i] = val; - } + hllMergeDense(max, hdr->registers); } else { uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr); long runlen, regval; @@ -1114,6 +1250,121 @@ int hllMerge(uint8_t *max, robj *hll) { return C_OK; } +#ifdef HAVE_AVX2 +/* A specialized version of hllDenseCompress, optimized for default configurations. 
+ * + * Requirements: + * 1) HLL_REGISTERS == 16384 && HLL_BITS == 6 + * 2) The CPU supports AVX2 (checked at runtime in hllDenseCompress) + * + * reg_dense: pointer to the dense representation array (12288 bytes, 6 bits per register) + * reg_raw: pointer to the raw representation array (16384 bytes, one byte per register) + */ +ATTRIBUTE_TARGET_AVX2 +void hllDenseCompressAVX2(uint8_t *reg_dense, const uint8_t *reg_raw) { + /* Shuffle indices for packing bytes of dense registers + * From: {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0} + * To: {AAAB|BBCC|CDDD|0000|EEEF|FFGG|GHHH|0000} + */ + const __m256i shuffle = _mm256_setr_epi8( // + 0, 1, 2, // + 4, 5, 6, // + 8, 9, 10, // + 12, 13, 14, // + -1, -1, -1, -1, // + 0, 1, 2, // + 4, 5, 6, // + 8, 9, 10, // + 12, 13, 14, // + -1, -1, -1, -1 // + ); + + /* Raw to Dense: + * + * LOAD 32 bytes (32 registers) per iteration: + * {00aaaaaa|00bbbbbb|00cccccc|00dddddd} x8 + * + * AVX2 is little endian, each of the 8 groups is a little-endian int32. + * A group (int32) contains 4 registers. + * + * move the registers to correct positions with AND and SHIFT: + * {00aaaaaa|00000000|00000000|00000000} x8 (>>0) + * {bb000000|0000bbbb|00000000|00000000} x8 (>>2) + * {00000000|cccc0000|000000cc|00000000} x8 (>>4) + * {00000000|00000000|dddddd00|00000000} x8 (>>6) + * + * merge the registers with OR: + * {bbaaaaaa|ccccbbbb|ddddddcc|00000000} x8 + * {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0} + * + * SHUFFLE to: + * {AAAB|BBCC|CDDD|0000|EEEF|FFGG|GHHH|0000} + * + * STORE the lower half and higher half respectively: + * AAABBBCCCDDD0000 + * EEEFFFGGGHHH0000 + * AAABBBCCCDDDEEEFFFGGGHHH0000 + * + * Note that the last 4 bytes are padding bytes. 
+ */ + + const uint8_t *r = reg_raw; + uint8_t *t = reg_dense; + + for (int i = 0; i < HLL_REGISTERS / 32 - 1; ++i) { + __m256i x = _mm256_loadu_si256((__m256i *)r); + + __m256i a1, a2, a3, a4; + a1 = _mm256_and_si256(x, _mm256_set1_epi32(0x0000003f)); + a2 = _mm256_and_si256(x, _mm256_set1_epi32(0x00003f00)); + a3 = _mm256_and_si256(x, _mm256_set1_epi32(0x003f0000)); + a4 = _mm256_and_si256(x, _mm256_set1_epi32(0x3f000000)); + + a2 = _mm256_srli_epi32(a2, 2); + a3 = _mm256_srli_epi32(a3, 4); + a4 = _mm256_srli_epi32(a4, 6); + + __m256i y1, y2, y; + y1 = _mm256_or_si256(a1, a2); + y2 = _mm256_or_si256(a3, a4); + y = _mm256_or_si256(y1, y2); + y = _mm256_shuffle_epi8(y, shuffle); + + __m128i lower, higher; + lower = _mm256_castsi256_si128(y); + higher = _mm256_extracti128_si256(y, 1); + + _mm_storeu_si128((__m128i *)t, lower); + _mm_storeu_si128((__m128i *)(t + 12), higher); + + r += 32; + t += 24; + } + + /* Merge the last 32 registers normally + * as the AVX2 algorithm needs 4 padding bytes at the end */ + for (int i = HLL_REGISTERS - 32; i < HLL_REGISTERS; i++) { + HLL_DENSE_SET_REGISTER(reg_dense, i, reg_raw[i]); + } +} +#endif + +/* Compress raw registers to dense representation. */ +void hllDenseCompress(uint8_t *reg_dense, const uint8_t *reg_raw) { +#ifdef HAVE_AVX2 + if (HLL_REGISTERS == 16384 && HLL_BITS == 6) { + if (HLL_USE_AVX2) { + hllDenseCompressAVX2(reg_dense, reg_raw); + return; + } + } +#endif + + for (int i = 0; i < HLL_REGISTERS; i++) { + HLL_DENSE_SET_REGISTER(reg_dense, i, reg_raw[i]); + } +} + /* ========================== HyperLogLog commands ========================== */ /* Create an HLL object. We always create the HLL using sparse encoding. @@ -1363,12 +1614,17 @@ void pfmergeCommand(client *c) { /* Write the resulting HLL to the destination HLL registers and * invalidate the cached value. 
*/ - for (j = 0; j < HLL_REGISTERS; j++) { - if (max[j] == 0) continue; + if (use_dense) { hdr = o->ptr; - switch (hdr->encoding) { - case HLL_DENSE: hllDenseSet(hdr->registers, j, max[j]); break; - case HLL_SPARSE: hllSparseSet(o, j, max[j]); break; + hllDenseCompress(hdr->registers, max); + } else { + for (j = 0; j < HLL_REGISTERS; j++) { + if (max[j] == 0) continue; + hdr = o->ptr; + switch (hdr->encoding) { + case HLL_DENSE: hllDenseSet(hdr->registers, j, max[j]); break; + case HLL_SPARSE: hllSparseSet(o, j, max[j]); break; + } } } hdr = o->ptr; /* o->ptr may be different now, as a side effect of @@ -1494,6 +1750,7 @@ void pfselftestCommand(client *c) { * PFDEBUG DECODE * PFDEBUG ENCODING * PFDEBUG TODENSE + * PFDEBUG SIMD (ON|OFF) */ void pfdebugCommand(client *c) { char *cmd = c->argv[1]->ptr; @@ -1501,6 +1758,30 @@ void pfdebugCommand(client *c) { robj *o; int j; + if (!strcasecmp(cmd, "simd")) { + if (c->argc != 3) goto arityerr; + + if (!strcasecmp(c->argv[2]->ptr, "on")) { +#ifdef HAVE_AVX2 + simd_enabled = 1; +#endif + } else if (!strcasecmp(c->argv[2]->ptr, "off")) { +#ifdef HAVE_AVX2 + simd_enabled = 0; +#endif + } else { + addReplyError(c, "Argument must be ON or OFF"); + } + + if (HLL_USE_AVX2) { + addReplyStatus(c, "enabled"); + } else { + addReplyStatus(c, "disabled"); + } + + return; + } + o = lookupKeyWrite(c->db, c->argv[2]); if (o == NULL) { addReplyError(c, "The specified key does not exist"); diff --git a/tests/unit/hyperloglog.tcl b/tests/unit/hyperloglog.tcl index c1b3b3a79f..765d5e0bdd 100644 --- a/tests/unit/hyperloglog.tcl +++ b/tests/unit/hyperloglog.tcl @@ -222,6 +222,46 @@ start_server {tags {"hll"}} { assert_equal 3 [r pfcount destkey] } + test {PFMERGE results with simd} { + r del hllscalar{t} hllsimd{t} hll1{t} hll2{t} hll3{t} + for {set x 1} {$x < 2000} {incr x} { + r pfadd hll1{t} [expr rand()] + } + for {set x 1} {$x < 4000} {incr x} { + r pfadd hll2{t} [expr rand()] + } + for {set x 1} {$x < 8000} {incr x} { + r pfadd hll3{t} 
[expr rand()] + } + assert {[r pfcount hll1{t}] > 0} + assert {[r pfcount hll2{t}] > 0} + assert {[r pfcount hll3{t}] > 0} + + r pfdebug simd off + set scalar [r pfcount hll1{t} hll2{t} hll3{t}] + r pfdebug simd on + set simd [r pfcount hll1{t} hll2{t} hll3{t}] + assert {$scalar > 0} + assert {$simd > 0} + assert_equal $scalar $simd + + r pfdebug simd off + r pfmerge hllscalar{t} hll1{t} hll2{t} hll3{t} + r pfdebug simd on + r pfmerge hllsimd{t} hll1{t} hll2{t} hll3{t} + + set scalar [r pfcount hllscalar{t}] + set simd [r pfcount hllsimd{t}] + assert {$scalar > 0} + assert {$simd > 0} + assert_equal $scalar $simd + + set scalar [r get hllscalar{t}] + set simd [r get hllsimd{t}] + assert_equal $scalar $simd + + } {} {needs:pfdebug} + test {PFCOUNT multiple-keys merge returns cardinality of union #1} { r del hll1{t} hll2{t} hll3{t} for {set x 1} {$x < 10000} {incr x} {