diff --git a/.vscode/settings.json b/.vscode/settings.json index c0c885af..4b489146 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -93,7 +93,9 @@ "format": "c", "execution": "cpp", "math.h": "c", - "float.h": "c" + "float.h": "c", + "text_encoding": "cpp", + "stdio.h": "c" }, "cSpell.words": [ "allclose", diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 0728a344..f7b9b91c 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -101,7 +101,7 @@ You can also benchmark against other libraries, filter the numeric types, and di $ python scripts/bench_vectors.py --help > usage: bench.py [-h] [--ndim NDIM] [-n COUNT] > [--metric {all,dot,spatial,binary,probability,sparse}] -> [--dtype {all,bits,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128}] +> [--dtype {all,bin8,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128}] > [--scipy] [--scikit] [--torch] [--tf] [--jax] > > Benchmark SimSIMD vs. other libraries @@ -119,7 +119,7 @@ $ python scripts/bench_vectors.py --help > `cdist`. > --metric {all,dot,spatial,binary,probability,sparse} > Distance metric to use, profiles everything by default -> --dtype {all,bits,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128} +> --dtype {all,bin8,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128} > Defines numeric types to benchmark, profiles everything by default > --scipy Profile SciPy, must be installed > --scikit Profile scikit-learn, must be installed @@ -203,6 +203,35 @@ bun test swift build && swift test -v ``` +Running Swift on Linux requires a couple of extra steps, as the Swift compiler is not available in the default repositories. +Please get the most recent Swift tarball from the [official website](https://www.swift.org/install/). 
+At the time of writing, for 64-bit Arm CPU running Ubuntu 22.04, the following commands would work: + +```bash +wget https://download.swift.org/swift-5.9.2-release/ubuntu2204-aarch64/swift-5.9.2-RELEASE/swift-5.9.2-RELEASE-ubuntu22.04-aarch64.tar.gz +tar xzf swift-5.9.2-RELEASE-ubuntu22.04-aarch64.tar.gz +sudo mv swift-5.9.2-RELEASE-ubuntu22.04-aarch64 /usr/share/swift +echo "export PATH=/usr/share/swift/usr/bin:$PATH" >> ~/.bashrc +source ~/.bashrc +``` + +You can check the available images on [`swift.org/download` page](https://www.swift.org/download/#releases). +For x86 CPUs, the following commands would work: + +```bash +wget https://download.swift.org/swift-5.9.2-release/ubuntu2204/swift-5.9.2-RELEASE/swift-5.9.2-RELEASE-ubuntu22.04.tar.gz +tar xzf swift-5.9.2-RELEASE-ubuntu22.04.tar.gz +sudo mv swift-5.9.2-RELEASE-ubuntu22.04 /usr/share/swift +echo "export PATH=/usr/share/swift/usr/bin:$PATH" >> ~/.bashrc +source ~/.bashrc +``` + +Alternatively, on Linux, the official Swift Docker image can be used for builds and tests: + +```bash +sudo docker run --rm -v "$PWD:/workspace" -w /workspace swift:5.9 /bin/bash -cl "swift build -c release --static-swift-stdlib && swift test -c release --enable-test-discovery" +``` + ## GoLang ```sh diff --git a/README.md b/README.md index cded1247..6a0f90c2 100644 --- a/README.md +++ b/README.md @@ -69,9 +69,9 @@ Implemented distance functions include: Moreover, SimSIMD... -- handles `f64`, `f32`, `f16`, and `bf16` real & complex vectors. -- handles `i8` integral, `i4` sub-byte, and `b8` binary vectors. -- handles sparse `u32` and `u16` sets, and weighted sparse vectors. +- handles `float64`, `float32`, `float16`, and `bfloat16` real & complex vectors. +- handles `int8` integral, `int4` sub-byte, and `b8` binary vectors. +- handles sparse `uint32` and `uint16` sets, and weighted sparse vectors. - is a zero-dependency [header-only C 99](#using-simsimd-in-c) library. 
- has [Python](#using-simsimd-in-python), [Rust](#using-simsimd-in-rust), [JS](#using-simsimd-in-javascript), and [Swift](#using-simsimd-in-swift) bindings. - has Arm backends for NEON, Scalable Vector Extensions (SVE), and SVE2. @@ -95,14 +95,14 @@ You can learn more about the technical implementation details in the following b For reference, we use 1536-dimensional vectors, like the embeddings produced by the OpenAI Ada API. Comparing the serial code throughput produced by GCC 12 to hand-optimized kernels in SimSIMD, we see the following single-core improvements for the two most common vector-vector similarity metrics - the Cosine similarity and the Euclidean distance: -| Type | Apple M2 Pro | Intel Sapphire Rapids | AWS Graviton 4 | -| :----- | ----------------------------: | -------------------------------: | ------------------------------: | -| `f64` | 18.5 → 28.8 GB/s
+ 56 % | 21.9 → 41.4 GB/s
+ 89 % | 20.7 → 41.3 GB/s
+ 99 % | -| `f32` | 9.2 → 29.6 GB/s
+ 221 % | 10.9 → 95.8 GB/s
+ 779 % | 4.9 → 41.9 GB/s
+ 755 % | -| `f16` | 4.6 → 14.6 GB/s
+ 217 % | 3.1 → 108.4 GB/s
+ 3,397 % | 5.4 → 39.3 GB/s
+ 627 % | -| `bf16` | 4.6 → 26.3 GB/s
+ 472 % | 0.8 → 59.5 GB/s
+7,437 % | 2.5 → 29.9 GB/s
+ 1,096 % | -| `i8` | 25.8 → 47.1 GB/s
+ 83 % | 33.1 → 65.3 GB/s
+ 97 % | 35.2 → 43.5 GB/s
+ 24 % | -| `u8` | | 32.5 → 66.5 GB/s
+ 105 % | | +| Type | Apple M2 Pro | Intel Sapphire Rapids | AWS Graviton 4 | +| :--------- | ----------------------------: | -------------------------------: | ------------------------------: | +| `float64` | 18.5 → 28.8 GB/s
+ 56 % | 21.9 → 41.4 GB/s
+ 89 % | 20.7 → 41.3 GB/s
+ 99 % | +| `float32` | 9.2 → 29.6 GB/s
+ 221 % | 10.9 → 95.8 GB/s
+ 779 % | 4.9 → 41.9 GB/s
+ 755 % | +| `float16` | 4.6 → 14.6 GB/s
+ 217 % | 3.1 → 108.4 GB/s
+ 3,397 % | 5.4 → 39.3 GB/s
+ 627 % | +| `bfloat16` | 4.6 → 26.3 GB/s
+ 472 % | 0.8 → 59.5 GB/s
+7,437 % | 2.5 → 29.9 GB/s
+ 1,096 % | +| `int8` | 25.8 → 47.1 GB/s
+ 83 % | 33.1 → 65.3 GB/s
+ 97 % | 35.2 → 43.5 GB/s
+ 24 % | +| `uint8` | | 32.5 → 66.5 GB/s
+ 105 % | | Similar speedups are often observed even when compared to BLAS and LAPACK libraries underlying most numerical computing libraries, including NumPy and SciPy in Python. Broader benchmarking results: @@ -115,8 +115,8 @@ Broader benchmarking results: The package is intended to replace the usage of `numpy.inner`, `numpy.dot`, and `scipy.spatial.distance`. Aside from drastic performance improvements, SimSIMD significantly improves accuracy in mixed precision setups. -NumPy and SciPy, processing `i8`, `u8` or `f16` vectors, will use the same types for accumulators, while SimSIMD can combine `i8` enumeration, `i16` multiplication, and `i32` accumulation to avoid overflows entirely. -The same applies to processing `f16` and `bf16` values with `f32` precision. +NumPy and SciPy, processing `int8`, `uint8` or `float16` vectors, will use the same types for accumulators, while SimSIMD can combine `int8` enumeration, `int16` multiplication, and `int32` accumulation to avoid overflows entirely. +The same applies to processing `float16` and `bfloat16` values with `float32` precision. ### Installation @@ -155,14 +155,33 @@ dist = simsimd.vdot(vec1.astype(np.complex64), vec2.astype(np.complex64)) # conj ``` Unlike SciPy, SimSIMD allows explicitly stating the precision of the input vectors, which is especially useful for mixed-precision setups. 
+The `dtype` argument can be passed both by name and as a positional argument: ```py -dist = simsimd.cosine(vec1, vec2, "i8") -dist = simsimd.cosine(vec1, vec2, "f16") -dist = simsimd.cosine(vec1, vec2, "f32") -dist = simsimd.cosine(vec1, vec2, "f64") -dist = simsimd.hamming(vec1, vec2, "bits") -dist = simsimd.jaccard(vec1, vec2, "bits") +dist = simsimd.cosine(vec1, vec2, "int8") +dist = simsimd.cosine(vec1, vec2, "float16") +dist = simsimd.cosine(vec1, vec2, "float32") +dist = simsimd.cosine(vec1, vec2, "float64") +dist = simsimd.hamming(vec1, vec2, "bit8") +``` + +With other frameworks, like PyTorch, one can get a richer type-system than NumPy, but the lack of good CPython interoperability makes it hard to pass data without copies. + +```py +import numpy as np +buf1 = np.empty(8, dtype=np.uint16) +buf2 = np.empty(8, dtype=np.uint16) + +# View the same memory region with PyTorch and randomize it +import torch +vec1 = torch.asarray(memoryview(buf1), copy=False).view(torch.bfloat16) +vec2 = torch.asarray(memoryview(buf2), copy=False).view(torch.bfloat16) +torch.randn(8, out=vec1) +torch.randn(8, out=vec2) + +# Both libs will look into the same memory buffers and report the same results +dist_slow = 1 - torch.nn.functional.cosine_similarity(vec1, vec2, dim=0) +dist_fast = simsimd.cosine(buf1, buf2, "bf16") ``` It also allows using SimSIMD for half-precision complex numbers, which NumPy does not support. @@ -235,6 +254,48 @@ distances: DistancesTensor = simsimd.cdist(matrix1, matrix2, metric="cosine") distances_array: np.ndarray = np.array(distances, copy=True) # now managed by NumPy ``` +### Elementwise Kernels + +SimSIMD also provides mixed-precision elementwise kernels, where the input vectors and the output have the same numeric type, but the intermediate accumulators are of a higher precision. 
+ +```py +import numpy as np +from simsimd import fma, wsum + +# Let's take two FullHD video frames +first_frame = np.random.randn(1920 * 1024).astype(np.uint8) +second_frame = np.random.randn(1920 * 1024).astype(np.uint8) +average_frame = np.empty_like(first_frame) +wsum(first_frame, second_frame, alpha=0.5, beta=0.5, out=average_frame) + +# Slow analog with NumPy: +slow_average_frame = (0.5 * first_frame + 0.5 * second_frame).astype(np.uint8) +``` + +Similarly, the `fma` takes three arguments and computes the fused multiply-add operation. +In applications like Machine Learning you may also benefit from using the "brain-float" format not natively supported by NumPy. +In 3D Graphics, for example, we can use FMA to compute the [Phong shading model](https://en.wikipedia.org/wiki/Phong_shading): + +```py +# Assume a FullHD frame with random values for simplicity +light_intensity = np.random.rand(1920 * 1080).astype(np.float16) # Intensity of light on each pixel +diffuse_component = np.random.rand(1920 * 1080).astype(np.float16) # Diffuse reflectance on the surface +specular_component = np.random.rand(1920 * 1080).astype(np.float16) # Specular reflectance for highlights +output_color = np.empty_like(light_intensity) # Array to store the resulting color intensity + +# Define the scaling factors for diffuse and specular contributions +alpha = 0.7 # Weight for the diffuse component +beta = 0.3 # Weight for the specular component + +# Formula: color = alpha * light_intensity * diffuse_component + beta * specular_component +fma(light_intensity, diffuse_component, specular_component, + dtype="float16", # Optional, unless it can't be inferred from the input + alpha=alpha, beta=beta, out=output_color) + +# Slow analog with NumPy for comparison +slow_output_color = (alpha * light_intensity * diffuse_component + beta * specular_component).astype(np.float16) +``` + ### Multithreading and Memory Usage By default, computations use a single CPU core. 
@@ -248,15 +309,15 @@ matrix1 = np.packbits(np.random.randint(2, size=(10_000, ndim)).astype(np.uint8) matrix2 = np.packbits(np.random.randint(2, size=(1_000, ndim)).astype(np.uint8)) distances = simsimd.cdist(matrix1, matrix2, - metric="hamming", # Unlike SciPy, SimSIMD doesn't divide by the number of dimensions - out_dtype="u8", # so we can use `u8` instead of `f64` to save memory. - threads=0, # Use all CPU cores with OpenMP. - dtype="b8", # Override input argument type to `b8` eight-bit words. + metric="hamming", # Unlike SciPy, SimSIMD doesn't divide by the number of dimensions + out_dtype="uint8", # so we can use `uint8` instead of `float64` to save memory. + threads=0, # Use all CPU cores with OpenMP. + dtype="bin8", # Override input argument type to `bin8` eight-bit words. ) ``` -By default, the output distances will be stored in double-precision `f64` floating-point numbers. -That behavior may not be space-efficient, especially if you are computing the hamming distance between short binary vectors, that will generally fit into 8x smaller `u8` or `u16` types. +By default, the output distances will be stored in double-precision `float64` floating-point numbers. +That behavior may not be space-efficient, especially if you are computing the hamming distance between short binary vectors, that will generally fit into 8x smaller `uint8` or `uint16` types. To override this behavior, use the `dtype` argument. 
### Helper Functions @@ -575,7 +636,7 @@ Simplest of all, you can include the headers, and the compiler will automaticall int main() { simsimd_f32_t vector_a[1536]; simsimd_f32_t vector_b[1536]; - simsimd_metric_punned_t distance_function = simsimd_metric_punned( + simsimd_kernel_punned_t distance_function = simsimd_metric_punned( simsimd_metric_cos_k, // Metric kind, like the angular cosine distance simsimd_datatype_f32_k, // Data type, like: f16, f32, f64, i8, b8, and complex variants simsimd_cap_any_k); // Which CPU capabilities are we allowed to use @@ -663,7 +724,6 @@ int main() { simsimd_vdot_f16c(f16s, f16s, 1536, &distance); simsimd_vdot_f32c(f32s, f32s, 1536, &distance); simsimd_vdot_f64c(f64s, f64s, 1536, &distance); - return 0; } ``` @@ -676,13 +736,8 @@ int main() { int main() { simsimd_b8_t b8s[1536 / 8]; // 8 bits per word simsimd_distance_t distance; - - // Hamming distance between two vectors simsimd_hamming_b8(b8s, b8s, 1536 / 8, &distance); - - // Jaccard distance between two vectors simsimd_jaccard_b8(b8s, b8s, 1536 / 8, &distance); - return 0; } ``` @@ -707,7 +762,6 @@ int main() { simsimd_kl_f16(f16s, f16s, 1536, &distance); simsimd_kl_f32(f32s, f32s, 1536, &distance); simsimd_kl_f64(f64s, f64s, 1536, &distance); - return 0; } ``` @@ -949,10 +1003,10 @@ In NumPy terms, the implementation may look like: ```py import numpy as np -def wsum(A: np.ndarray, B: np.ndarray, Alpha: float, Beta: float) -> np.ndarray: +def wsum(A: np.ndarray, B: np.ndarray, /, Alpha: float, Beta: float) -> np.ndarray: assert A.dtype == B.dtype, "Input types must match and affect the output style" return (Alpha * A + Beta * B).astype(A.dtype) -def fma(A: np.ndarray, B: np.ndarray, C: np.ndarray, Alpha: float, Beta: float) -> np.ndarray: +def fma(A: np.ndarray, B: np.ndarray, C: np.ndarray, /, Alpha: float, Beta: float) -> np.ndarray: assert A.dtype == B.dtype and A.dtype == C.dtype, "Input types must match and affect the output style" return (Alpha * A * B + Beta * 
C).astype(A.dtype) ``` @@ -1095,7 +1149,7 @@ All of the function names follow the same pattern: `simsimd_{function}_{type}_{b - The type can be `f64`, `f32`, `f16`, `bf16`, `f64c`, `f32c`, `f16c`, `bf16c`, `i8`, or `b8`. - The function can be `dot`, `vdot`, `cos`, `l2sq`, `hamming`, `jaccard`, `kl`, `js`, or `intersect`. -To avoid hard-coding the backend, you can use the `simsimd_metric_punned_t` to pun the function pointer and the `simsimd_capabilities` function to get the available backends at runtime. +To avoid hard-coding the backend, you can use the `simsimd_kernel_punned_t` to pun the function pointer and the `simsimd_capabilities` function to get the available backends at runtime. To match all the function names, consider a RegEx: ```regex diff --git a/c/lib.c b/c/lib.c index d59724a1..60b985d5 100644 --- a/c/lib.c +++ b/c/lib.c @@ -55,20 +55,21 @@ extern "C" { // If no metric is found, it returns NaN. We can obtain NaN by dividing 0.0 by 0.0, but that annoys // the MSVC compiler. Instead we can directly write-in the signaling NaN (0x7FF0000000000001) // or the qNaN (0x7FF8000000000000). 
-#define SIMSIMD_DECLARATION_DENSE(name, extension, type) \ - SIMSIMD_DYNAMIC void simsimd_##name##_##extension(simsimd_##type##_t const *a, simsimd_##type##_t const *b, \ - simsimd_size_t n, simsimd_distance_t *results) { \ - static simsimd_metric_punned_t metric = 0; \ - if (metric == 0) { \ - simsimd_capability_t used_capability; \ - simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ - simsimd_capabilities(), simsimd_cap_any_k, &metric, &used_capability); \ - if (!metric) { \ - *(simsimd_u64_t *)results = 0x7FF0000000000001ull; \ - return; \ - } \ - } \ - metric(a, b, n, results); \ +#define SIMSIMD_DECLARATION_DENSE(name, extension, type) \ + SIMSIMD_DYNAMIC void simsimd_##name##_##extension(simsimd_##type##_t const *a, simsimd_##type##_t const *b, \ + simsimd_size_t n, simsimd_distance_t *results) { \ + static simsimd_metric_dense_punned_t metric = 0; \ + if (metric == 0) { \ + simsimd_capability_t used_capability; \ + simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ + simsimd_capabilities(), simsimd_cap_any_k, (simsimd_kernel_punned_t *)&metric, \ + &used_capability); \ + if (!metric) { \ + *(simsimd_u64_t *)results = 0x7FF0000000000001ull; \ + return; \ + } \ + } \ + metric(a, b, n, results); \ } #define SIMSIMD_DECLARATION_SPARSE(name, extension, type) \ @@ -78,9 +79,9 @@ extern "C" { static simsimd_metric_sparse_punned_t metric = 0; \ if (metric == 0) { \ simsimd_capability_t used_capability; \ - simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ + simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ simsimd_capabilities(), simsimd_cap_any_k, \ - (simsimd_metric_punned_t *)(&metric), &used_capability); \ + (simsimd_kernel_punned_t *)(&metric), &used_capability); \ if (!metric) { \ *(simsimd_u64_t *)result = 0x7FF0000000000001ull; \ return; \ @@ -96,9 +97,9 @@ extern "C" { static 
simsimd_metric_curved_punned_t metric = 0; \ if (metric == 0) { \ simsimd_capability_t used_capability; \ - simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ + simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ simsimd_capabilities(), simsimd_cap_any_k, \ - (simsimd_metric_punned_t *)(&metric), &used_capability); \ + (simsimd_kernel_punned_t *)(&metric), &used_capability); \ if (!metric) { \ *(simsimd_u64_t *)result = 0x7FF0000000000001ull; \ return; \ @@ -114,9 +115,9 @@ extern "C" { static simsimd_kernel_fma_punned_t metric = 0; \ if (metric == 0) { \ simsimd_capability_t used_capability; \ - simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ + simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ simsimd_capabilities(), simsimd_cap_any_k, \ - (simsimd_metric_punned_t *)(&metric), &used_capability); \ + (simsimd_kernel_punned_t *)(&metric), &used_capability); \ } \ metric(a, b, c, n, alpha, beta, result); \ } @@ -128,9 +129,9 @@ extern "C" { static simsimd_kernel_wsum_punned_t metric = 0; \ if (metric == 0) { \ simsimd_capability_t used_capability; \ - simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ + simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \ simsimd_capabilities(), simsimd_cap_any_k, \ - (simsimd_metric_punned_t *)(&metric), &used_capability); \ + (simsimd_kernel_punned_t *)(&metric), &used_capability); \ } \ metric(a, b, n, alpha, beta, result); \ } @@ -326,14 +327,14 @@ SIMSIMD_DYNAMIC simsimd_capability_t simsimd_capabilities(void) { return static_capabilities; } -SIMSIMD_DYNAMIC void simsimd_find_metric_punned( // +SIMSIMD_DYNAMIC void simsimd_find_kernel_punned( // simsimd_metric_kind_t kind, // simsimd_datatype_t datatype, // simsimd_capability_t supported, // simsimd_capability_t allowed, // - 
simsimd_metric_punned_t *metric_output, // + simsimd_kernel_punned_t *kernel_output, // simsimd_capability_t *capability_output) { - _simsimd_find_metric_punned_implementation(kind, datatype, supported, allowed, metric_output, capability_output); + _simsimd_find_kernel_punned_implementation(kind, datatype, supported, allowed, kernel_output, capability_output); } #ifdef __cplusplus diff --git a/include/simsimd/elementwise.h b/include/simsimd/elementwise.h index ea5dac23..fb3e72a5 100644 --- a/include/simsimd/elementwise.h +++ b/include/simsimd/elementwise.h @@ -36,8 +36,8 @@ * x86 intrinsics: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/ * Arm intrinsics: https://developer.arm.com/architectures/instruction-sets/intrinsics/ */ -#ifndef SIMSIMD_FMA_H -#define SIMSIMD_FMA_H +#ifndef SIMSIMD_ELEMENTWISE_H +#define SIMSIMD_ELEMENTWISE_H #include "types.h" diff --git a/include/simsimd/simsimd.h b/include/simsimd/simsimd.h index eb94d76b..dce52855 100644 --- a/include/simsimd/simsimd.h +++ b/include/simsimd/simsimd.h @@ -304,28 +304,29 @@ typedef void (*simsimd_kernel_wsum_punned_t)(void const *a, void const *b, // /** * @brief Type-punned function pointer for a SimSIMD public interface. - * Can be a `simsimd_metric_dense_punned_t`, `simsimd_metric_sparse_punned_t`, - * or `simsimd_metric_curved_punned_t`. + * + * Can be a `simsimd_metric_dense_punned_t`, `simsimd_metric_sparse_punned_t`, `simsimd_metric_curved_punned_t`, + * `simsimd_kernel_fma_punned_t`, or `simsimd_kernel_wsum_punned_t`. 
*/ -typedef simsimd_metric_dense_punned_t simsimd_metric_punned_t; +typedef void (*simsimd_kernel_punned_t)(void *); #if SIMSIMD_DYNAMIC_DISPATCH SIMSIMD_DYNAMIC simsimd_capability_t simsimd_capabilities(void); -SIMSIMD_DYNAMIC void simsimd_find_metric_punned( // +SIMSIMD_DYNAMIC void simsimd_find_kernel_punned( // simsimd_metric_kind_t kind, // simsimd_datatype_t datatype, // simsimd_capability_t supported, // simsimd_capability_t allowed, // - simsimd_metric_punned_t *metric_output, // + simsimd_kernel_punned_t *kernel_output, // simsimd_capability_t *capability_output); #else SIMSIMD_PUBLIC simsimd_capability_t simsimd_capabilities(void); -SIMSIMD_PUBLIC void simsimd_find_metric_punned( // +SIMSIMD_PUBLIC void simsimd_find_kernel_punned( // simsimd_metric_kind_t kind, // simsimd_datatype_t datatype, // simsimd_capability_t supported, // simsimd_capability_t allowed, // - simsimd_metric_punned_t *metric_output, // + simsimd_kernel_punned_t *kernel_output, // simsimd_capability_t *capability_output); #endif @@ -537,9 +538,9 @@ SIMSIMD_PUBLIC simsimd_capability_t _simsimd_capabilities_implementation(void) { #pragma clang diagnostic ignored "-Wvolatile" #endif -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f64(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE if (v & simsimd_cap_sve_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f64_sve, *c = simsimd_cap_sve_k; return; @@ -593,9 +594,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64(simsimd_capability_t v, si } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - 
typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f32(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE if (v & simsimd_cap_sve_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f32_sve, *c = simsimd_cap_sve_k; return; @@ -661,9 +662,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32(simsimd_capability_t v, si } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f16(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE_F16 if (v & simsimd_cap_sve_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f16_sve, *c = simsimd_cap_sve_f16_k; return; @@ -737,9 +738,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16(simsimd_capability_t v, si } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_bf16(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE_BF16 if (v & simsimd_cap_sve_bf16_k) switch (k) { case simsimd_metric_cos_k: *m = (m_t)&simsimd_cos_bf16_sve, *c = simsimd_cap_sve_bf16_k; return; @@ -809,9 +810,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16(simsimd_capability_t v, s } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_i8(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { 
- typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_i8(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_NEON_I8 if (v & simsimd_cap_neon_i8_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_i8_neon, *c = simsimd_cap_neon_i8_k; return; @@ -865,9 +866,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_i8(simsimd_capability_t v, sim default: break; } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u8(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_u8(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_NEON_I8 if (v & simsimd_cap_neon_i8_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_u8_neon, *c = simsimd_cap_neon_i8_k; return; @@ -922,9 +923,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u8(simsimd_capability_t v, sim } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_b8(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_b8(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE if (v & simsimd_cap_sve_k) switch (k) { case simsimd_metric_hamming_k: *m = (m_t)&simsimd_hamming_b8_sve, *c = simsimd_cap_sve_k; return; @@ -960,9 +961,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_b8(simsimd_capability_t v, sim } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64c(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, 
simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f64c(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE if (v & simsimd_cap_sve_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f64c_sve, *c = simsimd_cap_sve_k; return; @@ -984,9 +985,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64c(simsimd_capability_t v, s } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32c(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f32c(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE if (v & simsimd_cap_sve_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f32c_sve, *c = simsimd_cap_sve_k; return; @@ -1022,9 +1023,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32c(simsimd_capability_t v, s } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16c(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f16c(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE_F16 if (v & simsimd_cap_sve_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f16c_sve, *c = simsimd_cap_sve_f16_k; return; @@ -1060,9 +1061,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16c(simsimd_capability_t v, s } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16c(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, 
simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_bf16c(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_NEON_BF16 if (v & simsimd_cap_neon_bf16_k) switch (k) { case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_bf16c_neon, *c = simsimd_cap_neon_bf16_k; return; @@ -1084,9 +1085,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16c(simsimd_capability_t v, } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u16(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_u16(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE2 if (v & simsimd_cap_sve2_k) switch (k) { case simsimd_metric_intersect_k: *m = (m_t)&simsimd_intersect_u16_sve2, *c = simsimd_cap_sve2_k; return; @@ -1125,9 +1126,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u16(simsimd_capability_t v, si } } -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u32(simsimd_capability_t v, simsimd_metric_kind_t k, - simsimd_metric_punned_t *m, simsimd_capability_t *c) { - typedef simsimd_metric_punned_t m_t; +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_u32(simsimd_capability_t v, simsimd_metric_kind_t k, + simsimd_kernel_punned_t *m, simsimd_capability_t *c) { + typedef simsimd_kernel_punned_t m_t; #if SIMSIMD_TARGET_SVE2 if (v & simsimd_cap_sve2_k) switch (k) { case simsimd_metric_intersect_k: *m = (m_t)&simsimd_intersect_u32_sve2, *c = simsimd_cap_sve2_k; return; @@ -1166,15 +1167,15 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u32(simsimd_capability_t v, si * @param datatype The data type for which the metric needs to be evaluated. 
* @param supported The hardware capabilities supported by the CPU. * @param allowed The hardware capabilities allowed for use. - * @param metric_output Output variable for the selected similarity function. + * @param kernel_output Output variable for the selected similarity function. * @param capability_output Output variable for the utilized hardware capabilities. */ -SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( // +SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_implementation( // simsimd_metric_kind_t kind, // simsimd_datatype_t datatype, // simsimd_capability_t supported, // simsimd_capability_t allowed, // - simsimd_metric_punned_t *metric_output, // + simsimd_kernel_punned_t *kernel_output, // simsimd_capability_t *capability_output) { // Modern compilers abso-freaking-lutely love optimizing-out my logic! @@ -1186,25 +1187,25 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( // __asm__ __volatile__("" ::: "memory"); #endif - simsimd_metric_punned_t *m = metric_output; + simsimd_kernel_punned_t *m = kernel_output; simsimd_capability_t *c = capability_output; simsimd_capability_t viable = (simsimd_capability_t)(supported & allowed); switch (datatype) { - case simsimd_datatype_f64_k: _simsimd_find_metric_punned_f64(viable, kind, m, c); return; - case simsimd_datatype_f32_k: _simsimd_find_metric_punned_f32(viable, kind, m, c); return; - case simsimd_datatype_f16_k: _simsimd_find_metric_punned_f16(viable, kind, m, c); return; - case simsimd_datatype_bf16_k: _simsimd_find_metric_punned_bf16(viable, kind, m, c); return; - case simsimd_datatype_i8_k: _simsimd_find_metric_punned_i8(viable, kind, m, c); return; - case simsimd_datatype_u8_k: _simsimd_find_metric_punned_u8(viable, kind, m, c); return; - case simsimd_datatype_b8_k: _simsimd_find_metric_punned_b8(viable, kind, m, c); return; - case simsimd_datatype_f32c_k: _simsimd_find_metric_punned_f32c(viable, kind, m, c); return; - case simsimd_datatype_f64c_k: 
_simsimd_find_metric_punned_f64c(viable, kind, m, c); return; - case simsimd_datatype_f16c_k: _simsimd_find_metric_punned_f16c(viable, kind, m, c); return; - case simsimd_datatype_bf16c_k: _simsimd_find_metric_punned_bf16c(viable, kind, m, c); return; - case simsimd_datatype_u16_k: _simsimd_find_metric_punned_u16(viable, kind, m, c); return; - case simsimd_datatype_u32_k: _simsimd_find_metric_punned_u32(viable, kind, m, c); return; + case simsimd_datatype_f64_k: _simsimd_find_kernel_punned_f64(viable, kind, m, c); return; + case simsimd_datatype_f32_k: _simsimd_find_kernel_punned_f32(viable, kind, m, c); return; + case simsimd_datatype_f16_k: _simsimd_find_kernel_punned_f16(viable, kind, m, c); return; + case simsimd_datatype_bf16_k: _simsimd_find_kernel_punned_bf16(viable, kind, m, c); return; + case simsimd_datatype_i8_k: _simsimd_find_kernel_punned_i8(viable, kind, m, c); return; + case simsimd_datatype_u8_k: _simsimd_find_kernel_punned_u8(viable, kind, m, c); return; + case simsimd_datatype_b8_k: _simsimd_find_kernel_punned_b8(viable, kind, m, c); return; + case simsimd_datatype_f32c_k: _simsimd_find_kernel_punned_f32c(viable, kind, m, c); return; + case simsimd_datatype_f64c_k: _simsimd_find_kernel_punned_f64c(viable, kind, m, c); return; + case simsimd_datatype_f16c_k: _simsimd_find_kernel_punned_f16c(viable, kind, m, c); return; + case simsimd_datatype_bf16c_k: _simsimd_find_kernel_punned_bf16c(viable, kind, m, c); return; + case simsimd_datatype_u16_k: _simsimd_find_kernel_punned_u16(viable, kind, m, c); return; + case simsimd_datatype_u32_k: _simsimd_find_kernel_punned_u32(viable, kind, m, c); return; // These data-types are not supported yet case simsimd_datatype_i4x2_k: break; @@ -1217,7 +1218,7 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( // } // Replace with zeros if no suitable implementation was found - *m = (simsimd_metric_punned_t)0; + *m = (simsimd_kernel_punned_t)0; *c = (simsimd_capability_t)0; // Modern compilers 
abso-freaking-lutely love optimizing-out my logic! @@ -1242,15 +1243,15 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( // * @param allowed The hardware capabilities allowed for use. * @return A function pointer to the selected metric implementation. */ -SIMSIMD_PUBLIC simsimd_metric_punned_t simsimd_metric_punned( // +SIMSIMD_PUBLIC simsimd_kernel_punned_t simsimd_metric_punned( // simsimd_metric_kind_t kind, // simsimd_datatype_t datatype, // simsimd_capability_t allowed) { - simsimd_metric_punned_t result = 0; + simsimd_kernel_punned_t result = 0; simsimd_capability_t c = simsimd_cap_serial_k; simsimd_capability_t supported = simsimd_capabilities(); - simsimd_find_metric_punned(kind, datatype, supported, allowed, &result, &c); + simsimd_find_kernel_punned(kind, datatype, supported, allowed, &result, &c); return result; } @@ -1462,14 +1463,14 @@ SIMSIMD_PUBLIC int simsimd_uses_turin(void) { return _SIMSIMD_TARGET_X86 && SIMS SIMSIMD_PUBLIC int simsimd_uses_sierra(void) { return _SIMSIMD_TARGET_X86 && SIMSIMD_TARGET_SIERRA; } SIMSIMD_PUBLIC int simsimd_uses_dynamic_dispatch(void) { return 0; } SIMSIMD_PUBLIC simsimd_capability_t simsimd_capabilities(void) { return _simsimd_capabilities_implementation(); } -SIMSIMD_PUBLIC void simsimd_find_metric_punned( // +SIMSIMD_PUBLIC void simsimd_find_kernel_punned( // simsimd_metric_kind_t kind, // simsimd_datatype_t datatype, // simsimd_capability_t supported, // simsimd_capability_t allowed, // - simsimd_metric_punned_t* metric_output, // + simsimd_kernel_punned_t* kernel_output, // simsimd_capability_t* capability_output) { - _simsimd_find_metric_punned_implementation(kind, datatype, supported, allowed, metric_output, capability_output); + _simsimd_find_kernel_punned_implementation(kind, datatype, supported, allowed, kernel_output, capability_output); } // clang-format on diff --git a/javascript/lib.c b/javascript/lib.c index fb4de82c..f668d795 100644 --- a/javascript/lib.c +++ b/javascript/lib.c @@ 
-53,9 +53,10 @@ napi_value dense(napi_env env, napi_callback_info info, simsimd_metric_kind_t me default: break; } - simsimd_metric_punned_t metric = NULL; + simsimd_metric_dense_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; - simsimd_find_metric_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k, &metric, &capability); + simsimd_find_kernel_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k, + (simsimd_kernel_punned_t *)&metric, &capability); if (metric == NULL) { napi_throw_error(env, NULL, "Unsupported datatype for given metric"); return NULL; diff --git a/package-lock.json b/package-lock.json index fb0326c4..9a353f2a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "simsimd", - "version": "4.2.1", + "version": "5.9.2", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "simsimd", - "version": "4.2.1", + "version": "5.9.2", "hasInstallScript": true, "license": "Apache 2.0", "dependencies": { @@ -16,7 +16,7 @@ }, "devDependencies": { "@types/bindings": "^1.5.5", - "@types/node": "^20.12.2", + "@types/node": "^20.17.1", "node-gyp": "^10.0.1", "prebuildify": "^6.0.0", "typescript": "^5.3.3" @@ -107,12 +107,13 @@ } }, "node_modules/@types/node": { - "version": "20.12.2", - "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.2.tgz", - "integrity": "sha512-zQ0NYO87hyN6Xrclcqp7f8ZbXNbRfoGWNcMvHTPQp9UUrwI0mI7XBz+cu7/W6/VClYo2g63B0cjull/srU7LgQ==", + "version": "20.17.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.1.tgz", + "integrity": "sha512-j2VlPv1NnwPJbaCNv69FO/1z4lId0QmGvpT41YxitRtWlg96g/j8qcv2RKsLKe2F6OJgyXhupN1Xo17b2m139Q==", "devOptional": true, + "license": "MIT", "dependencies": { - "undici-types": "~5.26.4" + "undici-types": "~6.19.2" } }, "node_modules/abbrev": { @@ -1525,10 +1526,11 @@ } }, "node_modules/undici-types": { - "version": "5.26.5", - "resolved": 
"https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", - "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==", - "devOptional": true + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz", + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==", + "devOptional": true, + "license": "MIT" }, "node_modules/unique-filename": { "version": "3.0.0", diff --git a/package.json b/package.json index 6ee7ae2a..8bd7be79 100644 --- a/package.json +++ b/package.json @@ -48,7 +48,7 @@ }, "devDependencies": { "@types/bindings": "^1.5.5", - "@types/node": "^20.12.2", + "@types/node": "^20.17.1", "node-gyp": "^10.0.1", "prebuildify": "^6.0.0", "typescript": "^5.3.3" @@ -58,4 +58,4 @@ "mathjs": "^11.11.2", "usearch": "^2.8.15" } -} \ No newline at end of file +} diff --git a/python/annotations/__init__.pyi b/python/annotations/__init__.pyi index 484387fc..93976e26 100644 --- a/python/annotations/__init__.pyi +++ b/python/annotations/__init__.pyi @@ -31,49 +31,25 @@ _MetricType = Literal[ "wsum", ] _IntegralType = Literal[ - # Booleans - "c", - "b8", + "bin8", # Signed integers - "b", - "i8", "int8", - "h", - "i16", "int16", - "i", - "l", - "i32", "int32", - "q", - "i64", "int64", # Unsigned integers - "B", - "u8", "uint8", - "H", - "u16", "uint16", - "I", - "L", - "u32", "uint32", - "Q", - "u64", "uint64", ] _FloatType = Literal[ - "f", "f32", "float32", - "e", "f16", "float16", - "d", "f64", "float64", - "bh", #! Not supported by NumPy "bf16", #! Not supported by NumPy "bfloat16", #! Not supported by NumPy ] diff --git a/python/lib.c b/python/lib.c index debe289e..29d9e257 100644 --- a/python/lib.c +++ b/python/lib.c @@ -62,6 +62,28 @@ * If the argument is not found, a @b `KeyError` is raised. 
 * * https://ashvardanian.com/posts/discount-on-keyword-arguments-in-python/ + * + * @section Buffer Protocol and NumPy Compatibility + * + * Most modern Machine Learning frameworks struggle with the buffer protocol compatibility. + * At best, they provide zero-copy NumPy views of the underlying data, introducing unnecessary + * dependency on NumPy, a memory allocation for the wrapper, and a constraint on the supported + * numeric types. The last is a noticeable limitation, as both PyTorch and TensorFlow have + * richer type systems than NumPy. + * + * You can't convert a PyTorch `Tensor` to a `memoryview` object. + * If you try to convert a `bf16` TensorFlow `Tensor` to a `memoryview` object, you will get an error: + * + * ! ValueError: cannot include dtype 'E' in a buffer + * + * Moreover, the CPython documentation and the NumPy documentation diverge on the format specifiers + * for the `typestr` and `format` data-type descriptor strings, making the development error-prone. + * At this point, SimSIMD seems to be @b the_only_package that at least attempts to provide interoperability. + * + * https://numpy.org/doc/stable/reference/arrays.interface.html + * https://pearu.github.io/array_interface_pytorch.html + * https://github.com/pytorch/pytorch/issues/54138 + * https://github.com/pybind/pybind11/issues/1908 */ #include @@ -126,115 +148,114 @@ int is_complex(simsimd_datatype_t datatype) { datatype == simsimd_datatype_f16c_k || datatype == simsimd_datatype_bf16c_k; } -/// @brief Converts a numpy datatype string to a logical datatype, normalizing the format. +/// @brief Converts a Python-ic datatype string to a logical datatype, normalizing the format. /// @return `simsimd_datatype_unknown_k` if the datatype is not supported, otherwise the logical datatype.
/// @see https://docs.python.org/3/library/struct.html#format-characters -simsimd_datatype_t numpy_string_to_datatype(char const *name) { - // Floating-point numbers: - if (same_string(name, "f") || same_string(name, "shape[1]); // printf("buffer itemsize is %d\n", buffer->itemsize); parsed->start = buffer->buf; - parsed->datatype = numpy_string_to_datatype(buffer->format); + parsed->datatype = python_string_to_datatype(buffer->format); + if (parsed->datatype == simsimd_datatype_unknown_k) { + PyErr_Format(PyExc_ValueError, "Unsupported '%s' datatype specifier", buffer->format); + PyBuffer_Release(buffer); + return 0; + } + parsed->rank = buffer->ndim; if (buffer->ndim == 1) { if (buffer->strides[0] > buffer->itemsize) { @@ -514,8 +541,7 @@ int parse_tensor(PyObject *tensor, Py_buffer *buffer, TensorArgument *parsed) { } // We handle complex numbers differently - if (is_complex(parsed->datatype)) { parsed->dimensions *= 2; } - + if (is_complex(parsed->datatype)) parsed->dimensions *= 2; return 1; } @@ -684,9 +710,10 @@ static PyObject *implement_dense_metric( // } // Look up the metric and the capability - simsimd_metric_punned_t metric = NULL; + simsimd_metric_dense_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; - simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, &metric, &capability); + simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, + (simsimd_kernel_punned_t *)&metric, &capability); if (!metric) { PyErr_Format( // PyExc_LookupError, @@ -889,8 +916,8 @@ static PyObject *implement_curved_metric( // // Look up the metric and the capability simsimd_metric_curved_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; - simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, - (simsimd_metric_punned_t *)&metric, &capability); + simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, 
simsimd_cap_any_k, + (simsimd_kernel_punned_t *)&metric, &capability); if (!metric) { PyErr_Format( // PyExc_LookupError, @@ -948,8 +975,8 @@ static PyObject *implement_sparse_metric( // simsimd_datatype_t dtype = a_parsed.datatype; simsimd_metric_sparse_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; - simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, - (simsimd_metric_punned_t *)&metric, &capability); + simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, + (simsimd_kernel_punned_t *)&metric, &capability); if (!metric) { PyErr_Format( // PyExc_LookupError, "Unsupported metric '%c' and datatype combination ('%s'/'%s' and '%s'/'%s')", @@ -1040,9 +1067,10 @@ static PyObject *implement_cdist( // } // Look up the metric and the capability - simsimd_metric_punned_t metric = NULL; + simsimd_metric_dense_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; - simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, &metric, &capability); + simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, + (simsimd_kernel_punned_t *)&metric, &capability); if (!metric) { PyErr_Format( // PyExc_LookupError, "Unsupported metric '%c' and datatype combination ('%s'/'%s' and '%s'/'%s')", @@ -1175,9 +1203,9 @@ static PyObject *implement_pointer_access(simsimd_metric_kind_t metric_kind, PyO return NULL; } - simsimd_metric_punned_t metric = NULL; + simsimd_kernel_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; - simsimd_find_metric_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k, &metric, &capability); + simsimd_find_kernel_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k, &metric, &capability); if (metric == NULL) { PyErr_SetString(PyExc_LookupError, "No such metric"); return NULL; @@ -1193,16 +1221,14 @@ static char const 
doc_cdist[] = // " b (NDArray): Second matrix.\n" " metric (str, optional): Distance metric to use (e.g., 'sqeuclidean', 'cosine').\n" " out (NDArray, optional): Output matrix to store the result.\n" - " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type.\n" + " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type name.\n" " out_dtype (Union[FloatType, ComplexType], optional): Result type, default is 'float64'.\n\n" " threads (int, optional): Number of threads to use (default is 1).\n" "Returns:\n" " DistancesTensor: Pairwise distances between all inputs.\n\n" "Equivalent to: `scipy.spatial.distance.cdist`.\n" - "Notes:\n" - " * `a` and `b` are positional-only arguments.\n" - " * `metric` can be positional or keyword.\n" - " * `out`, `threads`, `dtype`, and `out_dtype` are keyword-only arguments."; + "Signature:\n" + " >>> def cdist(a, b, /, metric, *, dtype, out, out_dtype, threads) -> Optional[DistancesTensor]: ..."; static PyObject *api_cdist( // PyObject *self, PyObject *const *args, Py_ssize_t const positional_args_count, PyObject *args_names_tuple) { @@ -1356,7 +1382,7 @@ static char const doc_l2[] = // "Args:\n" " a (NDArray): First matrix or vector.\n" " b (NDArray): Second matrix or vector.\n" - " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n" + " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. 
Allocates a new tensor by default.\n" " out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1376,7 +1402,7 @@ static char const doc_l2sq[] = // "Args:\n" " a (NDArray): First matrix or vector.\n" " b (NDArray): Second matrix or vector.\n" - " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n" + " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n" " out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1396,7 +1422,7 @@ static char const doc_cos[] = // "Args:\n" " a (NDArray): First matrix or vector.\n" " b (NDArray): Second matrix or vector.\n" - " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n" + " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n" " out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1416,7 +1442,7 @@ static char const doc_dot[] = // "Args:\n" " a (NDArray): First matrix or vector.\n" " b (NDArray): Second matrix or vector.\n" - " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type.\n" + " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. 
Allocates a new tensor by default.\n" " out_dtype (Union[FloatType, ComplexType], optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1436,7 +1462,7 @@ static char const doc_vdot[] = // "Args:\n" " a (NDArray): First complex matrix or vector.\n" " b (NDArray): Second complex matrix or vector.\n" - " dtype (ComplexType, optional): Override the presumed input type.\n" + " dtype (ComplexType, optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n" " out_dtype (Union[ComplexType], optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1456,7 +1482,7 @@ static char const doc_kl[] = // "Args:\n" " a (NDArray): First floating-point matrix or vector.\n" " b (NDArray): Second floating-point matrix or vector.\n" - " dtype (FloatType, optional): Override the presumed input type.\n" + " dtype (FloatType, optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n" " out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1476,7 +1502,7 @@ static char const doc_js[] = // "Args:\n" " a (NDArray): First floating-point matrix or vector.\n" " b (NDArray): Second floating-point matrix or vector.\n" - " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n" + " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. 
Allocates a new tensor by default.\n" " out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1496,7 +1522,7 @@ static char const doc_hamming[] = // "Args:\n" " a (NDArray): First binary matrix or vector.\n" " b (NDArray): Second binary matrix or vector.\n" - " dtype (IntegralType, optional): Override the presumed input type.\n" + " dtype (IntegralType, optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n" " out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1516,7 +1542,7 @@ static char const doc_jaccard[] = // "Args:\n" " a (NDArray): First binary matrix or vector.\n" " b (NDArray): Second binary matrix or vector.\n" - " dtype (IntegralType, optional): Override the presumed input type.\n" + " dtype (IntegralType, optional): Override the presumed input type name.\n" " out (NDArray, optional): Vector for resulting distances. 
Allocates a new tensor by default.\n" " out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n" "Returns:\n" @@ -1537,7 +1563,7 @@ static char const doc_bilinear[] = // " a (NDArray): First vector.\n" " b (NDArray): Second vector.\n" " metric_tensor (NDArray): The metric tensor defining the bilinear form.\n" - " dtype (FloatType, optional): Override the presumed input type.\n\n" + " dtype (FloatType, optional): Override the presumed input type name.\n\n" "Returns:\n" " float: The bilinear form.\n\n" "Equivalent to: `numpy.dot` with a metric tensor.\n" @@ -1555,7 +1581,7 @@ static char const doc_mahalanobis[] = // " a (NDArray): First vector.\n" " b (NDArray): Second vector.\n" " inverse_covariance (NDArray): The inverse of the covariance matrix.\n" - " dtype (FloatType, optional): Override the presumed input type.\n\n" + " dtype (FloatType, optional): Override the presumed input type name.\n\n" "Returns:\n" " float: The Mahalanobis distance.\n\n" "Equivalent to: `scipy.spatial.distance.mahalanobis`.\n" @@ -1588,7 +1614,7 @@ static char const doc_fma[] = // " a (NDArray): First vector.\n" " b (NDArray): Second vector.\n" " c (NDArray): Third vector.\n" - " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type.\n" + " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type name.\n" " alpha (float, optional): First scale, 1.0 by default.\n" " beta (float, optional): Second scale, 1.0 by default.\n" " out (NDArray, optional): Vector for resulting distances.\n\n" @@ -1712,8 +1738,8 @@ static PyObject *api_fma(PyObject *self, PyObject *const *args, Py_ssize_t const simsimd_kernel_fma_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; simsimd_metric_kind_t const metric_kind = simsimd_metric_fma_k; - simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, - (simsimd_metric_punned_t *)&metric, &capability); + 
simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, + (simsimd_kernel_punned_t *)&metric, &capability); if (!metric) { PyErr_Format( // PyExc_LookupError, @@ -1770,7 +1796,7 @@ static char const doc_wsum[] = // "Args:\n" " a (NDArray): First vector.\n" " b (NDArray): Second vector.\n" - " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type.\n" + " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type name.\n" " alpha (float, optional): First scale, 1.0 by default.\n" " beta (float, optional): Second scale, 1.0 by default.\n" " out (NDArray, optional): Vector for resulting distances.\n\n" @@ -1888,8 +1914,8 @@ static PyObject *api_wsum(PyObject *self, PyObject *const *args, Py_ssize_t cons simsimd_kernel_wsum_punned_t metric = NULL; simsimd_capability_t capability = simsimd_cap_serial_k; simsimd_metric_kind_t const metric_kind = simsimd_metric_wsum_k; - simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, - (simsimd_metric_punned_t *)&metric, &capability); + simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, + (simsimd_kernel_punned_t *)&metric, &capability); if (!metric) { PyErr_Format( // PyExc_LookupError, diff --git a/scripts/bench_vectors.py b/scripts/bench_vectors.py index 229b5bc3..12aeaf78 100755 --- a/scripts/bench_vectors.py +++ b/scripts/bench_vectors.py @@ -44,7 +44,7 @@ "sparse", # Intersection of two sparse integer sets, with float/int weights ] dtype_names = [ - "bits", #! Not supported by SciPy + "bin8", #! Not supported by SciPy "int8", #! 
Presented as supported, but overflows most of the time "uint16", "uint32", @@ -248,21 +248,21 @@ def for_dtypes( if "binary" in metric_families and include_scipy: yield from for_dtypes( "scipy.hamming", - ["bits"], + ["bin8"], spd.hamming, wrap_rows_batch_calls(spd.hamming), lambda A, B: spd.cdist(A, B, "hamming"), - lambda A, B: simd.hamming(A, B, "bits"), - lambda A, B: simd.cdist(A, B, "bits", metric="hamming"), + lambda A, B: simd.hamming(A, B, "bin8"), + lambda A, B: simd.cdist(A, B, "bin8", metric="hamming"), ) yield from for_dtypes( "scipy.jaccard", - ["bits"], + ["bin8"], spd.jaccard, wrap_rows_batch_calls(spd.jaccard), lambda A, B: spd.cdist(A, B, "jaccard"), - lambda A, B: simd.jaccard(A, B, "bits"), - lambda A, B: simd.cdist(A, B, "bits", metric="jaccard"), + lambda A, B: simd.jaccard(A, B, "bin8"), + lambda A, B: simd.cdist(A, B, "bin8", metric="jaccard"), ) if "spatial" in metric_families and include_scikit: yield from for_dtypes( @@ -351,7 +351,7 @@ def random_matrix(count: int, ndim: int, dtype: str) -> np.ndarray: return np.random.randint(0, high=256, size=(count, ndim), dtype=np.int16) if dtype == "int8": return np.random.randint(-100, high=100, size=(count, ndim), dtype=np.int8) - if dtype == "bits": + if dtype == "bin8": return np.packbits(np.random.randint(0, high=2, size=(count, ndim), dtype=np.uint8), axis=0) diff --git a/scripts/test.py b/scripts/test.py index f8b9bbdd..7408d71e 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -491,25 +491,25 @@ def hex_array(arr): def test_pointers_availability(): """Tests the availability of pre-compiled functions for compatibility with USearch.""" - assert simd.pointer_to_sqeuclidean("f64") != 0 - assert simd.pointer_to_cosine("f64") != 0 - assert simd.pointer_to_inner("f64") != 0 + assert simd.pointer_to_sqeuclidean("float64") != 0 + assert simd.pointer_to_cosine("float64") != 0 + assert simd.pointer_to_inner("float64") != 0 - assert simd.pointer_to_sqeuclidean("f32") != 0 - assert 
simd.pointer_to_cosine("f32") != 0 - assert simd.pointer_to_inner("f32") != 0 + assert simd.pointer_to_sqeuclidean("float32") != 0 + assert simd.pointer_to_cosine("float32") != 0 + assert simd.pointer_to_inner("float32") != 0 - assert simd.pointer_to_sqeuclidean("f16") != 0 - assert simd.pointer_to_cosine("f16") != 0 - assert simd.pointer_to_inner("f16") != 0 + assert simd.pointer_to_sqeuclidean("float16") != 0 + assert simd.pointer_to_cosine("float16") != 0 + assert simd.pointer_to_inner("float16") != 0 - assert simd.pointer_to_sqeuclidean("i8") != 0 - assert simd.pointer_to_cosine("i8") != 0 - assert simd.pointer_to_inner("i8") != 0 + assert simd.pointer_to_sqeuclidean("int8") != 0 + assert simd.pointer_to_cosine("int8") != 0 + assert simd.pointer_to_inner("int8") != 0 - assert simd.pointer_to_sqeuclidean("u8") != 0 - assert simd.pointer_to_cosine("u8") != 0 - assert simd.pointer_to_inner("u8") != 0 + assert simd.pointer_to_sqeuclidean("uint8") != 0 + assert simd.pointer_to_cosine("uint8") != 0 + assert simd.pointer_to_inner("uint8") != 0 def test_capabilities_list(): @@ -832,11 +832,11 @@ def test_dense_bits(ndim, metric, capability, stats_fixture): baseline_kernel, simd_kernel = name_to_kernels(metric) accurate_dt, accurate = profile(baseline_kernel, a.astype(np.uint64), b.astype(np.uint64)) expected_dt, expected = profile(baseline_kernel, a, b) - result_dt, result = profile(simd_kernel, np.packbits(a), np.packbits(b), "b8") + result_dt, result = profile(simd_kernel, np.packbits(a), np.packbits(b), "bin8") result = np.array(result) np.testing.assert_allclose(result, expected, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL) - collect_errors(metric, ndim, "bits", accurate, accurate_dt, expected, expected_dt, result, result_dt, stats_fixture) + collect_errors(metric, ndim, "bin8", accurate, accurate_dt, expected, expected_dt, result, result_dt, stats_fixture) @pytest.mark.skip(reason="Problems inferring the tolerance bounds for numerical errors") @@ -1391,10 +1391,10 @@ def 
test_cdist_hamming(ndim, out_dtype, capability): if out_dtype is None: # SciPy divides the Hamming distance by the number of dimensions, so we need to multiply it back. expected = spd.cdist(A, B, "hamming") * ndim - result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="b8") + result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="bin8") else: expected = (spd.cdist(A, B, "hamming") * ndim).astype(out_dtype) - result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="b8", out_dtype=out_dtype) + result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="bin8", out_dtype=out_dtype) np.testing.assert_allclose(result, expected, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL) diff --git a/swift/SimSIMD.swift b/swift/SimSIMD.swift index cb618e3e..56539453 100644 --- a/swift/SimSIMD.swift +++ b/swift/SimSIMD.swift @@ -2,9 +2,9 @@ import CSimSIMD public protocol SimSIMD { static var dataType: simsimd_datatype_t { get } - static var cosine: simsimd_metric_punned_t { get } - static var dotProduct: simsimd_metric_punned_t { get } - static var squaredEuclidean: simsimd_metric_punned_t { get } + static var cosine: simsimd_metric_dense_punned_t { get } + static var dotProduct: simsimd_metric_dense_punned_t { get } + static var squaredEuclidean: simsimd_metric_dense_punned_t { get } } extension Int8: SimSIMD { @@ -71,7 +71,7 @@ extension RandomAccessCollection where Element: SimSIMD { } @inlinable @inline(__always) -func perform(_ metric: simsimd_metric_punned_t, a: A, b: B) -> Double? where A: Sequence, B: Sequence, A.Element == B.Element { +func perform(_ metric: simsimd_metric_dense_punned_t, a: A, b: B) -> Double? 
where A: Sequence, B: Sequence, A.Element == B.Element { var distance: simsimd_distance_t = 0 let result = a.withContiguousStorageIfAvailable { a in b.withContiguousStorageIfAvailable { b in @@ -118,10 +118,15 @@ extension simsimd_capability_t: OptionSet, CustomStringConvertible { } @inline(__always) -private func find(kind: simsimd_metric_kind_t, dataType: simsimd_datatype_t) -> simsimd_metric_punned_t { - var output: simsimd_metric_punned_t? +private func find(kind: simsimd_metric_kind_t, dataType: simsimd_datatype_t) -> simsimd_metric_dense_punned_t { + var output: simsimd_metric_dense_punned_t? var used = simsimd_capability_t.any - simsimd_find_metric_punned(kind, dataType, .available, .any, &output, &used) + // Use `withUnsafeMutablePointer` to safely cast `output` to the required pointer type. + withUnsafeMutablePointer(to: &output) { outputPtr in + // Cast the pointer to `UnsafeMutablePointer` + let castedPtr = outputPtr.withMemoryRebound(to: Optional.self, capacity: 1) { $0 } + simsimd_find_kernel_punned(kind, dataType, .available, .any, castedPtr, &used) + } guard let output else { fatalError("Could not find function \(kind) for \(dataType)") } return output }