diff --git a/.vscode/settings.json b/.vscode/settings.json
index c0c885af..4b489146 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -93,7 +93,9 @@
"format": "c",
"execution": "cpp",
"math.h": "c",
- "float.h": "c"
+ "float.h": "c",
+ "text_encoding": "cpp",
+ "stdio.h": "c"
},
"cSpell.words": [
"allclose",
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 0728a344..f7b9b91c 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -101,7 +101,7 @@ You can also benchmark against other libraries, filter the numeric types, and di
$ python scripts/bench_vectors.py --help
> usage: bench.py [-h] [--ndim NDIM] [-n COUNT]
> [--metric {all,dot,spatial,binary,probability,sparse}]
-> [--dtype {all,bits,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128}]
+> [--dtype {all,bin8,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128}]
> [--scipy] [--scikit] [--torch] [--tf] [--jax]
>
> Benchmark SimSIMD vs. other libraries
@@ -119,7 +119,7 @@ $ python scripts/bench_vectors.py --help
> `cdist`.
> --metric {all,dot,spatial,binary,probability,sparse}
> Distance metric to use, profiles everything by default
-> --dtype {all,bits,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128}
+> --dtype {all,bin8,int8,uint16,uint32,float16,float32,float64,bfloat16,complex32,complex64,complex128}
> Defines numeric types to benchmark, profiles everything by default
> --scipy Profile SciPy, must be installed
> --scikit Profile scikit-learn, must be installed
@@ -203,6 +203,35 @@ bun test
swift build && swift test -v
```
+Running Swift on Linux requires a couple of extra steps, as the Swift compiler is not available in the default repositories.
+Please get the most recent Swift tarball from the [official website](https://www.swift.org/install/).
+At the time of writing, for 64-bit Arm CPU running Ubuntu 22.04, the following commands would work:
+
+```bash
+wget https://download.swift.org/swift-5.9.2-release/ubuntu2204-aarch64/swift-5.9.2-RELEASE/swift-5.9.2-RELEASE-ubuntu22.04-aarch64.tar.gz
+tar xzf swift-5.9.2-RELEASE-ubuntu22.04-aarch64.tar.gz
+sudo mv swift-5.9.2-RELEASE-ubuntu22.04-aarch64 /usr/share/swift
+echo "export PATH=/usr/share/swift/usr/bin:$PATH" >> ~/.bashrc
+source ~/.bashrc
+```
+
+You can check the available images on [`swift.org/download` page](https://www.swift.org/download/#releases).
+For x86 CPUs, the following commands would work:
+
+```bash
+wget https://download.swift.org/swift-5.9.2-release/ubuntu2204/swift-5.9.2-RELEASE/swift-5.9.2-RELEASE-ubuntu22.04.tar.gz
+tar xzf swift-5.9.2-RELEASE-ubuntu22.04.tar.gz
+sudo mv swift-5.9.2-RELEASE-ubuntu22.04 /usr/share/swift
+echo "export PATH=/usr/share/swift/usr/bin:$PATH" >> ~/.bashrc
+source ~/.bashrc
+```
+
+Alternatively, on Linux, the official Swift Docker image can be used for builds and tests:
+
+```bash
+sudo docker run --rm -v "$PWD:/workspace" -w /workspace swift:5.9 /bin/bash -cl "swift build -c release --static-swift-stdlib && swift test -c release --enable-test-discovery"
+```
+
## GoLang
```sh
diff --git a/README.md b/README.md
index cded1247..6a0f90c2 100644
--- a/README.md
+++ b/README.md
@@ -69,9 +69,9 @@ Implemented distance functions include:
Moreover, SimSIMD...
-- handles `f64`, `f32`, `f16`, and `bf16` real & complex vectors.
-- handles `i8` integral, `i4` sub-byte, and `b8` binary vectors.
-- handles sparse `u32` and `u16` sets, and weighted sparse vectors.
+- handles `float64`, `float32`, `float16`, and `bfloat16` real & complex vectors.
+- handles `int8` integral, `int4` sub-byte, and `b8` binary vectors.
+- handles sparse `uint32` and `uint16` sets, and weighted sparse vectors.
- is a zero-dependency [header-only C 99](#using-simsimd-in-c) library.
- has [Python](#using-simsimd-in-python), [Rust](#using-simsimd-in-rust), [JS](#using-simsimd-in-javascript), and [Swift](#using-simsimd-in-swift) bindings.
- has Arm backends for NEON, Scalable Vector Extensions (SVE), and SVE2.
@@ -95,14 +95,14 @@ You can learn more about the technical implementation details in the following b
For reference, we use 1536-dimensional vectors, like the embeddings produced by the OpenAI Ada API.
Comparing the serial code throughput produced by GCC 12 to hand-optimized kernels in SimSIMD, we see the following single-core improvements for the two most common vector-vector similarity metrics - the Cosine similarity and the Euclidean distance:
-| Type | Apple M2 Pro | Intel Sapphire Rapids | AWS Graviton 4 |
-| :----- | ----------------------------: | -------------------------------: | ------------------------------: |
-| `f64` | 18.5 → 28.8 GB/s<br/>+ 56 % | 21.9 → 41.4 GB/s<br/>+ 89 % | 20.7 → 41.3 GB/s<br/>+ 99 % |
-| `f32` | 9.2 → 29.6 GB/s<br/>+ 221 % | 10.9 → 95.8 GB/s<br/>+ 779 % | 4.9 → 41.9 GB/s<br/>+ 755 % |
-| `f16` | 4.6 → 14.6 GB/s<br/>+ 217 % | 3.1 → 108.4 GB/s<br/>+ 3,397 % | 5.4 → 39.3 GB/s<br/>+ 627 % |
-| `bf16` | 4.6 → 26.3 GB/s<br/>+ 472 % | 0.8 → 59.5 GB/s<br/>+7,437 % | 2.5 → 29.9 GB/s<br/>+ 1,096 % |
-| `i8` | 25.8 → 47.1 GB/s<br/>+ 83 % | 33.1 → 65.3 GB/s<br/>+ 97 % | 35.2 → 43.5 GB/s<br/>+ 24 % |
-| `u8` | | 32.5 → 66.5 GB/s<br/>+ 105 % | |
+| Type | Apple M2 Pro | Intel Sapphire Rapids | AWS Graviton 4 |
+| :--------- | ----------------------------: | -------------------------------: | ------------------------------: |
+| `float64` | 18.5 → 28.8 GB/s<br/>+ 56 % | 21.9 → 41.4 GB/s<br/>+ 89 % | 20.7 → 41.3 GB/s<br/>+ 99 % |
+| `float32` | 9.2 → 29.6 GB/s<br/>+ 221 % | 10.9 → 95.8 GB/s<br/>+ 779 % | 4.9 → 41.9 GB/s<br/>+ 755 % |
+| `float16` | 4.6 → 14.6 GB/s<br/>+ 217 % | 3.1 → 108.4 GB/s<br/>+ 3,397 % | 5.4 → 39.3 GB/s<br/>+ 627 % |
+| `bfloat16` | 4.6 → 26.3 GB/s<br/>+ 472 % | 0.8 → 59.5 GB/s<br/>+7,437 % | 2.5 → 29.9 GB/s<br/>+ 1,096 % |
+| `int8` | 25.8 → 47.1 GB/s<br/>+ 83 % | 33.1 → 65.3 GB/s<br/>+ 97 % | 35.2 → 43.5 GB/s<br/>+ 24 % |
+| `uint8` | | 32.5 → 66.5 GB/s<br/>+ 105 % | |
Similar speedups are often observed even when compared to BLAS and LAPACK libraries underlying most numerical computing libraries, including NumPy and SciPy in Python.
Broader benchmarking results:
@@ -115,8 +115,8 @@ Broader benchmarking results:
The package is intended to replace the usage of `numpy.inner`, `numpy.dot`, and `scipy.spatial.distance`.
Aside from drastic performance improvements, SimSIMD significantly improves accuracy in mixed precision setups.
-NumPy and SciPy, processing `i8`, `u8` or `f16` vectors, will use the same types for accumulators, while SimSIMD can combine `i8` enumeration, `i16` multiplication, and `i32` accumulation to avoid overflows entirely.
-The same applies to processing `f16` and `bf16` values with `f32` precision.
+NumPy and SciPy, processing `int8`, `uint8` or `float16` vectors, will use the same types for accumulators, while SimSIMD can combine `int8` enumeration, `int16` multiplication, and `int32` accumulation to avoid overflows entirely.
+The same applies to processing `float16` and `bfloat16` values with `float32` precision.
### Installation
@@ -155,14 +155,33 @@ dist = simsimd.vdot(vec1.astype(np.complex64), vec2.astype(np.complex64)) # conj
```
Unlike SciPy, SimSIMD allows explicitly stating the precision of the input vectors, which is especially useful for mixed-precision setups.
+The `dtype` argument can be passed both by name and as a positional argument:
```py
-dist = simsimd.cosine(vec1, vec2, "i8")
-dist = simsimd.cosine(vec1, vec2, "f16")
-dist = simsimd.cosine(vec1, vec2, "f32")
-dist = simsimd.cosine(vec1, vec2, "f64")
-dist = simsimd.hamming(vec1, vec2, "bits")
-dist = simsimd.jaccard(vec1, vec2, "bits")
+dist = simsimd.cosine(vec1, vec2, "int8")
+dist = simsimd.cosine(vec1, vec2, "float16")
+dist = simsimd.cosine(vec1, vec2, "float32")
+dist = simsimd.cosine(vec1, vec2, "float64")
+dist = simsimd.hamming(vec1, vec2, "bin8")
+```
+
+With other frameworks, like PyTorch, one can get a richer type-system than NumPy, but the lack of good CPython interoperability makes it hard to pass data without copies.
+
+```py
+import numpy as np
+buf1 = np.empty(8, dtype=np.uint16)
+buf2 = np.empty(8, dtype=np.uint16)
+
+# View the same memory region with PyTorch and randomize it
+import torch
+vec1 = torch.asarray(memoryview(buf1), copy=False).view(torch.bfloat16)
+vec2 = torch.asarray(memoryview(buf2), copy=False).view(torch.bfloat16)
+torch.randn(8, out=vec1)
+torch.randn(8, out=vec2)
+
+# Both libs will look into the same memory buffers and report the same results
+dist_slow = 1 - torch.nn.functional.cosine_similarity(vec1, vec2, dim=0)
+dist_fast = simsimd.cosine(buf1, buf2, "bfloat16")
```
It also allows using SimSIMD for half-precision complex numbers, which NumPy does not support.
@@ -235,6 +254,48 @@ distances: DistancesTensor = simsimd.cdist(matrix1, matrix2, metric="cosine")
distances_array: np.ndarray = np.array(distances, copy=True) # now managed by NumPy
```
+### Elementwise Kernels
+
+SimSIMD also provides mixed-precision elementwise kernels, where the input vectors and the output have the same numeric type, but the intermediate accumulators are of a higher precision.
+
+```py
+import numpy as np
+from simsimd import fma, wsum
+
+# Let's take two FullHD video frames
+first_frame = np.random.randn(1920 * 1080).astype(np.uint8)
+second_frame = np.random.randn(1920 * 1080).astype(np.uint8)
+average_frame = np.empty_like(first_frame)
+wsum(first_frame, second_frame, alpha=0.5, beta=0.5, out=average_frame)
+
+# Slow analog with NumPy:
+slow_average_frame = (0.5 * first_frame + 0.5 * second_frame).astype(np.uint8)
+```
+
+Similarly, the `fma` takes three arguments and computes the fused multiply-add operation.
+In applications like Machine Learning you may also benefit from using the "brain-float" format not natively supported by NumPy.
+In 3D Graphics, for example, we can use FMA to compute the [Phong shading model](https://en.wikipedia.org/wiki/Phong_shading):
+
+```py
+# Assume a FullHD frame with random values for simplicity
+light_intensity = np.random.rand(1920 * 1080).astype(np.float16) # Intensity of light on each pixel
+diffuse_component = np.random.rand(1920 * 1080).astype(np.float16) # Diffuse reflectance on the surface
+specular_component = np.random.rand(1920 * 1080).astype(np.float16) # Specular reflectance for highlights
+output_color = np.empty_like(light_intensity) # Array to store the resulting color intensity
+
+# Define the scaling factors for diffuse and specular contributions
+alpha = 0.7 # Weight for the diffuse component
+beta = 0.3 # Weight for the specular component
+
+# Formula: color = alpha * light_intensity * diffuse_component + beta * specular_component
+fma(light_intensity, diffuse_component, specular_component,
+ dtype="float16", # Optional, unless it can't be inferred from the input
+ alpha=alpha, beta=beta, out=output_color)
+
+# Slow analog with NumPy for comparison
+slow_output_color = (alpha * light_intensity * diffuse_component + beta * specular_component).astype(np.float16)
+```
+
### Multithreading and Memory Usage
By default, computations use a single CPU core.
@@ -248,15 +309,15 @@ matrix1 = np.packbits(np.random.randint(2, size=(10_000, ndim)).astype(np.uint8)
matrix2 = np.packbits(np.random.randint(2, size=(1_000, ndim)).astype(np.uint8))
distances = simsimd.cdist(matrix1, matrix2,
- metric="hamming", # Unlike SciPy, SimSIMD doesn't divide by the number of dimensions
- out_dtype="u8", # so we can use `u8` instead of `f64` to save memory.
- threads=0, # Use all CPU cores with OpenMP.
- dtype="b8", # Override input argument type to `b8` eight-bit words.
+ metric="hamming", # Unlike SciPy, SimSIMD doesn't divide by the number of dimensions
+ out_dtype="uint8", # so we can use `uint8` instead of `float64` to save memory.
+ threads=0, # Use all CPU cores with OpenMP.
+ dtype="bin8", # Override input argument type to `bin8` eight-bit words.
)
```
-By default, the output distances will be stored in double-precision `f64` floating-point numbers.
-That behavior may not be space-efficient, especially if you are computing the hamming distance between short binary vectors, that will generally fit into 8x smaller `u8` or `u16` types.
+By default, the output distances will be stored in double-precision `float64` floating-point numbers.
+That behavior may not be space-efficient, especially if you are computing the hamming distance between short binary vectors, that will generally fit into 8x smaller `uint8` or `uint16` types.
To override this behavior, use the `dtype` argument.
### Helper Functions
@@ -575,7 +636,7 @@ Simplest of all, you can include the headers, and the compiler will automaticall
int main() {
simsimd_f32_t vector_a[1536];
simsimd_f32_t vector_b[1536];
- simsimd_metric_punned_t distance_function = simsimd_metric_punned(
+ simsimd_kernel_punned_t distance_function = simsimd_metric_punned(
simsimd_metric_cos_k, // Metric kind, like the angular cosine distance
simsimd_datatype_f32_k, // Data type, like: f16, f32, f64, i8, b8, and complex variants
simsimd_cap_any_k); // Which CPU capabilities are we allowed to use
@@ -663,7 +724,6 @@ int main() {
simsimd_vdot_f16c(f16s, f16s, 1536, &distance);
simsimd_vdot_f32c(f32s, f32s, 1536, &distance);
simsimd_vdot_f64c(f64s, f64s, 1536, &distance);
-
return 0;
}
```
@@ -676,13 +736,8 @@ int main() {
int main() {
simsimd_b8_t b8s[1536 / 8]; // 8 bits per word
simsimd_distance_t distance;
-
- // Hamming distance between two vectors
simsimd_hamming_b8(b8s, b8s, 1536 / 8, &distance);
-
- // Jaccard distance between two vectors
simsimd_jaccard_b8(b8s, b8s, 1536 / 8, &distance);
-
return 0;
}
```
@@ -707,7 +762,6 @@ int main() {
simsimd_kl_f16(f16s, f16s, 1536, &distance);
simsimd_kl_f32(f32s, f32s, 1536, &distance);
simsimd_kl_f64(f64s, f64s, 1536, &distance);
-
return 0;
}
```
@@ -949,10 +1003,10 @@ In NumPy terms, the implementation may look like:
```py
import numpy as np
-def wsum(A: np.ndarray, B: np.ndarray, Alpha: float, Beta: float) -> np.ndarray:
+def wsum(A: np.ndarray, B: np.ndarray, /, Alpha: float, Beta: float) -> np.ndarray:
assert A.dtype == B.dtype, "Input types must match and affect the output style"
return (Alpha * A + Beta * B).astype(A.dtype)
-def fma(A: np.ndarray, B: np.ndarray, C: np.ndarray, Alpha: float, Beta: float) -> np.ndarray:
+def fma(A: np.ndarray, B: np.ndarray, C: np.ndarray, /, Alpha: float, Beta: float) -> np.ndarray:
assert A.dtype == B.dtype and A.dtype == C.dtype, "Input types must match and affect the output style"
return (Alpha * A * B + Beta * C).astype(A.dtype)
```
@@ -1095,7 +1149,7 @@ All of the function names follow the same pattern: `simsimd_{function}_{type}_{b
- The type can be `f64`, `f32`, `f16`, `bf16`, `f64c`, `f32c`, `f16c`, `bf16c`, `i8`, or `b8`.
- The function can be `dot`, `vdot`, `cos`, `l2sq`, `hamming`, `jaccard`, `kl`, `js`, or `intersect`.
-To avoid hard-coding the backend, you can use the `simsimd_metric_punned_t` to pun the function pointer and the `simsimd_capabilities` function to get the available backends at runtime.
+To avoid hard-coding the backend, you can use the `simsimd_kernel_punned_t` to pun the function pointer and the `simsimd_capabilities` function to get the available backends at runtime.
To match all the function names, consider a RegEx:
```regex
diff --git a/c/lib.c b/c/lib.c
index d59724a1..60b985d5 100644
--- a/c/lib.c
+++ b/c/lib.c
@@ -55,20 +55,21 @@ extern "C" {
// If no metric is found, it returns NaN. We can obtain NaN by dividing 0.0 by 0.0, but that annoys
// the MSVC compiler. Instead we can directly write-in the signaling NaN (0x7FF0000000000001)
// or the qNaN (0x7FF8000000000000).
-#define SIMSIMD_DECLARATION_DENSE(name, extension, type) \
- SIMSIMD_DYNAMIC void simsimd_##name##_##extension(simsimd_##type##_t const *a, simsimd_##type##_t const *b, \
- simsimd_size_t n, simsimd_distance_t *results) { \
- static simsimd_metric_punned_t metric = 0; \
- if (metric == 0) { \
- simsimd_capability_t used_capability; \
- simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
- simsimd_capabilities(), simsimd_cap_any_k, &metric, &used_capability); \
- if (!metric) { \
- *(simsimd_u64_t *)results = 0x7FF0000000000001ull; \
- return; \
- } \
- } \
- metric(a, b, n, results); \
+#define SIMSIMD_DECLARATION_DENSE(name, extension, type) \
+ SIMSIMD_DYNAMIC void simsimd_##name##_##extension(simsimd_##type##_t const *a, simsimd_##type##_t const *b, \
+ simsimd_size_t n, simsimd_distance_t *results) { \
+ static simsimd_metric_dense_punned_t metric = 0; \
+ if (metric == 0) { \
+ simsimd_capability_t used_capability; \
+ simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
+ simsimd_capabilities(), simsimd_cap_any_k, (simsimd_kernel_punned_t *)&metric, \
+ &used_capability); \
+ if (!metric) { \
+ *(simsimd_u64_t *)results = 0x7FF0000000000001ull; \
+ return; \
+ } \
+ } \
+ metric(a, b, n, results); \
}
#define SIMSIMD_DECLARATION_SPARSE(name, extension, type) \
@@ -78,9 +79,9 @@ extern "C" {
static simsimd_metric_sparse_punned_t metric = 0; \
if (metric == 0) { \
simsimd_capability_t used_capability; \
- simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
+ simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
simsimd_capabilities(), simsimd_cap_any_k, \
- (simsimd_metric_punned_t *)(&metric), &used_capability); \
+ (simsimd_kernel_punned_t *)(&metric), &used_capability); \
if (!metric) { \
*(simsimd_u64_t *)result = 0x7FF0000000000001ull; \
return; \
@@ -96,9 +97,9 @@ extern "C" {
static simsimd_metric_curved_punned_t metric = 0; \
if (metric == 0) { \
simsimd_capability_t used_capability; \
- simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
+ simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
simsimd_capabilities(), simsimd_cap_any_k, \
- (simsimd_metric_punned_t *)(&metric), &used_capability); \
+ (simsimd_kernel_punned_t *)(&metric), &used_capability); \
if (!metric) { \
*(simsimd_u64_t *)result = 0x7FF0000000000001ull; \
return; \
@@ -114,9 +115,9 @@ extern "C" {
static simsimd_kernel_fma_punned_t metric = 0; \
if (metric == 0) { \
simsimd_capability_t used_capability; \
- simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
+ simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
simsimd_capabilities(), simsimd_cap_any_k, \
- (simsimd_metric_punned_t *)(&metric), &used_capability); \
+ (simsimd_kernel_punned_t *)(&metric), &used_capability); \
} \
metric(a, b, c, n, alpha, beta, result); \
}
@@ -128,9 +129,9 @@ extern "C" {
static simsimd_kernel_wsum_punned_t metric = 0; \
if (metric == 0) { \
simsimd_capability_t used_capability; \
- simsimd_find_metric_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
+ simsimd_find_kernel_punned(simsimd_metric_##name##_k, simsimd_datatype_##extension##_k, \
simsimd_capabilities(), simsimd_cap_any_k, \
- (simsimd_metric_punned_t *)(&metric), &used_capability); \
+ (simsimd_kernel_punned_t *)(&metric), &used_capability); \
} \
metric(a, b, n, alpha, beta, result); \
}
@@ -326,14 +327,14 @@ SIMSIMD_DYNAMIC simsimd_capability_t simsimd_capabilities(void) {
return static_capabilities;
}
-SIMSIMD_DYNAMIC void simsimd_find_metric_punned( //
+SIMSIMD_DYNAMIC void simsimd_find_kernel_punned( //
simsimd_metric_kind_t kind, //
simsimd_datatype_t datatype, //
simsimd_capability_t supported, //
simsimd_capability_t allowed, //
- simsimd_metric_punned_t *metric_output, //
+ simsimd_kernel_punned_t *kernel_output, //
simsimd_capability_t *capability_output) {
- _simsimd_find_metric_punned_implementation(kind, datatype, supported, allowed, metric_output, capability_output);
+ _simsimd_find_kernel_punned_implementation(kind, datatype, supported, allowed, kernel_output, capability_output);
}
#ifdef __cplusplus
diff --git a/include/simsimd/elementwise.h b/include/simsimd/elementwise.h
index ea5dac23..fb3e72a5 100644
--- a/include/simsimd/elementwise.h
+++ b/include/simsimd/elementwise.h
@@ -36,8 +36,8 @@
* x86 intrinsics: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/
* Arm intrinsics: https://developer.arm.com/architectures/instruction-sets/intrinsics/
*/
-#ifndef SIMSIMD_FMA_H
-#define SIMSIMD_FMA_H
+#ifndef SIMSIMD_ELEMENTWISE_H
+#define SIMSIMD_ELEMENTWISE_H
#include "types.h"
diff --git a/include/simsimd/simsimd.h b/include/simsimd/simsimd.h
index eb94d76b..dce52855 100644
--- a/include/simsimd/simsimd.h
+++ b/include/simsimd/simsimd.h
@@ -304,28 +304,29 @@ typedef void (*simsimd_kernel_wsum_punned_t)(void const *a, void const *b, //
/**
* @brief Type-punned function pointer for a SimSIMD public interface.
- * Can be a `simsimd_metric_dense_punned_t`, `simsimd_metric_sparse_punned_t`,
- * or `simsimd_metric_curved_punned_t`.
+ *
+ * Can be a `simsimd_metric_dense_punned_t`, `simsimd_metric_sparse_punned_t`, `simsimd_metric_curved_punned_t`,
+ * `simsimd_kernel_fma_punned_t`, or `simsimd_kernel_wsum_punned_t`.
*/
-typedef simsimd_metric_dense_punned_t simsimd_metric_punned_t;
+typedef void (*simsimd_kernel_punned_t)(void *);
#if SIMSIMD_DYNAMIC_DISPATCH
SIMSIMD_DYNAMIC simsimd_capability_t simsimd_capabilities(void);
-SIMSIMD_DYNAMIC void simsimd_find_metric_punned( //
+SIMSIMD_DYNAMIC void simsimd_find_kernel_punned( //
simsimd_metric_kind_t kind, //
simsimd_datatype_t datatype, //
simsimd_capability_t supported, //
simsimd_capability_t allowed, //
- simsimd_metric_punned_t *metric_output, //
+ simsimd_kernel_punned_t *kernel_output, //
simsimd_capability_t *capability_output);
#else
SIMSIMD_PUBLIC simsimd_capability_t simsimd_capabilities(void);
-SIMSIMD_PUBLIC void simsimd_find_metric_punned( //
+SIMSIMD_PUBLIC void simsimd_find_kernel_punned( //
simsimd_metric_kind_t kind, //
simsimd_datatype_t datatype, //
simsimd_capability_t supported, //
simsimd_capability_t allowed, //
- simsimd_metric_punned_t *metric_output, //
+ simsimd_kernel_punned_t *kernel_output, //
simsimd_capability_t *capability_output);
#endif
@@ -537,9 +538,9 @@ SIMSIMD_PUBLIC simsimd_capability_t _simsimd_capabilities_implementation(void) {
#pragma clang diagnostic ignored "-Wvolatile"
#endif
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f64(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE
if (v & simsimd_cap_sve_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f64_sve, *c = simsimd_cap_sve_k; return;
@@ -593,9 +594,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64(simsimd_capability_t v, si
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f32(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE
if (v & simsimd_cap_sve_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f32_sve, *c = simsimd_cap_sve_k; return;
@@ -661,9 +662,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32(simsimd_capability_t v, si
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f16(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE_F16
if (v & simsimd_cap_sve_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f16_sve, *c = simsimd_cap_sve_f16_k; return;
@@ -737,9 +738,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16(simsimd_capability_t v, si
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_bf16(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE_BF16
if (v & simsimd_cap_sve_bf16_k) switch (k) {
case simsimd_metric_cos_k: *m = (m_t)&simsimd_cos_bf16_sve, *c = simsimd_cap_sve_bf16_k; return;
@@ -809,9 +810,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16(simsimd_capability_t v, s
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_i8(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_i8(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_NEON_I8
if (v & simsimd_cap_neon_i8_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_i8_neon, *c = simsimd_cap_neon_i8_k; return;
@@ -865,9 +866,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_i8(simsimd_capability_t v, sim
default: break;
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u8(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_u8(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_NEON_I8
if (v & simsimd_cap_neon_i8_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_u8_neon, *c = simsimd_cap_neon_i8_k; return;
@@ -922,9 +923,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u8(simsimd_capability_t v, sim
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_b8(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_b8(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE
if (v & simsimd_cap_sve_k) switch (k) {
case simsimd_metric_hamming_k: *m = (m_t)&simsimd_hamming_b8_sve, *c = simsimd_cap_sve_k; return;
@@ -960,9 +961,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_b8(simsimd_capability_t v, sim
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64c(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f64c(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE
if (v & simsimd_cap_sve_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f64c_sve, *c = simsimd_cap_sve_k; return;
@@ -984,9 +985,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f64c(simsimd_capability_t v, s
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32c(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f32c(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE
if (v & simsimd_cap_sve_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f32c_sve, *c = simsimd_cap_sve_k; return;
@@ -1022,9 +1023,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f32c(simsimd_capability_t v, s
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16c(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_f16c(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE_F16
if (v & simsimd_cap_sve_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_f16c_sve, *c = simsimd_cap_sve_f16_k; return;
@@ -1060,9 +1061,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_f16c(simsimd_capability_t v, s
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16c(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_bf16c(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_NEON_BF16
if (v & simsimd_cap_neon_bf16_k) switch (k) {
case simsimd_metric_dot_k: *m = (m_t)&simsimd_dot_bf16c_neon, *c = simsimd_cap_neon_bf16_k; return;
@@ -1084,9 +1085,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_bf16c(simsimd_capability_t v,
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u16(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_u16(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE2
if (v & simsimd_cap_sve2_k) switch (k) {
case simsimd_metric_intersect_k: *m = (m_t)&simsimd_intersect_u16_sve2, *c = simsimd_cap_sve2_k; return;
@@ -1125,9 +1126,9 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u16(simsimd_capability_t v, si
}
}
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u32(simsimd_capability_t v, simsimd_metric_kind_t k,
- simsimd_metric_punned_t *m, simsimd_capability_t *c) {
- typedef simsimd_metric_punned_t m_t;
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_u32(simsimd_capability_t v, simsimd_metric_kind_t k,
+ simsimd_kernel_punned_t *m, simsimd_capability_t *c) {
+ typedef simsimd_kernel_punned_t m_t;
#if SIMSIMD_TARGET_SVE2
if (v & simsimd_cap_sve2_k) switch (k) {
case simsimd_metric_intersect_k: *m = (m_t)&simsimd_intersect_u32_sve2, *c = simsimd_cap_sve2_k; return;
@@ -1166,15 +1167,15 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_u32(simsimd_capability_t v, si
* @param datatype The data type for which the metric needs to be evaluated.
* @param supported The hardware capabilities supported by the CPU.
* @param allowed The hardware capabilities allowed for use.
- * @param metric_output Output variable for the selected similarity function.
+ * @param kernel_output Output variable for the selected similarity function.
* @param capability_output Output variable for the utilized hardware capabilities.
*/
-SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( //
+SIMSIMD_INTERNAL void _simsimd_find_kernel_punned_implementation( //
simsimd_metric_kind_t kind, //
simsimd_datatype_t datatype, //
simsimd_capability_t supported, //
simsimd_capability_t allowed, //
- simsimd_metric_punned_t *metric_output, //
+ simsimd_kernel_punned_t *kernel_output, //
simsimd_capability_t *capability_output) {
// Modern compilers abso-freaking-lutely love optimizing-out my logic!
@@ -1186,25 +1187,25 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( //
__asm__ __volatile__("" ::: "memory");
#endif
- simsimd_metric_punned_t *m = metric_output;
+ simsimd_kernel_punned_t *m = kernel_output;
simsimd_capability_t *c = capability_output;
simsimd_capability_t viable = (simsimd_capability_t)(supported & allowed);
switch (datatype) {
- case simsimd_datatype_f64_k: _simsimd_find_metric_punned_f64(viable, kind, m, c); return;
- case simsimd_datatype_f32_k: _simsimd_find_metric_punned_f32(viable, kind, m, c); return;
- case simsimd_datatype_f16_k: _simsimd_find_metric_punned_f16(viable, kind, m, c); return;
- case simsimd_datatype_bf16_k: _simsimd_find_metric_punned_bf16(viable, kind, m, c); return;
- case simsimd_datatype_i8_k: _simsimd_find_metric_punned_i8(viable, kind, m, c); return;
- case simsimd_datatype_u8_k: _simsimd_find_metric_punned_u8(viable, kind, m, c); return;
- case simsimd_datatype_b8_k: _simsimd_find_metric_punned_b8(viable, kind, m, c); return;
- case simsimd_datatype_f32c_k: _simsimd_find_metric_punned_f32c(viable, kind, m, c); return;
- case simsimd_datatype_f64c_k: _simsimd_find_metric_punned_f64c(viable, kind, m, c); return;
- case simsimd_datatype_f16c_k: _simsimd_find_metric_punned_f16c(viable, kind, m, c); return;
- case simsimd_datatype_bf16c_k: _simsimd_find_metric_punned_bf16c(viable, kind, m, c); return;
- case simsimd_datatype_u16_k: _simsimd_find_metric_punned_u16(viable, kind, m, c); return;
- case simsimd_datatype_u32_k: _simsimd_find_metric_punned_u32(viable, kind, m, c); return;
+ case simsimd_datatype_f64_k: _simsimd_find_kernel_punned_f64(viable, kind, m, c); return;
+ case simsimd_datatype_f32_k: _simsimd_find_kernel_punned_f32(viable, kind, m, c); return;
+ case simsimd_datatype_f16_k: _simsimd_find_kernel_punned_f16(viable, kind, m, c); return;
+ case simsimd_datatype_bf16_k: _simsimd_find_kernel_punned_bf16(viable, kind, m, c); return;
+ case simsimd_datatype_i8_k: _simsimd_find_kernel_punned_i8(viable, kind, m, c); return;
+ case simsimd_datatype_u8_k: _simsimd_find_kernel_punned_u8(viable, kind, m, c); return;
+ case simsimd_datatype_b8_k: _simsimd_find_kernel_punned_b8(viable, kind, m, c); return;
+ case simsimd_datatype_f32c_k: _simsimd_find_kernel_punned_f32c(viable, kind, m, c); return;
+ case simsimd_datatype_f64c_k: _simsimd_find_kernel_punned_f64c(viable, kind, m, c); return;
+ case simsimd_datatype_f16c_k: _simsimd_find_kernel_punned_f16c(viable, kind, m, c); return;
+ case simsimd_datatype_bf16c_k: _simsimd_find_kernel_punned_bf16c(viable, kind, m, c); return;
+ case simsimd_datatype_u16_k: _simsimd_find_kernel_punned_u16(viable, kind, m, c); return;
+ case simsimd_datatype_u32_k: _simsimd_find_kernel_punned_u32(viable, kind, m, c); return;
// These data-types are not supported yet
case simsimd_datatype_i4x2_k: break;
@@ -1217,7 +1218,7 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( //
}
// Replace with zeros if no suitable implementation was found
- *m = (simsimd_metric_punned_t)0;
+ *m = (simsimd_kernel_punned_t)0;
*c = (simsimd_capability_t)0;
// Modern compilers abso-freaking-lutely love optimizing-out my logic!
@@ -1242,15 +1243,15 @@ SIMSIMD_INTERNAL void _simsimd_find_metric_punned_implementation( //
* @param allowed The hardware capabilities allowed for use.
* @return A function pointer to the selected metric implementation.
*/
-SIMSIMD_PUBLIC simsimd_metric_punned_t simsimd_metric_punned( //
+SIMSIMD_PUBLIC simsimd_kernel_punned_t simsimd_metric_punned( //
simsimd_metric_kind_t kind, //
simsimd_datatype_t datatype, //
simsimd_capability_t allowed) {
- simsimd_metric_punned_t result = 0;
+ simsimd_kernel_punned_t result = 0;
simsimd_capability_t c = simsimd_cap_serial_k;
simsimd_capability_t supported = simsimd_capabilities();
- simsimd_find_metric_punned(kind, datatype, supported, allowed, &result, &c);
+ simsimd_find_kernel_punned(kind, datatype, supported, allowed, &result, &c);
return result;
}
@@ -1462,14 +1463,14 @@ SIMSIMD_PUBLIC int simsimd_uses_turin(void) { return _SIMSIMD_TARGET_X86 && SIMS
SIMSIMD_PUBLIC int simsimd_uses_sierra(void) { return _SIMSIMD_TARGET_X86 && SIMSIMD_TARGET_SIERRA; }
SIMSIMD_PUBLIC int simsimd_uses_dynamic_dispatch(void) { return 0; }
SIMSIMD_PUBLIC simsimd_capability_t simsimd_capabilities(void) { return _simsimd_capabilities_implementation(); }
-SIMSIMD_PUBLIC void simsimd_find_metric_punned( //
+SIMSIMD_PUBLIC void simsimd_find_kernel_punned( //
simsimd_metric_kind_t kind, //
simsimd_datatype_t datatype, //
simsimd_capability_t supported, //
simsimd_capability_t allowed, //
- simsimd_metric_punned_t* metric_output, //
+ simsimd_kernel_punned_t* kernel_output, //
simsimd_capability_t* capability_output) {
- _simsimd_find_metric_punned_implementation(kind, datatype, supported, allowed, metric_output, capability_output);
+ _simsimd_find_kernel_punned_implementation(kind, datatype, supported, allowed, kernel_output, capability_output);
}
// clang-format on
diff --git a/javascript/lib.c b/javascript/lib.c
index fb4de82c..f668d795 100644
--- a/javascript/lib.c
+++ b/javascript/lib.c
@@ -53,9 +53,10 @@ napi_value dense(napi_env env, napi_callback_info info, simsimd_metric_kind_t me
default: break;
}
- simsimd_metric_punned_t metric = NULL;
+ simsimd_metric_dense_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
- simsimd_find_metric_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k, &metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k,
+ (simsimd_kernel_punned_t *)&metric, &capability);
if (metric == NULL) {
napi_throw_error(env, NULL, "Unsupported datatype for given metric");
return NULL;
diff --git a/package-lock.json b/package-lock.json
index fb0326c4..9a353f2a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "simsimd",
- "version": "4.2.1",
+ "version": "5.9.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "simsimd",
- "version": "4.2.1",
+ "version": "5.9.2",
"hasInstallScript": true,
"license": "Apache 2.0",
"dependencies": {
@@ -16,7 +16,7 @@
},
"devDependencies": {
"@types/bindings": "^1.5.5",
- "@types/node": "^20.12.2",
+ "@types/node": "^20.17.1",
"node-gyp": "^10.0.1",
"prebuildify": "^6.0.0",
"typescript": "^5.3.3"
@@ -107,12 +107,13 @@
}
},
"node_modules/@types/node": {
- "version": "20.12.2",
- "resolved": "https://registry.npmjs.org/@types/node/-/node-20.12.2.tgz",
- "integrity": "sha512-zQ0NYO87hyN6Xrclcqp7f8ZbXNbRfoGWNcMvHTPQp9UUrwI0mI7XBz+cu7/W6/VClYo2g63B0cjull/srU7LgQ==",
+ "version": "20.17.1",
+ "resolved": "https://registry.npmjs.org/@types/node/-/node-20.17.1.tgz",
+ "integrity": "sha512-j2VlPv1NnwPJbaCNv69FO/1z4lId0QmGvpT41YxitRtWlg96g/j8qcv2RKsLKe2F6OJgyXhupN1Xo17b2m139Q==",
"devOptional": true,
+ "license": "MIT",
"dependencies": {
- "undici-types": "~5.26.4"
+ "undici-types": "~6.19.2"
}
},
"node_modules/abbrev": {
@@ -1525,10 +1526,11 @@
}
},
"node_modules/undici-types": {
- "version": "5.26.5",
- "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz",
- "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==",
- "devOptional": true
+ "version": "6.19.8",
+ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.19.8.tgz",
+ "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==",
+ "devOptional": true,
+ "license": "MIT"
},
"node_modules/unique-filename": {
"version": "3.0.0",
diff --git a/package.json b/package.json
index 6ee7ae2a..8bd7be79 100644
--- a/package.json
+++ b/package.json
@@ -48,7 +48,7 @@
},
"devDependencies": {
"@types/bindings": "^1.5.5",
- "@types/node": "^20.12.2",
+ "@types/node": "^20.17.1",
"node-gyp": "^10.0.1",
"prebuildify": "^6.0.0",
"typescript": "^5.3.3"
@@ -58,4 +58,4 @@
"mathjs": "^11.11.2",
"usearch": "^2.8.15"
}
-}
\ No newline at end of file
+}
diff --git a/python/annotations/__init__.pyi b/python/annotations/__init__.pyi
index 484387fc..93976e26 100644
--- a/python/annotations/__init__.pyi
+++ b/python/annotations/__init__.pyi
@@ -31,49 +31,25 @@ _MetricType = Literal[
"wsum",
]
_IntegralType = Literal[
- # Booleans
- "c",
- "b8",
+ "bin8",
# Signed integers
- "b",
- "i8",
"int8",
- "h",
- "i16",
"int16",
- "i",
- "l",
- "i32",
"int32",
- "q",
- "i64",
"int64",
# Unsigned integers
- "B",
- "u8",
"uint8",
- "H",
- "u16",
"uint16",
- "I",
- "L",
- "u32",
"uint32",
- "Q",
- "u64",
"uint64",
]
_FloatType = Literal[
- "f",
"f32",
"float32",
- "e",
"f16",
"float16",
- "d",
"f64",
"float64",
- "bh", #! Not supported by NumPy
"bf16", #! Not supported by NumPy
"bfloat16", #! Not supported by NumPy
]
diff --git a/python/lib.c b/python/lib.c
index debe289e..29d9e257 100644
--- a/python/lib.c
+++ b/python/lib.c
@@ -62,6 +62,28 @@
* If the argument is not found, a @b `KeyError` is raised.
*
* https://ashvardanian.com/posts/discount-on-keyword-arguments-in-python/
+ *
+ * @section Buffer Protocol and NumPy Compatibility
+ *
+ * Most modern Machine Learning frameworks struggle with the buffer protocol compatibility.
+ * At best, they provide zero-copy NumPy views of the underlying data, introducing unnecessary
+ * dependency on NumPy, a memory allocation for the wrapper, and a constraint on the supported
+ * numeric types. The last is a noticeable limitation, as both PyTorch and TensorFlow have
+ * richer type systems than NumPy.
+ *
+ * You can't convert a PyTorch `Tensor` to a `memoryview` object.
+ * If you try to convert a `bf16` TensorFlow `Tensor` to a `memoryview` object, you will get an error:
+ *
+ * ! ValueError: cannot include dtype 'E' in a buffer
+ *
+ * Moreover, the CPython documentation and the NumPy documentation diverge on the format specifiers
+ * for the `typestr` and `format` data-type descriptor strings, making the development error-prone.
+ * At this point, SimSIMD seems to be @b the_only_package that at least attempts to provide interoperability.
+ *
+ * https://numpy.org/doc/stable/reference/arrays.interface.html
+ * https://pearu.github.io/array_interface_pytorch.html
+ * https://github.com/pytorch/pytorch/issues/54138
+ * https://github.com/pybind/pybind11/issues/1908
*/
#include
@@ -126,115 +148,114 @@ int is_complex(simsimd_datatype_t datatype) {
datatype == simsimd_datatype_f16c_k || datatype == simsimd_datatype_bf16c_k;
}
-/// @brief Converts a numpy datatype string to a logical datatype, normalizing the format.
+/// @brief Converts a Python-ic datatype string to a logical datatype, normalizing the format.
/// @return `simsimd_datatype_unknown_k` if the datatype is not supported, otherwise the logical datatype.
/// @see https://docs.python.org/3/library/struct.html#format-characters
-simsimd_datatype_t numpy_string_to_datatype(char const *name) {
- // Floating-point numbers:
- if (same_string(name, "f") || same_string(name, "shape[1]);
// printf("buffer itemsize is %d\n", buffer->itemsize);
parsed->start = buffer->buf;
- parsed->datatype = numpy_string_to_datatype(buffer->format);
+ parsed->datatype = python_string_to_datatype(buffer->format);
+ if (parsed->datatype == simsimd_datatype_unknown_k) {
+ PyErr_Format(PyExc_ValueError, "Unsupported '%s' datatype specifier", buffer->format);
+ PyBuffer_Release(buffer);
+ return 0;
+ }
+
parsed->rank = buffer->ndim;
if (buffer->ndim == 1) {
if (buffer->strides[0] > buffer->itemsize) {
@@ -514,8 +541,7 @@ int parse_tensor(PyObject *tensor, Py_buffer *buffer, TensorArgument *parsed) {
}
// We handle complex numbers differently
- if (is_complex(parsed->datatype)) { parsed->dimensions *= 2; }
-
+ if (is_complex(parsed->datatype)) parsed->dimensions *= 2;
return 1;
}
@@ -684,9 +710,10 @@ static PyObject *implement_dense_metric( //
}
// Look up the metric and the capability
- simsimd_metric_punned_t metric = NULL;
+ simsimd_metric_dense_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
- simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, &metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
+ (simsimd_kernel_punned_t *)&metric, &capability);
if (!metric) {
PyErr_Format( //
PyExc_LookupError,
@@ -889,8 +916,8 @@ static PyObject *implement_curved_metric( //
// Look up the metric and the capability
simsimd_metric_curved_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
- simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
- (simsimd_metric_punned_t *)&metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
+ (simsimd_kernel_punned_t *)&metric, &capability);
if (!metric) {
PyErr_Format( //
PyExc_LookupError,
@@ -948,8 +975,8 @@ static PyObject *implement_sparse_metric( //
simsimd_datatype_t dtype = a_parsed.datatype;
simsimd_metric_sparse_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
- simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
- (simsimd_metric_punned_t *)&metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
+ (simsimd_kernel_punned_t *)&metric, &capability);
if (!metric) {
PyErr_Format( //
PyExc_LookupError, "Unsupported metric '%c' and datatype combination ('%s'/'%s' and '%s'/'%s')",
@@ -1040,9 +1067,10 @@ static PyObject *implement_cdist( //
}
// Look up the metric and the capability
- simsimd_metric_punned_t metric = NULL;
+ simsimd_metric_dense_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
- simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k, &metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
+ (simsimd_kernel_punned_t *)&metric, &capability);
if (!metric) {
PyErr_Format( //
PyExc_LookupError, "Unsupported metric '%c' and datatype combination ('%s'/'%s' and '%s'/'%s')",
@@ -1175,9 +1203,9 @@ static PyObject *implement_pointer_access(simsimd_metric_kind_t metric_kind, PyO
return NULL;
}
- simsimd_metric_punned_t metric = NULL;
+ simsimd_kernel_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
- simsimd_find_metric_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k, &metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, datatype, static_capabilities, simsimd_cap_any_k, &metric, &capability);
if (metric == NULL) {
PyErr_SetString(PyExc_LookupError, "No such metric");
return NULL;
@@ -1193,16 +1221,14 @@ static char const doc_cdist[] = //
" b (NDArray): Second matrix.\n"
" metric (str, optional): Distance metric to use (e.g., 'sqeuclidean', 'cosine').\n"
" out (NDArray, optional): Output matrix to store the result.\n"
- " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type.\n"
+ " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type name.\n"
" out_dtype (Union[FloatType, ComplexType], optional): Result type, default is 'float64'.\n\n"
" threads (int, optional): Number of threads to use (default is 1).\n"
"Returns:\n"
" DistancesTensor: Pairwise distances between all inputs.\n\n"
"Equivalent to: `scipy.spatial.distance.cdist`.\n"
- "Notes:\n"
- " * `a` and `b` are positional-only arguments.\n"
- " * `metric` can be positional or keyword.\n"
- " * `out`, `threads`, `dtype`, and `out_dtype` are keyword-only arguments.";
+ "Signature:\n"
+ " >>> def cdist(a, b, /, metric, *, dtype, out, out_dtype, threads) -> Optional[DistancesTensor]: ...";
static PyObject *api_cdist( //
PyObject *self, PyObject *const *args, Py_ssize_t const positional_args_count, PyObject *args_names_tuple) {
@@ -1356,7 +1382,7 @@ static char const doc_l2[] = //
"Args:\n"
" a (NDArray): First matrix or vector.\n"
" b (NDArray): Second matrix or vector.\n"
- " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n"
+ " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1376,7 +1402,7 @@ static char const doc_l2sq[] = //
"Args:\n"
" a (NDArray): First matrix or vector.\n"
" b (NDArray): Second matrix or vector.\n"
- " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n"
+ " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1396,7 +1422,7 @@ static char const doc_cos[] = //
"Args:\n"
" a (NDArray): First matrix or vector.\n"
" b (NDArray): Second matrix or vector.\n"
- " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n"
+ " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1416,7 +1442,7 @@ static char const doc_dot[] = //
"Args:\n"
" a (NDArray): First matrix or vector.\n"
" b (NDArray): Second matrix or vector.\n"
- " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type.\n"
+ " dtype (Union[IntegralType, FloatType, ComplexType], optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (Union[FloatType, ComplexType], optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1436,7 +1462,7 @@ static char const doc_vdot[] = //
"Args:\n"
" a (NDArray): First complex matrix or vector.\n"
" b (NDArray): Second complex matrix or vector.\n"
- " dtype (ComplexType, optional): Override the presumed input type.\n"
+ " dtype (ComplexType, optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (Union[ComplexType], optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1456,7 +1482,7 @@ static char const doc_kl[] = //
"Args:\n"
" a (NDArray): First floating-point matrix or vector.\n"
" b (NDArray): Second floating-point matrix or vector.\n"
- " dtype (FloatType, optional): Override the presumed input type.\n"
+ " dtype (FloatType, optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1476,7 +1502,7 @@ static char const doc_js[] = //
"Args:\n"
" a (NDArray): First floating-point matrix or vector.\n"
" b (NDArray): Second floating-point matrix or vector.\n"
- " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type.\n"
+ " dtype (Union[IntegralType, FloatType], optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1496,7 +1522,7 @@ static char const doc_hamming[] = //
"Args:\n"
" a (NDArray): First binary matrix or vector.\n"
" b (NDArray): Second binary matrix or vector.\n"
- " dtype (IntegralType, optional): Override the presumed input type.\n"
+ " dtype (IntegralType, optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1516,7 +1542,7 @@ static char const doc_jaccard[] = //
"Args:\n"
" a (NDArray): First binary matrix or vector.\n"
" b (NDArray): Second binary matrix or vector.\n"
- " dtype (IntegralType, optional): Override the presumed input type.\n"
+ " dtype (IntegralType, optional): Override the presumed input type name.\n"
" out (NDArray, optional): Vector for resulting distances. Allocates a new tensor by default.\n"
" out_dtype (FloatType, optional): Result type, default is 'float64'.\n\n"
"Returns:\n"
@@ -1537,7 +1563,7 @@ static char const doc_bilinear[] = //
" a (NDArray): First vector.\n"
" b (NDArray): Second vector.\n"
" metric_tensor (NDArray): The metric tensor defining the bilinear form.\n"
- " dtype (FloatType, optional): Override the presumed input type.\n\n"
+ " dtype (FloatType, optional): Override the presumed input type name.\n\n"
"Returns:\n"
" float: The bilinear form.\n\n"
"Equivalent to: `numpy.dot` with a metric tensor.\n"
@@ -1555,7 +1581,7 @@ static char const doc_mahalanobis[] = //
" a (NDArray): First vector.\n"
" b (NDArray): Second vector.\n"
" inverse_covariance (NDArray): The inverse of the covariance matrix.\n"
- " dtype (FloatType, optional): Override the presumed input type.\n\n"
+ " dtype (FloatType, optional): Override the presumed input type name.\n\n"
"Returns:\n"
" float: The Mahalanobis distance.\n\n"
"Equivalent to: `scipy.spatial.distance.mahalanobis`.\n"
@@ -1588,7 +1614,7 @@ static char const doc_fma[] = //
" a (NDArray): First vector.\n"
" b (NDArray): Second vector.\n"
" c (NDArray): Third vector.\n"
- " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type.\n"
+ " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type name.\n"
" alpha (float, optional): First scale, 1.0 by default.\n"
" beta (float, optional): Second scale, 1.0 by default.\n"
" out (NDArray, optional): Vector for resulting distances.\n\n"
@@ -1712,8 +1738,8 @@ static PyObject *api_fma(PyObject *self, PyObject *const *args, Py_ssize_t const
simsimd_kernel_fma_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
simsimd_metric_kind_t const metric_kind = simsimd_metric_fma_k;
- simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
- (simsimd_metric_punned_t *)&metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
+ (simsimd_kernel_punned_t *)&metric, &capability);
if (!metric) {
PyErr_Format( //
PyExc_LookupError,
@@ -1770,7 +1796,7 @@ static char const doc_wsum[] = //
"Args:\n"
" a (NDArray): First vector.\n"
" b (NDArray): Second vector.\n"
- " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type.\n"
+ " dtype (Union[IntegralType, FloatType], optional): Override the presumed numeric type name.\n"
" alpha (float, optional): First scale, 1.0 by default.\n"
" beta (float, optional): Second scale, 1.0 by default.\n"
" out (NDArray, optional): Vector for resulting distances.\n\n"
@@ -1888,8 +1914,8 @@ static PyObject *api_wsum(PyObject *self, PyObject *const *args, Py_ssize_t cons
simsimd_kernel_wsum_punned_t metric = NULL;
simsimd_capability_t capability = simsimd_cap_serial_k;
simsimd_metric_kind_t const metric_kind = simsimd_metric_wsum_k;
- simsimd_find_metric_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
- (simsimd_metric_punned_t *)&metric, &capability);
+ simsimd_find_kernel_punned(metric_kind, dtype, static_capabilities, simsimd_cap_any_k,
+ (simsimd_kernel_punned_t *)&metric, &capability);
if (!metric) {
PyErr_Format( //
PyExc_LookupError,
diff --git a/scripts/bench_vectors.py b/scripts/bench_vectors.py
index 229b5bc3..12aeaf78 100755
--- a/scripts/bench_vectors.py
+++ b/scripts/bench_vectors.py
@@ -44,7 +44,7 @@
"sparse", # Intersection of two sparse integer sets, with float/int weights
]
dtype_names = [
- "bits", #! Not supported by SciPy
+ "bin8", #! Not supported by SciPy
"int8", #! Presented as supported, but overflows most of the time
"uint16",
"uint32",
@@ -248,21 +248,21 @@ def for_dtypes(
if "binary" in metric_families and include_scipy:
yield from for_dtypes(
"scipy.hamming",
- ["bits"],
+ ["bin8"],
spd.hamming,
wrap_rows_batch_calls(spd.hamming),
lambda A, B: spd.cdist(A, B, "hamming"),
- lambda A, B: simd.hamming(A, B, "bits"),
- lambda A, B: simd.cdist(A, B, "bits", metric="hamming"),
+ lambda A, B: simd.hamming(A, B, "bin8"),
+ lambda A, B: simd.cdist(A, B, "bin8", metric="hamming"),
)
yield from for_dtypes(
"scipy.jaccard",
- ["bits"],
+ ["bin8"],
spd.jaccard,
wrap_rows_batch_calls(spd.jaccard),
lambda A, B: spd.cdist(A, B, "jaccard"),
- lambda A, B: simd.jaccard(A, B, "bits"),
- lambda A, B: simd.cdist(A, B, "bits", metric="jaccard"),
+ lambda A, B: simd.jaccard(A, B, "bin8"),
+ lambda A, B: simd.cdist(A, B, "bin8", metric="jaccard"),
)
if "spatial" in metric_families and include_scikit:
yield from for_dtypes(
@@ -351,7 +351,7 @@ def random_matrix(count: int, ndim: int, dtype: str) -> np.ndarray:
return np.random.randint(0, high=256, size=(count, ndim), dtype=np.int16)
if dtype == "int8":
return np.random.randint(-100, high=100, size=(count, ndim), dtype=np.int8)
- if dtype == "bits":
+ if dtype == "bin8":
return np.packbits(np.random.randint(0, high=2, size=(count, ndim), dtype=np.uint8), axis=0)
diff --git a/scripts/test.py b/scripts/test.py
index f8b9bbdd..7408d71e 100644
--- a/scripts/test.py
+++ b/scripts/test.py
@@ -491,25 +491,25 @@ def hex_array(arr):
def test_pointers_availability():
"""Tests the availability of pre-compiled functions for compatibility with USearch."""
- assert simd.pointer_to_sqeuclidean("f64") != 0
- assert simd.pointer_to_cosine("f64") != 0
- assert simd.pointer_to_inner("f64") != 0
+ assert simd.pointer_to_sqeuclidean("float64") != 0
+ assert simd.pointer_to_cosine("float64") != 0
+ assert simd.pointer_to_inner("float64") != 0
- assert simd.pointer_to_sqeuclidean("f32") != 0
- assert simd.pointer_to_cosine("f32") != 0
- assert simd.pointer_to_inner("f32") != 0
+ assert simd.pointer_to_sqeuclidean("float32") != 0
+ assert simd.pointer_to_cosine("float32") != 0
+ assert simd.pointer_to_inner("float32") != 0
- assert simd.pointer_to_sqeuclidean("f16") != 0
- assert simd.pointer_to_cosine("f16") != 0
- assert simd.pointer_to_inner("f16") != 0
+ assert simd.pointer_to_sqeuclidean("float16") != 0
+ assert simd.pointer_to_cosine("float16") != 0
+ assert simd.pointer_to_inner("float16") != 0
- assert simd.pointer_to_sqeuclidean("i8") != 0
- assert simd.pointer_to_cosine("i8") != 0
- assert simd.pointer_to_inner("i8") != 0
+ assert simd.pointer_to_sqeuclidean("int8") != 0
+ assert simd.pointer_to_cosine("int8") != 0
+ assert simd.pointer_to_inner("int8") != 0
- assert simd.pointer_to_sqeuclidean("u8") != 0
- assert simd.pointer_to_cosine("u8") != 0
- assert simd.pointer_to_inner("u8") != 0
+ assert simd.pointer_to_sqeuclidean("uint8") != 0
+ assert simd.pointer_to_cosine("uint8") != 0
+ assert simd.pointer_to_inner("uint8") != 0
def test_capabilities_list():
@@ -832,11 +832,11 @@ def test_dense_bits(ndim, metric, capability, stats_fixture):
baseline_kernel, simd_kernel = name_to_kernels(metric)
accurate_dt, accurate = profile(baseline_kernel, a.astype(np.uint64), b.astype(np.uint64))
expected_dt, expected = profile(baseline_kernel, a, b)
- result_dt, result = profile(simd_kernel, np.packbits(a), np.packbits(b), "b8")
+ result_dt, result = profile(simd_kernel, np.packbits(a), np.packbits(b), "bin8")
result = np.array(result)
np.testing.assert_allclose(result, expected, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL)
- collect_errors(metric, ndim, "bits", accurate, accurate_dt, expected, expected_dt, result, result_dt, stats_fixture)
+ collect_errors(metric, ndim, "bin8", accurate, accurate_dt, expected, expected_dt, result, result_dt, stats_fixture)
@pytest.mark.skip(reason="Problems inferring the tolerance bounds for numerical errors")
@@ -1391,10 +1391,10 @@ def test_cdist_hamming(ndim, out_dtype, capability):
if out_dtype is None:
# SciPy divides the Hamming distance by the number of dimensions, so we need to multiply it back.
expected = spd.cdist(A, B, "hamming") * ndim
- result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="b8")
+ result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="bin8")
else:
expected = (spd.cdist(A, B, "hamming") * ndim).astype(out_dtype)
- result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="b8", out_dtype=out_dtype)
+ result = simd.cdist(A_bits, B_bits, metric="hamming", dtype="bin8", out_dtype=out_dtype)
np.testing.assert_allclose(result, expected, atol=SIMSIMD_ATOL, rtol=SIMSIMD_RTOL)
diff --git a/swift/SimSIMD.swift b/swift/SimSIMD.swift
index cb618e3e..56539453 100644
--- a/swift/SimSIMD.swift
+++ b/swift/SimSIMD.swift
@@ -2,9 +2,9 @@ import CSimSIMD
public protocol SimSIMD {
static var dataType: simsimd_datatype_t { get }
- static var cosine: simsimd_metric_punned_t { get }
- static var dotProduct: simsimd_metric_punned_t { get }
- static var squaredEuclidean: simsimd_metric_punned_t { get }
+ static var cosine: simsimd_metric_dense_punned_t { get }
+ static var dotProduct: simsimd_metric_dense_punned_t { get }
+ static var squaredEuclidean: simsimd_metric_dense_punned_t { get }
}
extension Int8: SimSIMD {
@@ -71,7 +71,7 @@ extension RandomAccessCollection where Element: SimSIMD {
}
@inlinable @inline(__always)
-func perform(_ metric: simsimd_metric_punned_t, a: A, b: B) -> Double? where A: Sequence, B: Sequence, A.Element == B.Element {
+func perform(_ metric: simsimd_metric_dense_punned_t, a: A, b: B) -> Double? where A: Sequence, B: Sequence, A.Element == B.Element {
var distance: simsimd_distance_t = 0
let result = a.withContiguousStorageIfAvailable { a in
b.withContiguousStorageIfAvailable { b in
@@ -118,10 +118,15 @@ extension simsimd_capability_t: OptionSet, CustomStringConvertible {
}
@inline(__always)
-private func find(kind: simsimd_metric_kind_t, dataType: simsimd_datatype_t) -> simsimd_metric_punned_t {
- var output: simsimd_metric_punned_t?
+private func find(kind: simsimd_metric_kind_t, dataType: simsimd_datatype_t) -> simsimd_metric_dense_punned_t {
+ var output: simsimd_metric_dense_punned_t?
var used = simsimd_capability_t.any
- simsimd_find_metric_punned(kind, dataType, .available, .any, &output, &used)
+ // Use `withUnsafeMutablePointer` to safely cast `output` to the required pointer type.
+ withUnsafeMutablePointer(to: &output) { outputPtr in
+ // Cast the pointer to `UnsafeMutablePointer`
+ let castedPtr = outputPtr.withMemoryRebound(to: Optional.self, capacity: 1) { $0 }
+ simsimd_find_kernel_punned(kind, dataType, .available, .any, castedPtr, &used)
+ }
guard let output else { fatalError("Could not find function \(kind) for \(dataType)") }
return output
}