diff --git a/include/simsimd/simsimd.h b/include/simsimd/simsimd.h
index 9d3eebc3..ced61c84 100644
--- a/include/simsimd/simsimd.h
+++ b/include/simsimd/simsimd.h
@@ -102,14 +102,15 @@
 #define SIMSIMD_DYNAMIC_DISPATCH (0) // true or false
 #endif
 
-#include "binary.h"      // Hamming, Jaccard
-#include "curved.h"      // Mahalanobis, Bilinear Forms
-#include "dot.h"         // Inner (dot) product, and its conjugate
-#include "elementwise.h" // Weighted Sum, Fused-Multiply-Add
-#include "geospatial.h"  // Haversine and Vincenty
-#include "probability.h" // Kullback-Leibler, Jensen–Shannon
-#include "sparse.h"      // Intersect
-#include "spatial.h"     // L2, Angular distances
+#include "binary.h"       // Hamming, Jaccard
+#include "curved.h"       // Mahalanobis, Bilinear Forms
+#include "dot.h"          // Inner (dot) product, and its conjugate
+#include "elementwise.h"  // Weighted Sum, Fused-Multiply-Add
+#include "geospatial.h"   // Haversine and Vincenty
+#include "probability.h"  // Kullback-Leibler, Jensen–Shannon
+#include "sparse.h"       // Intersect
+#include "spatial.h"      // L2, Angular distances
+#include "trigonometry.h" // Sine, Cosine
 
 // On Apple Silicon, `mrs` is not allowed in user-space, so we need to use the `sysctl` API.
 #if defined(_SIMSIMD_DEFINED_APPLE)
@@ -128,11 +129,11 @@ typedef enum {
     simsimd_kernel_unknown_k = 0, ///< Unknown kernel
 
     // Classics:
-    simsimd_dot_k = 'i',  ///< Inner product
-    simsimd_vdot_k = 'v', ///< Complex inner product
-    simsimd_cos_k = 'c',  ///< Cosine similarity
-    simsimd_l2_k = '2',   ///< Euclidean distance alias
-    simsimd_l2sq_k = 'e', ///< Squared Euclidean distance
+    simsimd_dot_k = 'i',     ///< Inner product
+    simsimd_vdot_k = 'v',    ///< Complex inner product
+    simsimd_angular_k = 'a', ///< Angular (cosine) similarity
+    simsimd_l2_k = 'e',      ///< Euclidean distance alias
+    simsimd_l2sq_k = '2',    ///< Squared Euclidean distance
 
     // Binary:
     simsimd_hamming_k = 'h', ///< Hamming distance
@@ -159,11 +160,9 @@ typedef enum {
     simsimd_fma_k = 'f',   ///< Fused Multiply-Add
 
     // Aliases:
-    simsimd_cosine_k = 'c',           ///< Cosine similarity alias
-    simsimd_angular_k = 'c',          ///< Cosine similarity alias
     simsimd_inner_k = 'i',            ///< Inner product alias
-    simsimd_euclidean_k = '2',        ///< Euclidean distance alias
-    simsimd_sqeuclidean_k = 'e',      ///< Squared Euclidean distance alias
+    simsimd_euclidean_k = 'e',        ///< Euclidean distance alias
+    simsimd_sqeuclidean_k = '2',      ///< Squared Euclidean distance alias
     simsimd_manhattan_k = 'h',        ///< Manhattan distance is same as Hamming
     simsimd_tanimoto_k = 'j',         ///< Tanimoto coefficient is same as Jaccard
     simsimd_kullback_leibler_k = 'k', ///< Kullback-Leibler divergence alias
diff --git a/include/simsimd/trigonometry.h b/include/simsimd/trigonometry.h
new file mode 100644
index 00000000..1c5b1260
--- /dev/null
+++ b/include/simsimd/trigonometry.h
@@ -0,0 +1,363 @@
+/**
+ *  @file       trigonometry.h
+ *  @brief      SIMD-accelerated trigonometric element-wise oeprations.
+ *  @author     Ash Vardanian
+ *  @date       July 1, 2023
+ *
+ *  Contains:
+ *  - Sine and Cosine approximations: fast for `f32` vs accurate for `f64`
+ *
+ *  For datatypes:
+ *  - 32-bit IEEE-754 floating point
+ *  - 64-bit IEEE-754 floating point
+ *
+ *  For hardware architectures:
+ *  - Arm: NEON
+ *  - x86: Haswell, Skylake
+ *
+ *  Those functions partially complement the `elementwise.h` module, and are necessary for
+ *  the `geospatial.h` module, among others. Both Haversine and Vincenty's formulas require
+ *  trigonometric functions, and those are the most expensive part of the computation.
+ *
+ *  @section    GLibC IEEE-754-compliant Math Functions
+ *
+ *  The GNU C Library (GLibC) provides a set of IEEE-754-compliant math functions, like `sinf`, `cosf`,
+ *  and double-precsion variants `sin`, `cos`. Those functions are accurate to ~0.55 ULP (units in the
+ *  last place), but can be slow to evaluate. They use a combination of techniques, like:
+ *
+ *  - Taylor series expansions for small values.
+ *  - Table lookups combined with corrections for moderate values.
+ *  - Accurate modulo reduction for large values.
+ *
+ *  The precomputed tables may be the hardest part to accelerate with SIMD, as they contain 440x values,
+ *  each 64-bit wide.
+ *
+ *  https://github.com/lattera/glibc/blob/895ef79e04a953cac1493863bcae29ad85657ee1/sysdeps/ieee754/dbl-64/branred.c#L54
+ *  https://github.com/lattera/glibc/blob/895ef79e04a953cac1493863bcae29ad85657ee1/sysdeps/ieee754/dbl-64/s_sin.c#L84
+ *
+ *  @section    Approximation Algorithms
+ *
+ *  There are several ways to approximate trigonometric functions, and the choice depends on the
+ *  target hardware and the desired precision. Notably:
+ *
+ *  - Taylor Series approximation is a series expansion of a sum of its derivatives at a target point.
+ *    It's easy to derive for differentiable functions, works well for functions smooth around the
+ *    expsansion point, but can perform poorly for functions with singularities or high-frequency
+ *    oscillations.
+ *
+ *  - Pade approximations are rational functions that approximate a function by a ratio of polynomials.
+ *    It often converges faster than Taylor for functions with singularities or steep changes, provides
+ *    good approximations for both smooth and rational functions, but can be more computationally
+ *    intensive to evaluate, and can have holes (undefined points).
+ *
+ *  Moreover, most approximations can be combined with Horner's methods of evaluating polynomials
+ *  to reduce the number of multiplications and additions, and to improve the numerical stability.
+ *  In trigonometry, the Payne-Hanek Range Reduction is another technique used to reduce the argument
+ *  to a smaller range, where the approximation is more accurate.
+ *
+ *  x86 intrinsics: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/
+ *  Arm intrinsics: https://developer.arm.com/architectures/instruction-sets/intrinsics/
+ */
+#ifndef SIMSIMD_TRIGONOMETRY_H
+#define SIMSIMD_TRIGONOMETRY_H
+
+#include "types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*  Serial backends for all numeric types.
+ *  By default they use 32-bit arithmetic, unless the arguments themselves contain 64-bit floats.
+ *  For double-precision computation check out the "*_accurate" variants of those "*_serial" functions.
+ */
+SIMSIMD_PUBLIC void simsimd_sin_f64_serial(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f64_serial(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_sin_f32_serial(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f32_serial(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+
+SIMSIMD_PUBLIC void simsimd_sin_f64_neon(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f64_neon(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_sin_f32_neon(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f32_neon(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+
+/*  SIMD-powered backends for AVX2 CPUs of Haswell generation and newer, using 32-bit arithmetic over 256-bit words.
+ *  First demonstrated in 2011, at least one Haswell-based processor was still being sold in 2022 — the Pentium G3420.
+ *  Practically all modern x86 CPUs support AVX2, FMA, and F16C, making it a perfect baseline for SIMD algorithms.
+ *  On other hand, there is no need to implement AVX2 versions of `f32` and `f64` functions, as those are
+ *  properly vectorized by recent compilers.
+ */
+SIMSIMD_PUBLIC void simsimd_sin_f64_haswell(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f64_haswell(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_sin_f32_haswell(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f32_haswell(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+
+/*  SIMD-powered backends for various generations of AVX512 CPUs.
+ *  Skylake is handy, as it supports masked loads and other operations, avoiding the need for the tail loop.
+ */
+SIMSIMD_PUBLIC void simsimd_sin_f64_skylake(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f64_skylake(simsimd_f64_t const *ins, simsimd_size_t n, simsimd_f64_t const *outs);
+SIMSIMD_PUBLIC void simsimd_sin_f32_skylake(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+SIMSIMD_PUBLIC void simsimd_cos_f32_skylake(simsimd_f32_t const *ins, simsimd_size_t n, simsimd_f32_t const *outs);
+
+/**
+ *  @brief  Computes an approximate sine of the given angle in radians with 3 ULP error bound.
+ *  @see    xfastsinf_u3500 in SLEEF library
+ *  @param  angle The input angle in radians.
+ *  @return The approximate sine of the input angle.
+ */
+SIMSIMD_PUBLIC simsimd_f32_t simsimd_f32_sin(simsimd_f32_t angle) {
+    // Variables
+    int multiple_of_pi;          // The integer multiple of π in the input angle
+    simsimd_f32_t result;        // The final result of the sine computation
+    simsimd_f32_t angle_squared; // Square of the reduced angle
+
+    // Constants for argument reduction
+    simsimd_f32_t const pi_reciprocal = 0.31830988618379067154f; // 1/π
+    simsimd_f32_t const pi = 3.14159265358979323846f;            // π
+
+    // Compute multiple_of_pi = round(angle / π)
+    simsimd_f32_t quotient = angle * pi_reciprocal;
+    if (quotient >= 0.0f) { multiple_of_pi = (int)(quotient + 0.5f); }
+    else { multiple_of_pi = (int)(quotient - 0.5f); }
+
+    // Reduce the angle: angle = angle - (multiple_of_pi * π)
+    angle = angle - multiple_of_pi * pi;
+
+    // Compute the square of the reduced angle
+    angle_squared = angle * angle;
+
+    // Polynomial coefficients for sine approximation (minimax polynomial)
+    simsimd_f32_t const coeff_5 = -0.0001881748176f; // Coefficient for x^5 term
+    simsimd_f32_t const coeff_3 = 0.008323502727f;   // Coefficient for x^3 term
+    simsimd_f32_t const coeff_1 = -0.1666651368f;    // Coefficient for x term
+
+    // Compute the polynomial approximation
+    simsimd_f32_t polynomial = coeff_5;
+    polynomial = polynomial * angle_squared + coeff_3; // polynomial = (coeff_5 * x^2) + coeff_3
+    polynomial = polynomial * angle_squared + coeff_1; // polynomial = polynomial * x^2 + coeff_1
+
+    // Compute the final result: sine approximation
+    result = ((angle_squared * angle) * polynomial) + angle; // result = (x^3 * polynomial) + x
+
+    // If multiple_of_pi is odd, flip the sign of the result
+    if ((multiple_of_pi & 1) != 0) { result = -result; }
+
+    return result;
+}
+
+/**
+ *  @brief  Computes an approximate cosine of the given angle in radians with 3 ULP error bound.
+ *  @see    xfastcosf_u3500 in SLEEF library
+ *  @param  angle The input angle in radians.
+ *  @return The approximate cosine of the input angle.
+ */
+SIMSIMD_PUBLIC simsimd_f32_t simsimd_f32_cos(simsimd_f32_t angle) {
+    // Variables
+    int multiple_of_pi;          // The integer multiple of π in the input angle
+    simsimd_f32_t result;        // The final result of the cosine computation
+    simsimd_f32_t angle_squared; // Square of the reduced angle
+    simsimd_f32_t reduced_angle; // The angle reduced to the primary interval
+
+    // Constants for argument reduction
+    simsimd_f32_t const pi_reciprocal = 0.31830988618379067154f; // 1/π
+    simsimd_f32_t const pi = 3.14159265358979323846f;            // π
+
+    // Compute multiple_of_pi = round(angle * (1/π) - 0.5)
+    simsimd_f32_t quotient = angle * pi_reciprocal - 0.5f;
+    if (quotient >= 0.0f) { multiple_of_pi = (int)(quotient + 0.5f); }
+    else { multiple_of_pi = (int)(quotient - 0.5f); }
+
+    // Reduce the angle: angle = angle - (multiple_of_pi + 0.5) * π
+    reduced_angle = angle - (multiple_of_pi + 0.5f) * pi;
+
+    // Compute the square of the reduced angle
+    angle_squared = reduced_angle * reduced_angle;
+
+    // Polynomial coefficients for cosine approximation (minimax polynomial)
+    simsimd_f32_t const coeff_5 = -0.0001881748176f; // Coefficient for x^5 term
+    simsimd_f32_t const coeff_3 = 0.008323502727f;   // Coefficient for x^3 term
+    simsimd_f32_t const coeff_1 = -0.1666651368f;    // Coefficient for x^1 term
+
+    // Compute the polynomial approximation
+    simsimd_f32_t polynomial = coeff_5;
+    polynomial = polynomial * angle_squared + coeff_3; // polynomial = (coeff_5 * x^2) + coeff_3
+    polynomial = polynomial * angle_squared + coeff_1; // polynomial = polynomial * x^2 + coeff_1
+
+    // Compute the final result: cosine approximation
+    result = ((angle_squared * reduced_angle) * polynomial) + reduced_angle; // result = (x^3 * polynomial) + x
+
+    // If multiple_of_pi is even, flip the sign of the result
+    if ((multiple_of_pi & 1) == 0) { result = -result; }
+
+    return result;
+}
+
+/**
+ *  @brief  Computes an approximate cosine of the given angle in radians with 0 ULP error bound.
+ *  @see    `xsin` in SLEEF library
+ *  @param  angle The input angle in radians.
+ *  @return The approximate cosine of the input angle.
+ */
+SIMSIMD_PUBLIC simsimd_f64_t simsimd_f64_sin(simsimd_f64_t angle) {
+    // Constants for bit manipulation
+    simsimd_i64_t const negative_zero = 0x8000000000000000LL; // Hexadecimal value of -0.0 in IEEE 754
+
+    // Union for bit manipulation between simsimd_f64_t and simsimd_i64_t
+    union {
+        simsimd_f64_t f64;
+        simsimd_i64_t i64;
+    } converter;
+
+    // Preserve the original angle for special case handling (negative zero)
+    simsimd_f64_t original_angle = angle;
+
+    // Constants for argument reduction
+    simsimd_f64_t const pi_reciprocal = 0.318309886183790671537767526745028724; // 1/π
+    simsimd_f64_t const pi_high = 3.141592653589793116;                         // High-precision part of π
+    simsimd_f64_t const pi_low = 1.2246467991473532072e-16;                     // Low-precision part of π
+
+    // Compute multiple_of_pi = round(angle / π)
+    simsimd_f64_t quotient = angle * pi_reciprocal;
+    int multiple_of_pi = (int)(quotient < 0 ? quotient - 0.5 : quotient + 0.5);
+
+    // Reduce the angle: angle = angle - (multiple_of_pi * π)
+    angle = (multiple_of_pi * -pi_high) + angle;
+    angle = (multiple_of_pi * -pi_low) + angle;
+
+    // Compute the square of the reduced argument
+    simsimd_f64_t argument_square = angle * angle;
+
+    // Adjust the sign of the angle if multiple_of_pi is odd
+    if ((multiple_of_pi & 1) != 0) { angle = -angle; }
+
+    // Compute higher powers of the argument
+    simsimd_f64_t argument_power_4 = argument_square * argument_square;   // angle^4
+    simsimd_f64_t argument_power_8 = argument_power_4 * argument_power_4; // angle^8
+
+    // Polynomial coefficients for sine approximation (minimax polynomial)
+    simsimd_f64_t const coeff_0 = 0.00833333333333332974823815;
+    simsimd_f64_t const coeff_1 = -0.000198412698412696162806809;
+    simsimd_f64_t const coeff_2 = 2.75573192239198747630416e-06;
+    simsimd_f64_t const coeff_3 = -2.50521083763502045810755e-08;
+    simsimd_f64_t const coeff_4 = 1.60590430605664501629054e-10;
+    simsimd_f64_t const coeff_5 = -7.64712219118158833288484e-13;
+    simsimd_f64_t const coeff_6 = 2.81009972710863200091251e-15;
+    simsimd_f64_t const coeff_7 = -7.97255955009037868891952e-18;
+    simsimd_f64_t const coeff_8 = -0.166666666666666657414808;
+
+    // Compute higher-degree polynomial terms
+    simsimd_f64_t temp1 = (argument_square * coeff_7) + coeff_6;  // temp1 = s * c7 + c6
+    simsimd_f64_t temp2 = (argument_square * coeff_5) + coeff_4;  // temp2 = s * c5 + c4
+    simsimd_f64_t poly_high = (argument_power_4 * temp1) + temp2; // poly_high = s^4 * temp1 + temp2
+
+    // Compute lower-degree polynomial terms
+    simsimd_f64_t temp3 = (argument_square * coeff_3) + coeff_2; // temp3 = s * c3 + c2
+    simsimd_f64_t temp4 = (argument_square * coeff_1) + coeff_0; // temp4 = s * c1 + c0
+    simsimd_f64_t poly_low = (argument_power_4 * temp3) + temp4; // poly_low = s^4 * temp3 + temp4
+
+    // Combine polynomial terms
+    simsimd_f64_t result = (argument_power_8 * poly_high) + poly_low; // result = s^8 * poly_high + poly_low
+    result = (result * argument_square) + coeff_8;                    // result = result * s + c8
+    result = (argument_square * (result * angle)) + angle;            // result = s * (result * angle) + angle
+
+    // Handle the special case of negative zero input
+    converter.f64 = original_angle;
+    if (converter.i64 == negative_zero) { result = original_angle; }
+
+    return result;
+}
+
+/**
+ *  @brief  Computes an approximate cosine of the given angle in radians with 0 ULP error bound.
+ *  @see    `xcos` in SLEEF library
+ *  @param  angle The input angle in radians.
+ *  @return The approximate cosine of the input angle.
+ */
+SIMSIMD_PUBLIC simsimd_f64_t simsimd_f64_cos(simsimd_f64_t angle) {
+    // Constants for bit manipulation
+    simsimd_i64_t const negative_zero = 0x8000000000000000LL; // Hexadecimal value of -0.0 in IEEE 754
+
+    // Union for bit manipulation between simsimd_f64_t and simsimd_i64_t
+    union {
+        simsimd_f64_t f64;
+        simsimd_i64_t i64;
+    } converter;
+
+    // Variables
+    simsimd_f64_t result;                 // The final result of the cosine computation
+    simsimd_f64_t angle_squared;          // Square of the reduced angle
+    simsimd_f64_t original_angle = angle; // Preserve the original angle for special case handling
+    int multiple_of_pi;                   // The integer multiple of π in the input angle
+
+    // Constants for argument reduction
+    simsimd_f64_t const pi_reciprocal = 0.318309886183790671537767526745028724; // 1/π
+    simsimd_f64_t const pi_high = 3.141592653589793116;                         // High-precision part of π
+    simsimd_f64_t const pi_low = 1.2246467991473532072e-16;                     // Low-precision part of π
+
+    // Compute multiple_of_pi = 2 * round(angle * (1/π) - 0.5) + 1
+    simsimd_f64_t quotient = angle * pi_reciprocal - 0.5;
+    int temp = (int)(quotient < 0 ? quotient - 0.5 : quotient + 0.5);
+    multiple_of_pi = 2 * temp + 1;
+
+    // Reduce the angle: angle = angle - multiple_of_pi * (π / 2)
+    angle = angle - multiple_of_pi * (pi_high * 0.5);
+    angle = angle - multiple_of_pi * (pi_low * 0.5);
+
+    // Compute the square of the reduced angle
+    angle_squared = angle * angle;
+
+    // Adjust the sign of the angle if necessary
+    if ((multiple_of_pi & 2) == 0) { angle = -angle; }
+
+    // Compute higher powers of the argument
+    simsimd_f64_t angle_power_2 = angle_squared;
+    simsimd_f64_t angle_power_4 = angle_power_2 * angle_power_2; // angle^4
+    simsimd_f64_t angle_power_8 = angle_power_4 * angle_power_4; // angle^8
+
+    // Polynomial coefficients for cosine approximation (minimax polynomial)
+    simsimd_f64_t const coeff_0 = 0.00833333333333332974823815;
+    simsimd_f64_t const coeff_1 = -0.000198412698412696162806809;
+    simsimd_f64_t const coeff_2 = 2.75573192239198747630416e-06;
+    simsimd_f64_t const coeff_3 = -2.50521083763502045810755e-08;
+    simsimd_f64_t const coeff_4 = 1.60590430605664501629054e-10;
+    simsimd_f64_t const coeff_5 = -7.64712219118158833288484e-13;
+    simsimd_f64_t const coeff_6 = 2.81009972710863200091251e-15;
+    simsimd_f64_t const coeff_7 = -7.97255955009037868891952e-18;
+    simsimd_f64_t const coeff_8 = -0.166666666666666657414808;
+
+    // Compute higher-degree polynomial terms
+    simsimd_f64_t temp1 = (angle_squared * coeff_7) + coeff_6; // temp1 = s * c7 + c6
+    simsimd_f64_t temp2 = (angle_squared * coeff_5) + coeff_4; // temp2 = s * c5 + c4
+    simsimd_f64_t poly_high = (angle_power_4 * temp1) + temp2; // poly_high = s^4 * temp1 + temp2
+
+    // Compute lower-degree polynomial terms
+    simsimd_f64_t temp3 = (angle_squared * coeff_3) + coeff_2; // temp3 = s * c3 + c2
+    simsimd_f64_t temp4 = (angle_squared * coeff_1) + coeff_0; // temp4 = s * c1 + c0
+    simsimd_f64_t poly_low = (angle_power_4 * temp3) + temp4;  // poly_low = s^4 * temp3 + temp4
+
+    // Combine polynomial terms
+    result = (angle_power_8 * poly_high) + poly_low;     // result = s^8 * poly_high + poly_low
+    result = (result * angle_squared) + coeff_8;         // result = result * s + c8
+    result = (angle_squared * (result * angle)) + angle; // result = s * (result * angle) + angle
+
+    // Return the final result
+    return result;
+}
+
+#if _SIMSIMD_TARGET_X86
+#if SIMSIMD_TARGET_HASWELL
+#pragma GCC push_options
+#pragma GCC target("avx2", "f16c", "fma")
+#pragma clang attribute push(__attribute__((target("avx2,f16c,fma"))), apply_to = function)
+
+#pragma clang attribute pop
+#pragma GCC pop_options
+#endif // SIMSIMD_TARGET_HASWELL
+#endif // _SIMSIMD_TARGET_X86
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/scripts/bench.cxx b/scripts/bench.cxx
index 6907f8c6..1661bd63 100644
--- a/scripts/bench.cxx
+++ b/scripts/bench.cxx
@@ -511,6 +511,7 @@ void measure_elementwise(bm::State &state, kernel_at kernel, kernel_at baseline,
         }
         else if constexpr (kernel_ak == simsimd_sum_k) { baseline(a.data(), c.data(), a.dimensions(), d.data()); }
         else if constexpr (kernel_ak == simsimd_scale_k) { baseline(a.data(), a.dimensions(), alpha, beta, d.data()); }
+        else { baseline(a.data(), a.dimensions(), d.data()); }
     };
     auto call_contender = [&](vector_t const &a, vector_t const &b, vector_t const &c, vector_t &d) {
         if constexpr (kernel_ak == simsimd_wsum_k) {
@@ -521,6 +522,7 @@ void measure_elementwise(bm::State &state, kernel_at kernel, kernel_at baseline,
         }
         else if constexpr (kernel_ak == simsimd_sum_k) { kernel(a.data(), c.data(), a.dimensions(), d.data()); }
         else if constexpr (kernel_ak == simsimd_scale_k) { kernel(a.data(), a.dimensions(), alpha, beta, d.data()); }
+        else { kernel(a.data(), a.dimensions(), d.data()); }
     };
 
     // Let's average the distance results over many quads.
@@ -580,6 +582,96 @@ void measure_elementwise(bm::State &state, kernel_at kernel, kernel_at baseline,
     state.counters["pairs"] = bm::Counter(iterations, bm::Counter::kIsRate);
 }
 
+/**
+ *  @brief Measures the performance of a geospatial operations between 4 arrays: 2 latitudes, 2 longitudes.
+ *  @tparam pair_at The type representing the vector pair used in the measurement.
+ *  @tparam kernel_at The type of the kernel function (default is void).
+ *  @param state The benchmark state object provided by Google Benchmark.
+ *  @param kernel The kernel function to benchmark.
+ *  @param baseline The baseline function to compare against.
+ *  @param l2_metric The L2 metric function to compute the error
+ *  @param dimensions The number of dimensions in the vectors.
+ */
+template <typename pair_at, typename kernel_at = void, typename l2_metric_at = void>
+void measure_geospatial(bm::State &state, kernel_at kernel, kernel_at baseline, l2_metric_at l2_metric,
+                        std::size_t dimensions) {
+
+    using pair_t = pair_at;
+    using vector_t = typename pair_at::vector_t;
+    using scalar_t = typename vector_t::scalar_t;
+    struct quad_t {
+        vector_t lat1, lon1, lat2, lon2;
+    };
+
+    using distances_t = vector_gt<simsimd_f64_k>;
+    auto call_baseline = [&](quad_t const &quad, distances_t &d) {
+        baseline(quad.lat1.data(), quad.lon1.data(), quad.lat2.data(), quad.lon2.data(), quad.lat1.dimensions(),
+                 d.data());
+    };
+    auto call_contender = [&](quad_t const &quad, distances_t &d) {
+        kernel(quad.lat1.data(), quad.lon1.data(), quad.lat2.data(), quad.lon2.data(), quad.lat1.dimensions(),
+               d.data());
+    };
+
+    // Let's average the distance results over many quads.
+    constexpr std::size_t quads_count = 128;
+    std::vector<quad_t> quads(quads_count);
+
+    std::random_device random_device;
+    std::mt19937 random_generator(random_device());
+    /// Latitude range (-90 to 90 degrees) in radians
+    std::uniform_real_distribution<scalar_t> lat_dist(-M_PI_2, M_PI_2);
+    /// Longitude range (-180 to 180 degrees) in radians
+    std::uniform_real_distribution<scalar_t> lon_dist(-M_PI, M_PI);
+    for (std::size_t i = 0; i != quads.size(); ++i) {
+        auto &quad = quads[i];
+        quad.lat1 = quad.lat2 = quad.lon1 = quad.lon2 = vector_t(dimensions);
+        std::generate(quad.lat1.data(), quad.lat1.data() + dimensions, [&]() { return lat_dist(random_generator); });
+        std::generate(quad.lat2.data(), quad.lat2.data() + dimensions, [&]() { return lat_dist(random_generator); });
+        std::generate(quad.lon1.data(), quad.lon1.data() + dimensions, [&]() { return lon_dist(random_generator); });
+        std::generate(quad.lon2.data(), quad.lon2.data() + dimensions, [&]() { return lon_dist(random_generator); });
+    }
+
+    // Initialize the output buffers for distance calculations.
+    distances_t baseline_d(dimensions), contender_d(dimensions), zeros(dimensions);
+    std::vector<simsimd_distance_t> l2_metric_from_baseline(quads.size());
+    std::vector<simsimd_distance_t> l2_baseline_result_norm(quads.size());
+    std::vector<simsimd_distance_t> l2_contender_result_norm(quads.size());
+    zeros.set(0);
+    double mean_delta = 0, mean_relative_error = 0;
+    for (std::size_t i = 0; i != quads.size(); ++i) {
+        quad_t &quad = quads[i];
+        call_baseline(quad, baseline_d);
+        call_contender(quad, contender_d);
+        l2_metric(baseline_d.data(), contender_d.data(), dimensions, &l2_metric_from_baseline[i]);
+        l2_metric(baseline_d.data(), zeros.data(), dimensions, &l2_baseline_result_norm[i]);
+        l2_metric(contender_d.data(), zeros.data(), dimensions, &l2_contender_result_norm[i]);
+
+        mean_delta += std::abs(l2_metric_from_baseline[i]);
+        mean_relative_error +=
+            std::abs(l2_metric_from_baseline[i]) / (std::max)(l2_baseline_result_norm[i], l2_contender_result_norm[i]);
+    }
+    mean_delta /= quads_count;
+    mean_relative_error /= quads_count;
+
+    // The actual benchmarking loop.
+    std::size_t iterations = 0;
+    for (auto _ : state) {
+        quad_t &quad = quads[iterations & (quads_count - 1)];
+        call_contender(quad, contender_d);
+        iterations++;
+    }
+
+    std::size_t const bytes_per_call =                            //
+        quads[0].lat1.size_bytes() + quads[0].lat2.size_bytes() + //
+        quads[0].lon1.size_bytes() + quads[0].lon2.size_bytes();
+    // Measure the mean absolute delta and relative error.
+    state.counters["abs_delta"] = mean_delta;
+    state.counters["relative_error"] = mean_relative_error;
+    state.counters["bytes"] = bm::Counter(iterations * bytes_per_call, bm::Counter::kIsRate);
+    state.counters["pairs"] = bm::Counter(iterations * dimensions, bm::Counter::kIsRate);
+}
+
 template <simsimd_datatype_t datatype_ak, typename metric_at = void>
 void dense_(std::string name, metric_at *distance_func, metric_at *baseline_func) {
     using pair_t = vectors_pair_gt<datatype_ak>;
@@ -590,8 +682,8 @@ void dense_(std::string name, metric_at *distance_func, metric_at *baseline_func
         ->Threads(default_threads);
 }
 
-template <simsimd_datatype_t datatype_ak, simsimd_kernel_kind_t kernel_ak, typename kernel_at = void,
-          typename l2_metric_at = void>
+template <simsimd_datatype_t datatype_ak, simsimd_kernel_kind_t kernel_ak = simsimd_kernel_unknown_k,
+          typename kernel_at = void, typename l2_metric_at = void>
 void elementwise_(std::string name, kernel_at *kernel_func, kernel_at *baseline_func, l2_metric_at *l2_metric_func) {
     using pair_t = vectors_pair_gt<datatype_ak>;
     std::string bench_name = name + "<" + std::to_string(dense_dimensions) + "d>";
@@ -601,6 +693,16 @@ void elementwise_(std::string name, kernel_at *kernel_func, kernel_at *baseline_
         ->Threads(default_threads);
 }
 
+template <simsimd_datatype_t datatype_ak, typename kernel_at = void, typename l2_metric_at = void>
+void geospatial_(std::string name, kernel_at *kernel_func, kernel_at *baseline_func, l2_metric_at *l2_metric_func) {
+    using pair_t = vectors_pair_gt<datatype_ak>;
+    std::string bench_name = name + "<" + std::to_string(dense_dimensions) + "d>";
+    bm::RegisterBenchmark(bench_name.c_str(), measure_geospatial<pair_t, kernel_at *, l2_metric_at *>, kernel_func,
+                          baseline_func, l2_metric_func, dense_dimensions)
+        ->MinTime(default_seconds)
+        ->Threads(default_threads);
+}
+
 template <simsimd_datatype_t datatype_ak, typename metric_at = void>
 void sparse_(std::string name, metric_at *distance_func, metric_at *baseline_func) {
 
@@ -647,6 +749,57 @@ void l2_with_stl(scalar_at const *a, scalar_at const *b, simsimd_size_t n, simsi
     *result = std::sqrt(sum);
 }
 
+template <typename scalar_at, typename accumulator_at = scalar_at>
+simsimd_distance_t haversine_one_with_stl(scalar_at lat1, scalar_at lon1, scalar_at lat2, scalar_at lon2) {
+    // Convert angle to radians:
+    // lat1 *= M_PI / 180, lon1 *= M_PI / 180;
+    // lat2 *= M_PI / 180, lon2 *= M_PI / 180;
+    accumulator_at dlat = lat2 - lat1;
+    accumulator_at dlon = lon2 - lon1;
+    accumulator_at a = //
+        std::sin(dlat / 2) * std::sin(dlat / 2) +
+        std::cos(lat1) * std::cos(lat2) * std::sin(dlon / 2) * std::sin(dlon / 2);
+    accumulator_at c = 2 * std::atan2(std::sqrt(a), std::sqrt(1 - a));
+    return c;
+}
+
+template <typename scalar_at, typename accumulator_at = scalar_at>
+void haversine_with_stl(                              //
+    scalar_at const *a_lats, scalar_at const *a_lons, //
+    scalar_at const *b_lats, scalar_at const *b_lons, //
+    simsimd_size_t n, simsimd_distance_t *results) {
+    for (simsimd_size_t i = 0; i != n; ++i) {
+        scalar_at lat1 = a_lats[i], lon1 = a_lons[i];
+        scalar_at lat2 = b_lats[i], lon2 = b_lons[i];
+        results[i] = haversine_one_with_stl<scalar_at, accumulator_at>(lat1, lon1, lat2, lon2);
+    }
+}
+
+template <typename scalar_at>
+struct sin_with_stl {
+    scalar_at operator()(scalar_at const &a) const { return std::sin(a); }
+};
+template <typename scalar_at>
+struct cos_with_stl {
+    scalar_at operator()(scalar_at const &a) const { return std::cos(a); }
+};
+
+namespace av::simsimd {
+struct sin {
+    simsimd_f32_t operator()(simsimd_f32_t const &a) const { return simsimd_f32_sin(a); }
+    simsimd_f64_t operator()(simsimd_f64_t const &a) const { return simsimd_f64_sin(a); }
+};
+struct cos {
+    simsimd_f32_t operator()(simsimd_f32_t const &a) const { return simsimd_f32_cos(a); }
+    simsimd_f64_t operator()(simsimd_f64_t const &a) const { return simsimd_f64_cos(a); }
+};
+} // namespace av::simsimd
+
+template <typename scalar_at, typename kernel_at>
+void elementwise_with_stl(scalar_at const *ins, simsimd_size_t n, scalar_at *outs) {
+    for (simsimd_size_t i = 0; i != n; ++i) outs[i] = kernel_at {}(ins[i]);
+}
+
 #if SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS
 
 void dot_f32_blas(simsimd_f32_t const *a, simsimd_f32_t const *b, simsimd_size_t n, simsimd_distance_t *result) {
@@ -768,6 +921,24 @@ int main(int argc, char **argv) {
     constexpr simsimd_datatype_t f16c_k = simsimd_f16c_k;
     constexpr simsimd_datatype_t bf16c_k = simsimd_bf16c_k;
 
+    elementwise_<f32_k>("f32_sin_stl", elementwise_with_stl<simsimd_f32_t, sin_with_stl<simsimd_f32_t>>,
+                        elementwise_with_stl<simsimd_f32_t, sin_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f32_t>);
+    elementwise_<f32_k>("f32_cos_stl", elementwise_with_stl<simsimd_f32_t, cos_with_stl<simsimd_f32_t>>,
+                        elementwise_with_stl<simsimd_f32_t, cos_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f32_t>);
+    elementwise_<f32_k>("f32_sin", elementwise_with_stl<simsimd_f32_t, av::simsimd::sin>,
+                        elementwise_with_stl<simsimd_f32_t, sin_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f32_t>);
+    elementwise_<f32_k>("f32_cos", elementwise_with_stl<simsimd_f32_t, av::simsimd::cos>,
+                        elementwise_with_stl<simsimd_f32_t, cos_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f32_t>);
+
+    elementwise_<f64_k>("f64_sin_stl", elementwise_with_stl<simsimd_f64_t, sin_with_stl<simsimd_f64_t>>,
+                        elementwise_with_stl<simsimd_f64_t, sin_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f64_t>);
+    elementwise_<f64_k>("f64_cos_stl", elementwise_with_stl<simsimd_f64_t, cos_with_stl<simsimd_f64_t>>,
+                        elementwise_with_stl<simsimd_f64_t, cos_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f64_t>);
+    elementwise_<f64_k>("f64_sin", elementwise_with_stl<simsimd_f64_t, av::simsimd::sin>,
+                        elementwise_with_stl<simsimd_f64_t, sin_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f64_t>);
+    elementwise_<f64_k>("f64_cos", elementwise_with_stl<simsimd_f64_t, av::simsimd::cos>,
+                        elementwise_with_stl<simsimd_f64_t, cos_with_stl<simsimd_f64_t>>, l2_with_stl<simsimd_f64_t>);
+
 #if SIMSIMD_BUILD_BENCHMARKS_WITH_CBLAS
 
     dense_<f32_k>("dot_f32_blas", dot_f32_blas, simsimd_dot_f32_accurate);
@@ -1179,6 +1350,11 @@ int main(int argc, char **argv) {
     elementwise_<i8_k, simsimd_wsum_k>("wsum_i8_serial", simsimd_wsum_i8_serial, simsimd_wsum_i8_accurate,
                                        simsimd_l2_i8_serial);
 
+    geospatial_<f32_k>("haversine_f32_serial", haversine_with_stl<simsimd_f32_t>,
+                       haversine_with_stl<simsimd_f32_t, simsimd_f64_t>, l2_with_stl<simsimd_f64_t>);
+    geospatial_<f64_k>("haversine_f64_serial", haversine_with_stl<simsimd_f64_t>, haversine_with_stl<simsimd_f64_t>,
+                       l2_with_stl<simsimd_f64_t>);
+
     bm::RunSpecifiedBenchmarks();
     bm::Shutdown();
     return 0;
diff --git a/scripts/test.c b/scripts/test.c
index f032bb34..6c21f830 100644
--- a/scripts/test.c
+++ b/scripts/test.c
@@ -283,7 +283,80 @@ void test_xd_index(void) {
     }
 }
 
-void test_saturating_arithmeic(void) {}
+/**
+ *  @brief  Goes through all possible `f32` values in a relevant range, computing
+ */
+void test_approximate_math(void) {
+    union {
+        simsimd_f32_t f32;
+        simsimd_u32_t u32;
+    } x;
+
+    struct error_aggregator {
+        simsimd_f64_t absolute_error;
+        simsimd_f64_t relative_error;
+        simsimd_f64_t max_error;
+    } f32_cos_errors = {0, 0, 0}, f32_sin_errors = {0, 0, 0}, f64_cos_errors = {0, 0, 0}, f64_sin_errors = {0, 0, 0};
+
+    // Test all possible values of f32 within ranges: [-π, -1], [-1, -0], [0, 1], [1, π].
+    simsimd_size_t count_tests = 0;
+    for (x.u32 = 0; x.u32 < 0xFFFFFFFF; ++x.u32) {
+        if (x.f32 >= -3.14159265358979323846f && x.f32 <= 3.14159265358979323846f) {
+
+            simsimd_f64_t f64_sin_baseline = sin(x.f32);
+            simsimd_f64_t f64_sin_approx = simsimd_f64_sin(x.f32);
+            simsimd_f64_t f64_sin_diff = fabs(f64_sin_baseline - f64_sin_approx);
+            simsimd_f64_t f64_sin_max = fmax(fabs(f64_sin_baseline), fabs(f64_sin_approx));
+            f64_sin_errors.absolute_error += f64_sin_diff;
+            f64_sin_errors.relative_error += f64_sin_max != 0 ? f64_sin_diff / f64_sin_max : 0;
+            f64_sin_errors.max_error = fmax(f64_sin_errors.max_error, f64_sin_diff);
+
+            simsimd_f32_t f32_sin_baseline = sinf(x.f32);
+            simsimd_f32_t f32_sin_approx = simsimd_f32_sin(x.f32);
+            simsimd_f32_t f32_sin_diff = fabsf(f32_sin_baseline - f32_sin_approx);
+            simsimd_f32_t f32_sin_max = fmaxf(fabsf(f32_sin_baseline), fabsf(f32_sin_approx));
+            f32_sin_errors.absolute_error += f32_sin_diff;
+            f32_sin_errors.relative_error += f32_sin_max != 0 ? f32_sin_diff / f32_sin_max : 0;
+            f32_sin_errors.max_error = fmax(f32_sin_errors.max_error, f32_sin_diff);
+
+            simsimd_f64_t f64_cos_baseline = cos(x.f32);
+            simsimd_f64_t f64_cos_approx = simsimd_f64_cos(x.f32);
+            simsimd_f64_t f64_cos_diff = fabs(f64_cos_baseline - f64_cos_approx);
+            simsimd_f64_t f64_cos_max = fmax(fabs(f64_cos_baseline), fabs(f64_cos_approx));
+            f64_cos_errors.absolute_error += f64_cos_diff;
+            f64_cos_errors.relative_error += f64_cos_max != 0 ? f64_cos_diff / f64_cos_max : 0;
+            f64_cos_errors.max_error = fmax(f64_cos_errors.max_error, f64_cos_diff);
+
+            simsimd_f32_t f32_cos_baseline = cosf(x.f32);
+            simsimd_f32_t f32_cos_approx = simsimd_f32_cos(x.f32);
+            simsimd_f32_t f32_cos_diff = fabsf(f32_cos_baseline - f32_cos_approx);
+            simsimd_f32_t f32_cos_max = fmaxf(fabsf(f32_cos_baseline), fabsf(f32_cos_approx));
+            f32_cos_errors.absolute_error += f32_cos_diff;
+            f32_cos_errors.relative_error += f32_cos_max != 0 ? f32_cos_diff / f32_cos_max : 0;
+            f32_cos_errors.max_error = fmax(f32_cos_errors.max_error, f32_cos_diff);
+
+            ++count_tests;
+        }
+    }
+
+    f32_sin_errors.absolute_error /= count_tests;
+    f32_sin_errors.relative_error *= 100 / count_tests;
+    f32_cos_errors.absolute_error /= count_tests;
+    f32_cos_errors.relative_error *= 100 / count_tests;
+    f64_sin_errors.absolute_error /= count_tests;
+    f64_sin_errors.relative_error *= 100 / count_tests;
+    f64_cos_errors.absolute_error /= count_tests;
+    f64_cos_errors.relative_error *= 100 / count_tests;
+
+    printf("f32 sin errors: %f or %.6f%%, peaking at %f\n", f32_sin_errors.absolute_error,
+           f32_sin_errors.relative_error, f32_sin_errors.max_error);
+    printf("f32 cos errors: %f or %.6f%%, peaking at %f\n", f32_cos_errors.absolute_error,
+           f32_cos_errors.relative_error, f32_cos_errors.max_error);
+    printf("f64 sin errors: %f or %.6f%%, peaking at %f\n", f64_sin_errors.absolute_error,
+           f64_sin_errors.relative_error, f64_sin_errors.max_error);
+    printf("f64 cos errors: %f or %.6f%%, peaking at %f\n", f64_cos_errors.absolute_error,
+           f64_cos_errors.relative_error, f64_cos_errors.max_error);
+}
 
 /**
  *  @brief  A trivial test that calls every implemented distance function and their dispatch versions
@@ -359,6 +432,7 @@ int main(int argc, char **argv) {
     print_capabilities();
     test_utilities();
     test_saturating_arithmetic();
+    test_approximate_math();
     test_xd_index();
     test_distance_from_itself();
     printf("All tests passed.\n");