From 1c7c344a483b19ede30b434bb74344ad354f45a0 Mon Sep 17 00:00:00 2001 From: jianfengmao Date: Tue, 12 Nov 2024 09:24:08 -0700 Subject: [PATCH 1/2] Handle signature-less callables for UDF calls --- .../engine/util/PyCallableWrapper.java | 1 + .../engine/util/PyCallableWrapperJpyImpl.java | 22 ++++++++++++++++--- py/server/deephaven/_udf.py | 19 +++++++++++----- py/server/tests/test_udf_scalar_args.py | 6 +++++ py/server/tests/test_vectorization.py | 10 +++++++++ 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java index bcbcfb5462a..604a992d28d 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java @@ -56,6 +56,7 @@ public String getName() { class Signature { private final List parameters; private final Class returnType; + public static Signature EMPTY_SIGNATURE = new Signature(List.of(), null); public Signature(List parameters, Class returnType) { this.parameters = parameters; diff --git a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java index fe8dc367439..a9ba9e863b0 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java @@ -193,18 +193,29 @@ private void prepareSignature() { vectorized = false; } pyUdfDecorator = dh_udf_module.call("_udf_parser", pyCallable); - signatureString = pyUdfDecorator.getAttribute("signature").toString(); + // The Python UDF parser failed to get/parse the callable's signature. This is likely due to it being + // a function in an extension module or a signature-less/multi-signature builtin function such as 'max'. + if (pyUdfDecorator.isNone()) { + pyUdfDecorator = null; + signature = Signature.EMPTY_SIGNATURE; + } else { + signatureString = pyUdfDecorator.getAttribute("signature").toString(); + } } @Override public void parseSignature() { - if (signatureString != null) { + if (signature != null) { return; } prepareSignature(); + if (signature == Signature.EMPTY_SIGNATURE) { + return; + } + // the 'types' field of a vectorized function follows the pattern of '[ilhfdb?O]*->[ilhfdb?O]', // eg. [ll->d] defines two int64 (long) arguments and a double return type. if (signatureString == null || signatureString.isEmpty()) { @@ -286,7 +297,12 @@ public static boolean isLosslessWideningPrimitiveConversion(@NotNull Class or return false; } + public void verifyArguments(Class[] argTypes) { + if (signature == Signature.EMPTY_SIGNATURE) { + return; + } + String callableName = pyCallable.getAttribute("__name__").toString(); List parameters = signature.getParameters(); @@ -358,7 +374,7 @@ public void verifyArguments(Class[] argTypes) { // In vectorized mode, we want to call the vectorized function directly. public PyObject vectorizedCallable() { - if (numbaVectorized) { + if (numbaVectorized || pyUdfDecorator == null) { return pyCallable; } else { return pyUdfDecorator.call("__call__", this.argTypesStr, true); diff --git a/py/server/deephaven/_udf.py b/py/server/deephaven/_udf.py index 5163a51b577..1140c8a08e6 100644 --- a/py/server/deephaven/_udf.py +++ b/py/server/deephaven/_udf.py @@ -485,7 +485,7 @@ def _parse_np_ufunc_signature(fn: numpy.ufunc) -> _ParsedSignature: return p_sig -def _parse_signature(fn: Callable) -> _ParsedSignature: +def _parse_signature(fn: Callable) -> Optional[_ParsedSignature]: """ Parse the signature of a function """ if numba: @@ -496,10 +496,14 @@ def _parse_signature(fn: Callable) -> _ParsedSignature: return _parse_np_ufunc_signature(fn) else: p_sig = _ParsedSignature(fn=fn) - if sys.version_info >= (3, 10): - sig = inspect.signature(fn, eval_str=True) # novermin - else: - sig = inspect.signature(fn) + try: + if sys.version_info >= (3, 10): + sig = inspect.signature(fn, eval_str=True) # novermin + else: + sig = inspect.signature(fn) + except ValueError: + # some built-in functions don't have a signature, neither do some functions from C extensions + return None for n, p in sig.parameters.items(): # when from __future__ import annotations is used, the annotation is a string, we need to eval it to get @@ -513,7 +517,7 @@ def _parse_signature(fn: Callable) -> _ParsedSignature: p_sig.ret_annotation = _parse_return_annotation(t) return p_sig -def _udf_parser(fn: Callable): +def _udf_parser(fn: Callable) -> Optional[Callable]: """A decorator that acts as a transparent translator for Python UDFs used in Deephaven query formulas between Python and Java. This decorator is intended for internal use by the Deephaven query engine and should not be used by users. @@ -531,6 +535,9 @@ def _udf_parser(fn: Callable): return fn p_sig = _parse_signature(fn) + if p_sig is None: + return None + return_array = p_sig.ret_annotation.has_array ret_np_char = p_sig.ret_annotation.encoded_type[-1] ret_dtype = dtypes.from_np_dtype(np.dtype(ret_np_char if ret_np_char != "X" else "O")) diff --git a/py/server/tests/test_udf_scalar_args.py b/py/server/tests/test_udf_scalar_args.py index 408b58486aa..617fa85be86 100644 --- a/py/server/tests/test_udf_scalar_args.py +++ b/py/server/tests/test_udf_scalar_args.py @@ -613,6 +613,12 @@ def f(p1: float, p2: np.float64) -> bool: self.assertRegex(str(w[-1].message), "numpy scalar type.*is used") self.assertEqual(10, t.to_string().count("true")) + def test_no_signature(self): + builtin_max = max + t = empty_table(10).update("X = (int) builtin_max(1, 2, 3)") + self.assertEqual(t.columns[0].data_type, dtypes.int32) + self.assertEqual(10, t.to_string().count("3")) + if __name__ == "__main__": unittest.main() diff --git a/py/server/tests/test_vectorization.py b/py/server/tests/test_vectorization.py index ab227d02cc5..7039ebceb38 100644 --- a/py/server/tests/test_vectorization.py +++ b/py/server/tests/test_vectorization.py @@ -342,5 +342,15 @@ def f3(p1: np.ndarray[np.bool_]) -> np.ndarray[np.bool_]: self.assertEqual(_udf.vectorized_count, 1) _udf.vectorized_count = 0 + def test_no_signature_array(self): + builtin_max = max + + t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? `deephaven`: `rocks`"]).group_by("X").update("Y = Y.toArray()") + t1 = t.update(["X1 = builtin_max(Y)"]) + self.assertEqual(t1.columns[2].data_type, dtypes.JObject) + self.assertEqual(_udf.vectorized_count, 0) + _udf.vectorized_count = 0 + + if __name__ == "__main__": unittest.main() From d167332d81a7114529b5d50c12f72aa5a0608bd9 Mon Sep 17 00:00:00 2001 From: jianfengmao Date: Fri, 15 Nov 2024 13:57:27 -0700 Subject: [PATCH 2/2] Better naming and code cleanup --- .../java/io/deephaven/engine/util/PyCallableWrapper.java | 2 +- .../io/deephaven/engine/util/PyCallableWrapperJpyImpl.java | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java index 604a992d28d..11c560804d8 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapper.java @@ -56,7 +56,7 @@ public String getName() { class Signature { private final List parameters; private final Class returnType; - public static Signature EMPTY_SIGNATURE = new Signature(List.of(), null); + public final static Signature EMPTY = new Signature(List.of(), null); public Signature(List parameters, Class returnType) { this.parameters = parameters; diff --git a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java index a9ba9e863b0..bfb061c5e7a 100644 --- a/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java +++ b/engine/table/src/main/java/io/deephaven/engine/util/PyCallableWrapperJpyImpl.java @@ -197,7 +197,7 @@ private void prepareSignature() { // a function in an extension module or a signature-less/multi-signature builtin function such as 'max'. if (pyUdfDecorator.isNone()) { pyUdfDecorator = null; - signature = Signature.EMPTY_SIGNATURE; + signature = Signature.EMPTY; } else { signatureString = pyUdfDecorator.getAttribute("signature").toString(); } @@ -212,7 +212,7 @@ public void parseSignature() { prepareSignature(); - if (signature == Signature.EMPTY_SIGNATURE) { + if (signature == Signature.EMPTY) { return; } @@ -297,9 +297,8 @@ public static boolean isLosslessWideningPrimitiveConversion(@NotNull Class or return false; } - public void verifyArguments(Class[] argTypes) { - if (signature == Signature.EMPTY_SIGNATURE) { + if (signature == Signature.EMPTY) { return; }