Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Correctly handle signature-less functions for Py UDF calls #6368

Merged
merged 2 commits into from
Nov 15, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ public String getName() {
class Signature {
private final List<Parameter> parameters;
private final Class<?> returnType;
public static Signature EMPTY_SIGNATURE = new Signature(List.of(), null);
jmao-denver marked this conversation as resolved.
Show resolved Hide resolved

public Signature(List<Parameter> parameters, Class<?> returnType) {
this.parameters = parameters;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,18 +193,29 @@ private void prepareSignature() {
vectorized = false;
}
pyUdfDecorator = dh_udf_module.call("_udf_parser", pyCallable);
signatureString = pyUdfDecorator.getAttribute("signature").toString();
// The Python UDF parser failed to get/parse the callable's signature. This is likely due to it being
// a function in an extension module or a signature-less/multi-signature builtin function such as 'max'.
if (pyUdfDecorator.isNone()) {
pyUdfDecorator = null;
signature = Signature.EMPTY_SIGNATURE;
} else {
signatureString = pyUdfDecorator.getAttribute("signature").toString();
}
}


@Override
public void parseSignature() {
if (signatureString != null) {
if (signature != null) {
return;
}

prepareSignature();

if (signature == Signature.EMPTY_SIGNATURE) {
return;
}

// the 'types' field of a vectorized function follows the pattern of '[ilhfdb?O]*->[ilhfdb?O]',
// eg. [ll->d] defines two int64 (long) arguments and a double return type.
if (signatureString == null || signatureString.isEmpty()) {
Expand Down Expand Up @@ -286,7 +297,12 @@ public static boolean isLosslessWideningPrimitiveConversion(@NotNull Class<?> or
return false;
}


jmao-denver marked this conversation as resolved.
Show resolved Hide resolved
public void verifyArguments(Class<?>[] argTypes) {
if (signature == Signature.EMPTY_SIGNATURE) {
return;
}

String callableName = pyCallable.getAttribute("__name__").toString();
List<Parameter> parameters = signature.getParameters();

Expand Down Expand Up @@ -358,7 +374,7 @@ public void verifyArguments(Class<?>[] argTypes) {

// In vectorized mode, we want to call the vectorized function directly.
public PyObject vectorizedCallable() {
if (numbaVectorized) {
if (numbaVectorized || pyUdfDecorator == null) {
return pyCallable;
} else {
return pyUdfDecorator.call("__call__", this.argTypesStr, true);
Expand Down
19 changes: 13 additions & 6 deletions py/server/deephaven/_udf.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,7 +485,7 @@ def _parse_np_ufunc_signature(fn: numpy.ufunc) -> _ParsedSignature:
return p_sig


def _parse_signature(fn: Callable) -> _ParsedSignature:
def _parse_signature(fn: Callable) -> Optional[_ParsedSignature]:
""" Parse the signature of a function """

if numba:
Expand All @@ -496,10 +496,14 @@ def _parse_signature(fn: Callable) -> _ParsedSignature:
return _parse_np_ufunc_signature(fn)
else:
p_sig = _ParsedSignature(fn=fn)
if sys.version_info >= (3, 10):
sig = inspect.signature(fn, eval_str=True) # novermin
else:
sig = inspect.signature(fn)
try:
if sys.version_info >= (3, 10):
sig = inspect.signature(fn, eval_str=True) # novermin
else:
sig = inspect.signature(fn)
except ValueError:
# some built-in functions don't have a signature, neither do some functions from C extensions
return None

for n, p in sig.parameters.items():
# when from __future__ import annotations is used, the annotation is a string, we need to eval it to get
Expand All @@ -513,7 +517,7 @@ def _parse_signature(fn: Callable) -> _ParsedSignature:
p_sig.ret_annotation = _parse_return_annotation(t)
return p_sig

def _udf_parser(fn: Callable):
def _udf_parser(fn: Callable) -> Optional[Callable]:
"""A decorator that acts as a transparent translator for Python UDFs used in Deephaven query formulas between
Python and Java. This decorator is intended for internal use by the Deephaven query engine and should not be used by
users.
Expand All @@ -531,6 +535,9 @@ def _udf_parser(fn: Callable):
return fn

p_sig = _parse_signature(fn)
if p_sig is None:
return None

return_array = p_sig.ret_annotation.has_array
ret_np_char = p_sig.ret_annotation.encoded_type[-1]
ret_dtype = dtypes.from_np_dtype(np.dtype(ret_np_char if ret_np_char != "X" else "O"))
Expand Down
6 changes: 6 additions & 0 deletions py/server/tests/test_udf_scalar_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,12 @@ def f(p1: float, p2: np.float64) -> bool:
self.assertRegex(str(w[-1].message), "numpy scalar type.*is used")
self.assertEqual(10, t.to_string().count("true"))

def test_no_signature(self):
builtin_max = max
t = empty_table(10).update("X = (int) builtin_max(1, 2, 3)")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR is marked as fixing #6349. #6349 mentions two problems. This test is casting the return type, so it is either a partial fix of the ticket or there should be another test to check type inference.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking into this problem some more, pybind11 actually does provide signature information in a way that I think we are not yet supporting. From the example in #6349, I added:

print(blackscholes.price.__doc__)

This outputs a docstring that is prefixed with the method signature.

price(arg0: float, arg1: float, arg2: float, arg3: float, arg4: float, arg5: bool, arg6: bool) -> float

Here are more things that may be useful:

print(type(blackscholes.price))
<class 'builtin_function_or_method'>
print(blackscholes.price.__qualname__)
PyCapsule.price

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. I don't think the doc string of a callable is a reliable source to infer its signature. Even if it is, the work needed to do that correctly may not justify the benefit
  2. that said, jedi does try to infer function arguments for sphinx, epydoc and basic numpydoc docstrings, but it doesn't expose any public API for that.
  3. PyCharm seems to rely solely on signatures to do static check/code assistant on functions

Based on the above, I am not sure it is so bad that we just document the limitation and workaround when we can't get a signature via the standard inspect module.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could easily make this "just work" by using the return type. Relying on documented workarounds is a poor substitute to the product functioning as expected.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean we do a very crude parsing (regex search) of doc and look for '-> type' to get the 'return type'?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The improvement requested makes sense but is not trivial to do. We want to get the bug fix in first and #6382 is filed as an enhancement ticket for more broad inference support by the UDF parser.

self.assertEqual(t.columns[0].data_type, dtypes.int32)
self.assertEqual(10, t.to_string().count("3"))


if __name__ == "__main__":
unittest.main()
10 changes: 10 additions & 0 deletions py/server/tests/test_vectorization.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,5 +342,15 @@ def f3(p1: np.ndarray[np.bool_]) -> np.ndarray[np.bool_]:
self.assertEqual(_udf.vectorized_count, 1)
_udf.vectorized_count = 0

def test_no_signature_array(self):
builtin_max = max

t = empty_table(10).update(["X = i % 3", "Y = i % 2 == 0? `deephaven`: `rocks`"]).group_by("X").update("Y = Y.toArray()")
t1 = t.update(["X1 = builtin_max(Y)"])
self.assertEqual(t1.columns[2].data_type, dtypes.JObject)
self.assertEqual(_udf.vectorized_count, 0)
_udf.vectorized_count = 0


if __name__ == "__main__":
unittest.main()
Loading