Commit
Add parsing of typing arguments from the model predict method: Optional, Sequence (List, Tuple, Set). (#22)

* Add parsing of typing arguments from the model predict method: Optional, List.
nxexox authored Nov 14, 2024
1 parent 48577ca commit 7482bf1
Showing 11 changed files with 406 additions and 76 deletions.
5 changes: 3 additions & 2 deletions docs/config_file.md
@@ -34,11 +34,12 @@ Default is `ModelLibraryType.SKLEARN`.
`columns (Optional[List[Dict[str, str]]])` - Columns description for model predict method.
Format: List[Dict].

Example [{"name": "col1", "type": "str", "required": True, "default": None}]
Example [{"name": "col1", "type": "str", "required": True, "default": None, "collection_type": "List"}]
name - Column name;
type - Column type in string: int, str, float, bool.
required - bool, optional field. Default is True.
-Default - Any data and optional.
+default - Any data and optional.
+collection_type - Type of collection, optional field. Supported: List. Default is None.

If you do not set columns, then columns validation will be False.
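For example, a minimal sketch of a columns list that mixes scalar and list-valued columns under this format (the names and values are illustrative, not part of the commit):

    columns = [
        # Scalar column: one int per row, must be present.
        {"name": "user_id", "type": "int", "required": True},
        # List-valued column via the new field: a list of floats per row, optional.
        {"name": "scores", "type": "float", "collection_type": "List", "required": False, "default": None},
    ]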

2 changes: 1 addition & 1 deletion mlup/__init__.py
@@ -3,4 +3,4 @@
from mlup.web.app import MLupWebApp, WebAppConfig


__version__ = "0.2.2"
__version__ = "0.2.3"
110 changes: 93 additions & 17 deletions mlup/utils/interspection.py
@@ -2,7 +2,14 @@
import inspect
import logging
from enum import Enum
-from typing import Callable, Any, Union, Optional, Type
+from typing import Callable, Any, Union, Optional, Type, Generic, Sequence, Dict, Tuple

+try:
+    from typing import get_args as typing_get_args, get_origin as typing_get_origin
+# For Python <= 3.7
+except ImportError:
+    typing_get_args = lambda t: getattr(t, '__args__', ()) if t is not Generic else Generic  # noqa: E731
+    typing_get_origin = lambda t: getattr(t, '__origin__', None)  # noqa: E731

from mlup.constants import IS_X, THERE_IS_ARGS, DEFAULT_X_ARG_NAME, BinarizationType, LoadedFile
from mlup.utils.profiling import TimeProfiler
@@ -11,6 +18,77 @@
logger = logging.getLogger('mlup')


+SUPPORTED_PRIMITIVE_TYPES = {
+    int: 'int',
+    float: 'float',
+    bool: 'bool',
+    str: 'str',
+}
+
+
+def _is_optional(field: Any) -> bool:
+    return typing_get_origin(field) is Union and type(None) in typing_get_args(field)
+
+
+def _is_sequence(field: Any) -> bool:
+    _type_for_check = typing_get_origin(field)
+    _collection_types = (list, set, tuple)
+    try:
+        return ((field is not None and field in _collection_types)
+                or (_type_for_check is not None and issubclass(_type_for_check, Sequence)))
+    except TypeError:
+        # If _type_for_check is not a class object. For example Optional[int].
+        return False
+
+
+def parse_attributes_from_generic_type(
+    param: inspect.Parameter,
+) -> Tuple[Dict[str, Any], bool]:
+    """
+    Search for and return a primitive type from a single level of a Generic.
+    If no supported type is found, the default type str is used.
+    :param inspect.Parameter param: Parameter that needs to be parsed.
+    :return: Attributes from the parsed Generic and the parsing result.
+        If the bool is True, parsing failed; if False, parsing succeeded.
+        {"type": "int", "required": False, "collection_type": None}, False
+        Key "type" is optional.
+        Key "collection_type" is optional.
+    :rtype: Tuple[Dict[str, Any], bool]
+    """
+    result = {
+        'required': True,
+    }
+    _types_for_analyze = typing_get_args(param.annotation)
+
+    logger.debug(f"Analyze argument '{param.name}', attempt to determine the primitive type.")
+
+    if _is_optional(param.annotation):
+        result['required'] = False
+    if _is_sequence(param.annotation):
+        result['collection_type'] = 'List'
+    if len(_types_for_analyze) > 0 and _is_sequence(_types_for_analyze[0]):
+        result['collection_type'] = 'List'
+        _types_for_analyze = typing_get_args(_types_for_analyze[0])
+
+    for p in _types_for_analyze:
+        if p in SUPPORTED_PRIMITIVE_TYPES:
+            result['type'] = SUPPORTED_PRIMITIVE_TYPES[p]
+            break
+
+    _parse_error = False
+
+    if 'type' not in result:
+        logger.warning(f"Cannot determine primitive type for '{param.name}'.")
+        _parse_error = True
+
+    logger.debug(f"For argument '{param.name}' parsing result '{result}'")
+
+    return result, _parse_error


def get_class_by_path(path_to_class: Union[str, Enum]) -> Any:
"""
Get class by path to class. Use importlib.import_module.
@@ -66,12 +144,6 @@ def example(a, b = 100, *, c: float = 123):
    sign = inspect.signature(func)
    arg_spec = inspect.getfullargspec(func)
    result = []
-    types = {
-        int: 'int',
-        float: 'float',
-        bool: 'bool',
-        str: 'str',
-    }
    is_there_args = False
    logger.info(f'Analyzing arguments in {func}.')

@@ -93,23 +165,27 @@
            'required': True,
        }
        if param_obj.annotation is not inspect._empty:
-            if param_obj.annotation in types:
-                param_data['type'] = types[param_obj.annotation]
+            if param_obj.annotation in SUPPORTED_PRIMITIVE_TYPES:
+                param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[param_obj.annotation]
            else:
-                logger.warning(
-                    f'For model predict argument writes not supported type {param_obj.annotation}. '
-                    f'Skip added validation'
-                )
+                _param_attributes, _parse_error = parse_attributes_from_generic_type(param_obj)
+                param_data.update(**_param_attributes)
+
+                if _parse_error:
+                    logger.warning(
+                        f'For model predict argument writes not supported type {param_obj.annotation}. '
+                        f'Skip added validation'
+                    )

        if param_obj.default is not inspect._empty:
            param_data['required'] = False
            param_data['default'] = param_obj.default
-            if 'type' not in param_data and type(param_obj.default) in types:
-                param_data['type'] = types[type(param_obj.default)]
+            if 'type' not in param_data and type(param_obj.default) in SUPPORTED_PRIMITIVE_TYPES:
+                param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[type(param_obj.default)]

        if param_name.lower().strip() == 'x' and auto_detect_predict_params:
            logger.info('Found X param in model params. Set List type')
-            param_data['type'] = 'List'
+            param_data['collection_type'] = 'List'
            param_data[IS_X] = True
            _found_X = True

@@ -121,7 +197,7 @@
            f'X argument in predict method not found. '
            f'For predict data use first argument with name "{result[0]["name"]}".'
        )
-        result[0]['type'] = 'List'
+        result[0]['collection_type'] = 'List'
        result[0][IS_X] = True
    else:
        logger.info('Not found arguments in predict method.')
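A usage sketch of the new helper (the predict signature is illustrative; the expected values follow the parsing logic above, assuming Python 3.8+ typing introspection):

    import inspect
    from typing import List, Optional

    from mlup.utils.interspection import parse_attributes_from_generic_type

    def predict(x: Optional[List[int]] = None):
        ...

    param = inspect.signature(predict).parameters['x']
    attrs, parse_error = parse_attributes_from_generic_type(param)
    # Optional[...] clears 'required', the inner List[int] sets the collection
    # type, and int is a supported primitive, so the expected result is:
    # attrs == {'required': False, 'collection_type': 'List', 'type': 'int'}
    # parse_error == False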
13 changes: 11 additions & 2 deletions mlup/web/api_docs.py
@@ -35,10 +35,19 @@ def make_columns_object_openapi_scheme(src_columns: List[Dict]) -> Tuple[Dict, L
    cols_openapi_config = {}
    required_columns = []
    for col_config in src_columns:
-        col_name, col_type = col_config["name"], col_config.get("type", "str")
+        col_name = col_config["name"]
        col_required, col_default = col_config.get("required", True), col_config.get("default", None)

_col_config = {"type": _openapi_types_map[col_type.lower()]}
_col_config = {}
col_type = col_config.get("type", None)
if "collection_type" in col_config:
if col_type is not None:
_col_config["items"] = {"type": _openapi_types_map[col_type.lower()]}
_col_config["type"] = _openapi_types_map[col_config["collection_type"].lower()]
col_type = "list"
else:
_col_config["type"] = _openapi_types_map[col_type].lower() or "string"

        title = []
        if col_default is not None:
            title.append("Default")
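The intended effect: a column such as {"name": "scores", "type": "float", "collection_type": "List"} should now render as an array property in the OpenAPI schema, roughly as below (assuming _openapi_types_map carries the usual mappings, e.g. "float" -> "number" and "list" -> "array"; the map itself is not shown in this diff):

    "scores": {
        "type": "array",
        "items": {"type": "number"}
    }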
6 changes: 6 additions & 0 deletions mlup/web/api_validators.py
@@ -84,6 +84,9 @@ def make_map_pydantic_columns(
        'str': str,
        'list': list,
    }
+    collection_types = {
+        'List': List,
+    }
    __validators__ = {}
    columns_pydantic_format = {}
    # If set None, from ml.columns
@@ -102,6 +105,9 @@
                f'Supported types {", ".join(column_types.keys())}.'
            )
            col_type = Any
+        # Wrap the primitive in its collection type, e.g. "int" -> List[int]
+        if col_config.get('collection_type', None) in collection_types:
+            col_type = collection_types[col_config['collection_type']][col_type]

        # Required
        field_required_default_value = Field(...)
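A short sketch of what the new mapping does before the pydantic model is built: the resolved primitive type is wrapped in typing.List, mirroring the added lines above (the column config is illustrative):

    from typing import List

    collection_types = {'List': List}

    col_config = {'name': 'scores', 'type': 'int', 'collection_type': 'List'}
    col_type = int  # resolved earlier from the column_types map
    if col_config.get('collection_type', None) in collection_types:
        col_type = collection_types[col_config['collection_type']][col_type]
    # col_type is now List[int]; pydantic validates this field as a JSON array of integers.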
102 changes: 82 additions & 20 deletions tests/unit_tests/ml/test_data_transformers.py
@@ -93,18 +93,19 @@ def test_transform_to_model_format_from_list(self):

    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
-            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
-            {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52},
-            {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53},
-            {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54},
-            {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55},
+            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51, 'col6': [61, 71]},
+            {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52, 'col6': [62, 72]},
+            {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53, 'col6': [63, 73]},
+            {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54, 'col6': [64, 74]},
+            {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55, 'col6': [65, 75]},
        ]
        cols = [
            {'name': 'col1', 'type': 'int'},
            {'name': 'col2', 'type': 'int'},
            {'name': 'col3', 'type': 'int'},
            {'name': 'col4', 'type': 'int'},
            {'name': 'col5', 'type': 'int'},
+            {'name': 'col6', 'type': 'int', 'collection_type': 'List'},
        ]
        # Check create
        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
@@ -168,13 +169,14 @@ def test_transform_to_model_format_from_list(self):
            {'name': 'col3', 'type': 'int'},
            {'name': 'colstr', 'type': 'str'},
            {'name': 'colbool', 'type': 'bool'},
+            {'name': 'collistfloat', 'type': 'float', 'collection_type': 'List'},
        ]
        data = [
-            [11, 21, 31, 'colstr', True],
-            [12, 22, 32, 'colstr', True],
-            [13, 23, 33, 'colstr', False],
-            [14, 24, 34, 'colstr', True],
-            [15, 25, 35, 'colstr', True],
+            [11, 21, 31, 'colstr', True, [41.0, 51.0]],
+            [12, 22, 32, 'colstr', True, [42.0, 52.0]],
+            [13, 23, 33, 'colstr', False, [43.0, 53.0]],
+            [14, 24, 34, 'colstr', True, [44.0, 54.0]],
+            [15, 25, 35, 'colstr', True, [45.0, 55.0]],
        ]
        df = self.transformer_class().transform_to_model_format(data, columns=cols)

@@ -187,11 +189,11 @@

    def test_transform_to_json_format(self):
        data = [
-            {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False},
-            {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True},
+            {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]},
+            {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]},
+            {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]},
+            {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]},
+            {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]},
        ]
        df = pd.DataFrame(data=data)
        trans_data = self.transformer_class().transform_to_json_format(df)
@@ -200,11 +202,11 @@ def test_transform_to_json_format_from_list(self):

    def test_transform_to_json_format_from_list(self):
        data = [
-            [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}],
-            [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}],
+            [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]}],
+            [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}],
+            [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}],
+            [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}],
+            [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}],
        ]
        dfs = [pd.DataFrame(data=d) for d in data]
        trans_data = self.transformer_class().transform_to_json_format(dfs)
@@ -253,6 +255,26 @@ def test_transform_to_model_format_from_list(self):
        pred_d = self.transformer_class().transform_to_model_format(data)
        assert np.array_equal(pred_d, np.array(data))

+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        data = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        cols = [
+            {'name': 'col1', 'type': 'int', 'collection_type': 'List'},
+            {'name': 'col2', 'type': 'int', 'collection_type': 'List'},
+        ]
+        # Check create
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
+        assert np.array_equal(pred_d, np.array([list(v.values()) for v in data]))
+
+        # Check order by columns
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
+        assert np.array_equal(pred_d, np.array([list(v.values())[::-1] for v in data]))
+
    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -359,6 +381,26 @@ def test_transform_to_model_format_from_list(self):
        pred_d = self.transformer_class().transform_to_model_format(data)
        assert_tf_tensors(pred_d, tensorflow.convert_to_tensor(data))

+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        data = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        cols = [
+            {'name': 'col1', 'type': 'int', 'collection_type': 'List'},
+            {'name': 'col2', 'type': 'int', 'collection_type': 'List'},
+        ]
+        # Check create
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
+        assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values()) for v in data]))
+
+        # Check order by columns
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
+        assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values())[::-1] for v in data]))
+
    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -467,6 +509,26 @@ def test_transform_to_model_format_from_list(self):
        pred_d = self.transformer_class().transform_to_model_format(data)
        assert is_equal_torch_tensors(pred_d, torch.tensor(data))

+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        data = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        cols = [
+            {'name': 'col1', 'type': 'int', 'collection_type': 'List'},
+            {'name': 'col2', 'type': 'int', 'collection_type': 'List'},
+        ]
+        # Check create
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
+        assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values()) for v in data]))
+
+        # Check order by columns
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
+        assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values())[::-1] for v in data]))
+
    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
