diff --git a/docs/config_file.md b/docs/config_file.md index e59b385..d8e496d 100644 --- a/docs/config_file.md +++ b/docs/config_file.md @@ -34,11 +34,12 @@ Default is `ModelLibraryType.SKLEARN`. `columns (Optional[List[Dict[str, str]]])` - Columns description for model predict method. Format: List[Dict]. - Example [{"name": "col1", "type": "str", "required": True, "default": None}] + Example [{"name": "col1", "type": "str", "required": True, "default": None, "collection_type": "List"}] name - Column name; type - Column type in string: int, str, float, bool. required - bool and optional field. By default is True. - Default - Any data and optional. + default - Default value of any type; optional field. + collection_type - Collection type of the column; optional field. Supported: List. Default is None. If you not set columns, then columns validation should be False. diff --git a/mlup/__init__.py b/mlup/__init__.py index 473bc8e..714ae22 100644 --- a/mlup/__init__.py +++ b/mlup/__init__.py @@ -3,4 +3,4 @@ from mlup.web.app import MLupWebApp, WebAppConfig -__version__ = "0.2.2" +__version__ = "0.2.3" diff --git a/mlup/utils/interspection.py b/mlup/utils/interspection.py index ea66b5c..d037d35 100644 --- a/mlup/utils/interspection.py +++ b/mlup/utils/interspection.py @@ -2,7 +2,14 @@ import inspect import logging from enum import Enum -from typing import Callable, Any, Union, Optional, Type +from typing import Callable, Any, Union, Optional, Type, Generic, Sequence, Dict, Tuple + +try: + from typing import get_args as typing_get_args, get_origin as typing_get_origin +# For Python <= 3.7 +except ImportError: + typing_get_args = lambda t: getattr(t, '__args__', ()) if t is not Generic else Generic # noqa: E731 + typing_get_origin = lambda t: getattr(t, '__origin__', None) # noqa: E731 from mlup.constants import IS_X, THERE_IS_ARGS, DEFAULT_X_ARG_NAME, BinarizationType, LoadedFile from mlup.utils.profiling import TimeProfiler @@ -11,6 +18,77 @@ logger = logging.getLogger('mlup') 
+SUPPORTED_PRIMITIVE_TYPES = { + int: 'int', + float: 'float', + bool: 'bool', + str: 'str', +} + + +def _is_optional(field: Any) -> bool: + return typing_get_origin(field) is Union and type(None) in typing_get_args(field) + + +def _is_sequence(field: Any) -> bool: + _type_for_check = typing_get_origin(field) + _collection_types = (list, set, tuple) + try: + return ((field is not None and field in _collection_types) + or (_type_for_check is not None and issubclass(_type_for_check, Sequence))) + except TypeError: + # issubclass() raises TypeError when _type_for_check is not a class, for example Union from Optional[int]. + return False + + +def parse_attributes_from_generic_type( + param: inspect.Parameter, +) -> Tuple[Dict[str, Any], bool]: + """ + Search for a supported primitive type within a single level of a Generic annotation. + If no supported primitive type is found, the "type" key is omitted from the result. + + :param inspect.Parameter param: Parameter that needs to be parsed. + + :return: Attributes parsed from the Generic and a parse-error flag. + If bool is True, then parsing failed (no supported primitive type found), else parsing succeeded. + {"type": "int", "required": False, "collection_type": "List"}, False + Key "type" is optional. + Key "collection_type" is optional. 
+ :rtype: Dict[str, Any], bool + + """ + result = { + 'required': True, + } + _types_for_analyze = typing_get_args(param.annotation) + + logger.debug(f"Analyze argument '{param.name}', attempt to pick up determine primitive type.") + + if _is_optional(param.annotation): + result['required'] = False + if _is_sequence(param.annotation): + result['collection_type'] = 'List' + if len(_types_for_analyze) > 0 and _is_sequence(_types_for_analyze[0]): + result['collection_type'] = 'List' + _types_for_analyze = typing_get_args(_types_for_analyze[0]) + + for p in _types_for_analyze: + if p in SUPPORTED_PRIMITIVE_TYPES: + result['type'] = SUPPORTED_PRIMITIVE_TYPES[p] + break + + _parse_error = False + + if 'type' not in result: + logger.warning(f"Cannot determine primitive type for '{param.name}'.") + _parse_error = True + + logger.debug(f"For argument '{param.name}' parsing result '{result}'") + + return result, _parse_error + + def get_class_by_path(path_to_class: Union[str, Enum]) -> Any: """ Get class by path to class. Use importlib.import_module. @@ -66,12 +144,6 @@ def example(a, b = 100, *, c: float = 123): sign = inspect.signature(func) arg_spec = inspect.getfullargspec(func) result = [] - types = { - int: 'int', - float: 'float', - bool: 'bool', - str: 'str', - } is_there_args = False logger.info(f'Analyzing arguments in {func}.') @@ -93,23 +165,27 @@ def example(a, b = 100, *, c: float = 123): 'required': True, } if param_obj.annotation is not inspect._empty: - if param_obj.annotation in types: - param_data['type'] = types[param_obj.annotation] + if param_obj.annotation in SUPPORTED_PRIMITIVE_TYPES: + param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[param_obj.annotation] else: - logger.warning( - f'For model predict argument writes not supported type {param_obj.annotation}. 
' - f'Skip added validation' - ) + _param_attributes, _parse_error = parse_attributes_from_generic_type(param_obj) + param_data.update(**_param_attributes) + + if _parse_error: + logger.warning( + f'For model predict argument writes not supported type {param_obj.annotation}. ' + f'Skip added validation' + ) if param_obj.default is not inspect._empty: param_data['required'] = False param_data['default'] = param_obj.default - if 'type' not in param_data and type(param_obj.default) in types: - param_data['type'] = types[type(param_obj.default)] + if 'type' not in param_data and type(param_obj.default) in SUPPORTED_PRIMITIVE_TYPES: + param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[type(param_obj.default)] if param_name.lower().strip() == 'x' and auto_detect_predict_params: logger.info('Found X param in model params. Set List type') - param_data['type'] = 'List' + param_data['collection_type'] = 'List' param_data[IS_X] = True _found_X = True @@ -121,7 +197,7 @@ def example(a, b = 100, *, c: float = 123): f'X argument in predict method not found. ' f'For predict data use first argument with name "{result[0]["name"]}".' 
) - result[0]['type'] = 'List' + result[0]['collection_type'] = 'List' result[0][IS_X] = True else: logger.info('Not found arguments in predict method.') diff --git a/mlup/web/api_docs.py b/mlup/web/api_docs.py index 922ae52..e44514f 100644 --- a/mlup/web/api_docs.py +++ b/mlup/web/api_docs.py @@ -35,10 +35,19 @@ def make_columns_object_openapi_scheme(src_columns: List[Dict]) -> Tuple[Dict, L cols_openapi_config = {} required_columns = [] for col_config in src_columns: - col_name, col_type = col_config["name"], col_config.get("type", "str") + col_name = col_config["name"] col_required, col_default = col_config.get("required", True), col_config.get("default", None) - _col_config = {"type": _openapi_types_map[col_type.lower()]} + _col_config = {} + col_type = col_config.get("type", None) + if "collection_type" in col_config: + if col_type is not None: + _col_config["items"] = {"type": _openapi_types_map[col_type.lower()]} + _col_config["type"] = _openapi_types_map[col_config["collection_type"].lower()] + col_type = "list" + else: + _col_config["type"] = _openapi_types_map[(col_type or "str").lower()] + title = [] if col_default is not None: title.append("Default") diff --git a/mlup/web/api_validators.py b/mlup/web/api_validators.py index 0541dd0..883f939 100644 --- a/mlup/web/api_validators.py +++ b/mlup/web/api_validators.py @@ -84,6 +84,9 @@ def make_map_pydantic_columns( 'str': str, 'list': list, } + collection_types = { + 'List': List, + } __validators__ = {} columns_pydantic_format = {} # If set None, from ml.columns @@ -102,6 +105,9 @@ def make_map_pydantic_columns( f'Supported types {", ".join(column_types.keys())}.' ) col_type = Any + # Wrap the element type in its collection type, for example int -> List[int] + if col_config.get('collection_type', None) in collection_types: + col_type = collection_types[col_config['collection_type']][col_type] # Required field_required_default_value = Field(...) 
diff --git a/tests/unit_tests/ml/test_data_transformers.py b/tests/unit_tests/ml/test_data_transformers.py index 100cd85..b083f21 100644 --- a/tests/unit_tests/ml/test_data_transformers.py +++ b/tests/unit_tests/ml/test_data_transformers.py @@ -93,11 +93,11 @@ def test_transform_to_model_format_from_list(self): def test_transform_to_model_format_from_dict_with_columns(self): data = [ - {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, - {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52}, - {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53}, - {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54}, - {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55}, + {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51, 'col6': [61, 71]}, + {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52, 'col6': [62, 72]}, + {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53, 'col6': [63, 73]}, + {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54, 'col6': [64, 74]}, + {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55, 'col6': [65, 75]}, ] cols = [ {'name': 'col1', 'type': 'int'}, @@ -105,6 +105,7 @@ def test_transform_to_model_format_from_dict_with_columns(self): {'name': 'col3', 'type': 'int'}, {'name': 'col4', 'type': 'int'}, {'name': 'col5', 'type': 'int'}, + {'name': 'col6', 'type': 'int', 'collection_type': 'List'}, ] # Check create pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) @@ -168,13 +169,14 @@ def test_transform_to_model_format_from_list(self): {'name': 'col3', 'type': 'int'}, {'name': 'colstr', 'type': 'str'}, {'name': 'colbool', 'type': 'bool'}, + {'name': 'collistfloat', 'type': 'float', 'collection_type': 'List'}, ] data = [ - [11, 21, 31, 'colstr', True], - [12, 22, 32, 'colstr', True], - [13, 23, 33, 'colstr', False], - [14, 24, 34, 'colstr', True], - [15, 25, 35, 'colstr', True], + [11, 21, 31, 'colstr', True, [41.0, 51.0]], + [12, 22, 32, 'colstr', 
True, [42.0, 52.0]], + [13, 23, 33, 'colstr', False, [43.0, 53.0]], + [14, 24, 34, 'colstr', True, [44.0, 54.0]], + [15, 25, 35, 'colstr', True, [45.0, 55.0]], ] df = self.transformer_class().transform_to_model_format(data, columns=cols) @@ -187,11 +189,11 @@ def test_transform_to_model_format_from_list(self): def test_transform_to_json_format(self): data = [ - {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}, - {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}, - {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}, - {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}, - {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}, + {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]}, + {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}, + {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}, + {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}, + {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}, ] df = pd.DataFrame(data=data) trans_data = self.transformer_class().transform_to_json_format(df) @@ -200,11 +202,11 @@ def test_transform_to_json_format(self): def test_transform_to_json_format_from_list(self): data = [ - [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}], - [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}], - [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}], - [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}], - [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}], + [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': 
[41.0, 51.0]}], + [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}], + [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}], + [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}], + [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}], ] dfs = [pd.DataFrame(data=d) for d in data] trans_data = self.transformer_class().transform_to_json_format(dfs) @@ -253,6 +255,26 @@ def test_transform_to_model_format_from_list(self): pred_d = self.transformer_class().transform_to_model_format(data) assert np.array_equal(pred_d, np.array(data)) + def test_transform_to_model_format_from_dict_with_list_columns(self): + data = [ + {'col1': [11, 111], 'col2': [21, 221]}, + {'col1': [12, 112], 'col2': [22, 222]}, + {'col1': [13, 113], 'col2': [23, 223]}, + {'col1': [14, 114], 'col2': [24, 224]}, + {'col1': [15, 115], 'col2': [25, 225]}, + ] + cols = [ + {'name': 'col1', 'type': 'int', 'collection_type': 'List'}, + {'name': 'col2', 'type': 'int', 'collection_type': 'List'}, + ] + # Check create + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) + assert np.array_equal(pred_d, np.array([list(v.values()) for v in data])) + + # Check order by columns + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1]) + assert np.array_equal(pred_d, np.array([list(v.values())[::-1] for v in data])) + def test_transform_to_model_format_from_dict_with_columns(self): data = [ {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, @@ -359,6 +381,26 @@ def test_transform_to_model_format_from_list(self): pred_d = self.transformer_class().transform_to_model_format(data) assert_tf_tensors(pred_d, tensorflow.convert_to_tensor(data)) + def test_transform_to_model_format_from_dict_with_list_columns(self): + data = [ + {'col1': [11, 
111], 'col2': [21, 221]}, + {'col1': [12, 112], 'col2': [22, 222]}, + {'col1': [13, 113], 'col2': [23, 223]}, + {'col1': [14, 114], 'col2': [24, 224]}, + {'col1': [15, 115], 'col2': [25, 225]}, + ] + cols = [ + {'name': 'col1', 'type': 'int', 'collection_type': 'List'}, + {'name': 'col2', 'type': 'int', 'collection_type': 'List'}, + ] + # Check create + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) + assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values()) for v in data])) + + # Check order by columns + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1]) + assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values())[::-1] for v in data])) + def test_transform_to_model_format_from_dict_with_columns(self): data = [ {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, @@ -467,6 +509,26 @@ def test_transform_to_model_format_from_list(self): pred_d = self.transformer_class().transform_to_model_format(data) assert is_equal_torch_tensors(pred_d, torch.tensor(data)) + def test_transform_to_model_format_from_dict_with_list_columns(self): + data = [ + {'col1': [11, 111], 'col2': [21, 221]}, + {'col1': [12, 112], 'col2': [22, 222]}, + {'col1': [13, 113], 'col2': [23, 223]}, + {'col1': [14, 114], 'col2': [24, 224]}, + {'col1': [15, 115], 'col2': [25, 225]}, + ] + cols = [ + {'name': 'col1', 'type': 'int', 'collection_type': 'List'}, + {'name': 'col2', 'type': 'int', 'collection_type': 'List'}, + ] + # Check create + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) + assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values()) for v in data])) + + # Check order by columns + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1]) + assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values())[::-1] for v in data])) + def test_transform_to_model_format_from_dict_with_columns(self): data = [ {'col1': 
11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, diff --git a/tests/unit_tests/ml/test_model.py b/tests/unit_tests/ml/test_model.py index e7a0985..de2a4a7 100644 --- a/tests/unit_tests/ml/test_model.py +++ b/tests/unit_tests/ml/test_model.py @@ -320,7 +320,7 @@ def test_load_model_from_pickle_change_predict_arguments( mlup_model.load(force_loading=True) assert mlup_model._predict_arguments != predict_arguments assert mlup_model._predict_arguments == [ - {'name': 'Y', 'required': True, 'type': 'List', 'is_X': True}, + {'name': 'Y', 'required': True, 'collection_type': 'List', 'is_X': True}, {'name': 'test_param', 'required': False, 'type': 'bool', 'default': False}, ] @@ -800,3 +800,21 @@ async def test_predict_from_numpy(self): predicted_data = await mlup_model.predict_from(X=[1, 2, 3, 4, 5, 6, 7, 8, 9]) np.array_equal(predicted_data, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])) + + @pytest.mark.asyncio + async def test_predict_with_list_columns(self): + mlup_model = MLupModel( + ml_model=ModelWithX(), + conf=ModelConfig( + data_transformer_for_predict=ModelDataTransformerType.NUMPY_ARR, + data_transformer_for_predicted=ModelDataTransformerType.SRC_TYPES, + columns=[ + {"name": "col1", "type": "int", "collection_type": "List"}, + {"name": "col2", "type": "int", "collection_type": "List"}, + ], + ) + ) + mlup_model.load() + + predicted_data = await mlup_model.predict_from(X=[[1, 2], [3, 4], [5, 6]]) + np.array_equal(predicted_data, np.array([[1, 2], [3, 4], [5, 6]])) diff --git a/tests/unit_tests/utils/test_interspection.py b/tests/unit_tests/utils/test_interspection.py index cf459e2..de37a7d 100644 --- a/tests/unit_tests/utils/test_interspection.py +++ b/tests/unit_tests/utils/test_interspection.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional, Union, Tuple, Dict import sys import pytest @@ -13,6 +13,22 @@ def pred_func_without_x(wt, y: List, b: bool = False): pass def pred_func_with_list(wt, x: list, b: bool = False): pass +def 
pred_func_with_optional_params( + wt, + x: list, + opt: Optional[int], + opt_unknown_type: Optional[Dict], + opt_union: Union[float, None], + opt_union_tuple: Union[Tuple, None], + opt_union_unknown_type: Union[Dict, None], + opt_union_otherwise: Union[None, float], + opt_union_many_types: Union[float, Tuple, int, str, bool, None], + opt_default: Optional[int] = 10, + opt_union_default: Union[float, None] = 100, + b: bool = False, +): pass + + if sys.version_info.minor >= 9: def pred_func_with_list_of_int(wt, x: list[int], b: bool = False): pass else: @@ -21,17 +37,37 @@ def pred_func_with_list_of_int(wt, x: List[int], b: bool = False): pass pred_func_args_without_auto_detect_predict_params = [ {'name': 'wt', 'required': True}, - {'name': 'x', 'required': True}, + {'name': 'x', 'required': True, 'collection_type': 'List', 'type': 'str'}, + {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, +] +pred_func_args_without_auto_detect_predict_params_with_X_int = [ + {'name': 'wt', 'required': True}, + {'name': 'x', 'required': True, 'collection_type': 'List', 'type': 'int'}, + {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, +] +pred_func_args_without_auto_detect_predict_params_without_X_type = [ + {'name': 'wt', 'required': True}, + {'name': 'x', 'required': True, 'collection_type': 'List'}, {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, ] pred_func_args_with_auto_detect_predict_params = [ {'name': 'wt', 'required': True}, - {'name': 'x', 'required': True, 'type': 'List', IS_X: True}, + {'name': 'x', 'required': True, IS_X: True, 'collection_type': 'List'}, + {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, +] +pred_func_args_with_auto_detect_predict_params_with_x_type_str = [ + {'name': 'wt', 'required': True}, + {'name': 'x', 'required': True, IS_X: True, 'collection_type': 'List', 'type': 'str'}, + {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, +] 
+pred_func_args_with_auto_detect_predict_params_with_x_type_int = [ + {'name': 'wt', 'required': True}, + {'name': 'x', 'required': True, IS_X: True, 'collection_type': 'List', 'type': 'int'}, {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, ] pred_func_args_with_auto_detect_predict_params_without_x = [ - {'name': 'wt', 'required': True, 'type': 'List', IS_X: True}, - {'name': 'y', 'required': True}, + {'name': 'wt', 'required': True, 'collection_type': 'List', IS_X: True}, + {'name': 'y', 'required': True, 'collection_type': 'List'}, {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, ] @@ -46,11 +82,27 @@ def pred_func_with_cls_and_X_List_of_str(cls, wt, x: List[str], b: bool = False) def pred_func_staticmethod_with_list(wt, x: list, b: bool = False): pass def pred_func_without_x(self, wt, y: List, b: bool = False): pass + def pred_func_with_optional_params( + self, + wt, + x: list, + opt: Optional[int], + opt_unknown_type: Optional[Dict], + opt_union: Union[float, None], + opt_union_tuple: Union[Tuple, None], + opt_union_unknown_type: Union[Dict, None], + opt_union_otherwise: Union[None, float], + opt_union_many_types: Union[float, Tuple, int, str, bool, None], + opt_default: Optional[int] = 10, + opt_union_default: Union[float, None] = 100, + b: bool = False, + ): pass + @pytest.mark.parametrize( 'pred_func, expected_result', [ pytest.param( - pred_func_with_X_List, pred_func_args_without_auto_detect_predict_params, + pred_func_with_X_List, pred_func_args_without_auto_detect_predict_params_without_X_type, id='func_with_X_type_List' ), pytest.param( @@ -58,35 +110,41 @@ def pred_func_without_x(self, wt, y: List, b: bool = False): pass id='func_with_X_type_List[str]' ), pytest.param( - pred_func_with_list, pred_func_args_without_auto_detect_predict_params, + pred_func_with_list, pred_func_args_without_auto_detect_predict_params_without_X_type, id='func_with_X_type_list' ), pytest.param( - pred_func_with_list_of_int, 
pred_func_args_without_auto_detect_predict_params, + pred_func_with_list_of_int, pred_func_args_without_auto_detect_predict_params_with_X_int, id='func_with_X_type_list[int]' ), pytest.param( - ModelClass().pred_func_with_self_and_X_List, pred_func_args_without_auto_detect_predict_params, + ModelClass().pred_func_with_self_and_X_List, + pred_func_args_without_auto_detect_predict_params_without_X_type, id='method_from_obj_with_X_type_List' ), pytest.param( - ModelClass.pred_func_with_self_and_X_List, pred_func_args_without_auto_detect_predict_params, + ModelClass.pred_func_with_self_and_X_List, + pred_func_args_without_auto_detect_predict_params_without_X_type, id='method_fro_cls_with_X_type_List' ), pytest.param( - ModelClass.pred_func_with_cls_and_X_List_of_str, pred_func_args_without_auto_detect_predict_params, + ModelClass.pred_func_with_cls_and_X_List_of_str, + pred_func_args_without_auto_detect_predict_params, id='classmethod_from_cls_with_cls_and_X_List[str]' ), pytest.param( - ModelClass().pred_func_with_cls_and_X_List_of_str, pred_func_args_without_auto_detect_predict_params, + ModelClass().pred_func_with_cls_and_X_List_of_str, + pred_func_args_without_auto_detect_predict_params, id='classmethod_from_obj_with_cls_and_X_List[str]' ), pytest.param( - ModelClass.pred_func_staticmethod_with_list, pred_func_args_without_auto_detect_predict_params, + ModelClass.pred_func_staticmethod_with_list, + pred_func_args_without_auto_detect_predict_params_without_X_type, id='staticmethod_from_cls_with_cls_and_X_list' ), pytest.param( - ModelClass().pred_func_staticmethod_with_list, pred_func_args_without_auto_detect_predict_params, + ModelClass().pred_func_staticmethod_with_list, + pred_func_args_without_auto_detect_predict_params_without_X_type, id='staticmethod_from_obj_with_cls_and_X_list' ), ], @@ -103,7 +161,7 @@ def test_without_auto_detect_predict_params(self, pred_func, expected_result): id='func_with_X_type_List' ), pytest.param( - pred_func_with_X_List_of_str, 
pred_func_args_with_auto_detect_predict_params, + pred_func_with_X_List_of_str, pred_func_args_with_auto_detect_predict_params_with_x_type_str, id='func_with_X_type_List[str]' ), pytest.param( @@ -111,7 +169,7 @@ def test_without_auto_detect_predict_params(self, pred_func, expected_result): id='func_with_X_type_list' ), pytest.param( - pred_func_with_list_of_int, pred_func_args_with_auto_detect_predict_params, + pred_func_with_list_of_int, pred_func_args_with_auto_detect_predict_params_with_x_type_int, id='func_with_X_type_list[int]' ), pytest.param( @@ -123,19 +181,23 @@ def test_without_auto_detect_predict_params(self, pred_func, expected_result): id='method_from_cls_with_X_type_List' ), pytest.param( - ModelClass.pred_func_with_cls_and_X_List_of_str, pred_func_args_with_auto_detect_predict_params, + ModelClass.pred_func_with_cls_and_X_List_of_str, + pred_func_args_with_auto_detect_predict_params_with_x_type_str, id='classmethod_from_cls_with_X_type_List[str]' ), pytest.param( - ModelClass().pred_func_with_cls_and_X_List_of_str, pred_func_args_with_auto_detect_predict_params, + ModelClass().pred_func_with_cls_and_X_List_of_str, + pred_func_args_with_auto_detect_predict_params_with_x_type_str, id='classmethod_from_obj_with_X_type_List[str]' ), pytest.param( - ModelClass.pred_func_staticmethod_with_list, pred_func_args_with_auto_detect_predict_params, + ModelClass.pred_func_staticmethod_with_list, + pred_func_args_with_auto_detect_predict_params, id='staticmethod_from_cls_with_X_type_list' ), pytest.param( - ModelClass().pred_func_staticmethod_with_list, pred_func_args_with_auto_detect_predict_params, + ModelClass().pred_func_staticmethod_with_list, + pred_func_args_with_auto_detect_predict_params, id='staticmethod_from_obj_with_X_type_list' ), pytest.param( @@ -161,7 +223,7 @@ def test_with_auto_detect_predict_params(self, pred_func, expected_result): [ pytest.param( ModelClass.pred_func_with_self_and_X_List, - [{'name': 'self', 'required': True}] + 
pred_func_args_without_auto_detect_predict_params, + [{'name': 'self', 'required': True}] + pred_func_args_without_auto_detect_predict_params_without_X_type, id='method_with_X_List' ), pytest.param( @@ -169,7 +231,8 @@ def test_with_auto_detect_predict_params(self, pred_func, expected_result): id='classmethod_from_cls_with_X_List[str]' ), pytest.param( - ModelClass.pred_func_staticmethod_with_list, pred_func_args_without_auto_detect_predict_params, + ModelClass.pred_func_staticmethod_with_list, + pred_func_args_without_auto_detect_predict_params_without_X_type, id='staticmethod_from_cls_with_X_list' ), pytest.param( @@ -177,7 +240,7 @@ def test_with_auto_detect_predict_params(self, pred_func, expected_result): [ {'name': 'self', 'required': True}, {'name': 'wt', 'required': True}, - {'name': 'y', 'required': True}, + {'name': 'y', 'required': True, 'collection_type': 'List'}, {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, ], id='method_from_cls_without_X' @@ -188,6 +251,51 @@ def test_without_ignore_self(self, pred_func, expected_result): inspection_params = analyze_method_params(pred_func, auto_detect_predict_params=False, ignore_self=False) assert inspection_params == expected_result + @pytest.mark.parametrize( + 'pred_func, expected_result', + [ + pytest.param( + pred_func_with_optional_params, + [ + {'name': 'wt', 'required': True}, + {'name': 'x', 'required': True, 'collection_type': 'List'}, + {'name': 'opt', 'required': False, 'type': 'int'}, + {'name': 'opt_unknown_type', 'required': False}, + {'name': 'opt_union', 'required': False, 'type': 'float'}, + {'name': 'opt_union_tuple', 'required': False, 'collection_type': 'List'}, + {'name': 'opt_union_unknown_type', 'required': False}, + {'name': 'opt_union_otherwise', 'required': False, 'type': 'float'}, + {'name': 'opt_union_many_types', 'required': False, 'type': 'float'}, + {'name': 'opt_default', 'required': False, 'type': 'int', 'default': 10}, + {'name': 'opt_union_default', 
'required': False, 'type': 'float', 'default': 100}, + {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, + ], + id='func' + ), + pytest.param( + ModelClass.pred_func_with_optional_params, + [ + {'name': 'wt', 'required': True}, + {'name': 'x', 'required': True, 'collection_type': 'List'}, + {'name': 'opt', 'required': False, 'type': 'int'}, + {'name': 'opt_unknown_type', 'required': False}, + {'name': 'opt_union', 'required': False, 'type': 'float'}, + {'name': 'opt_union_tuple', 'required': False, 'collection_type': 'List'}, + {'name': 'opt_union_unknown_type', 'required': False}, + {'name': 'opt_union_otherwise', 'required': False, 'type': 'float'}, + {'name': 'opt_union_many_types', 'required': False, 'type': 'float'}, + {'name': 'opt_default', 'required': False, 'type': 'int', 'default': 10}, + {'name': 'opt_union_default', 'required': False, 'type': 'float', 'default': 100}, + {'name': 'b', 'required': False, 'default': False, 'type': 'bool'}, + ], + id='method' + ), + ], + ) + def test_optional_params(self, pred_func, expected_result): + inspection_params = analyze_method_params(pred_func, auto_detect_predict_params=False, ignore_self=True) + assert inspection_params == expected_result + class TestAutoSearchBinarizationType: @pytest.mark.parametrize( diff --git a/tests/unit_tests/web/test_api_docs.py b/tests/unit_tests/web/test_api_docs.py index 43420bc..02b83d3 100644 --- a/tests/unit_tests/web/test_api_docs.py +++ b/tests/unit_tests/web/test_api_docs.py @@ -8,8 +8,9 @@ from mlup.up import UP, Config from mlup.web.api_docs import make_columns_object_openapi_scheme, generate_openapi_schema - -assertDictEqual = TestCase().assertDictEqual +testcase = TestCase() +testcase.maxDiff = None +assertDictEqual = testcase.assertDictEqual src_columns = [ diff --git a/tests/unit_tests/web/test_api_validators.py b/tests/unit_tests/web/test_api_validators.py index 39eda74..1b7bf71 100644 --- a/tests/unit_tests/web/test_api_validators.py +++ 
b/tests/unit_tests/web/test_api_validators.py @@ -18,6 +18,7 @@ 'bool': bool, 'str': str, 'list': list, + 'List': List, } src_columns = [ {"name": "Float", "type": "float"}, @@ -26,6 +27,8 @@ {"name": "FloatNotRequired", "type": "float", "required": False}, {"name": "FloatNotRequiredDefault", "type": "float", "required": False, "default": 1.4}, {"name": "FloatRequiredDefault", "type": "float", "required": True, "default": 1.4}, + {"name": "FloatList", "type": "float", "collection_type": "List"}, + {"name": "FloatOptionalList", "type": "float", "collection_type": "List", "required": False}, {"name": "Int", "type": "int"}, {"name": "IntDefault", "type": "int", "default": 4}, @@ -33,6 +36,8 @@ {"name": "IntNotRequired", "type": "int", "required": False}, {"name": "IntNotRequiredDefault", "type": "int", "required": False, "default": 4}, {"name": "IntRequiredDefault", "type": "int", "required": True, "default": 4}, + {"name": "IntList", "type": "int", "collection_type": "List"}, + {"name": "IntOptionalList", "type": "int", "collection_type": "List", "required": False}, {"name": "Str", "type": "str"}, {"name": "StrDefault", "type": "str", "default": "str"}, @@ -40,6 +45,8 @@ {"name": "StrNotRequired", "type": "str", "required": False}, {"name": "StrNotRequiredDefault", "type": "str", "required": False, "default": "str"}, {"name": "StrRequiredDefault", "type": "str", "required": True, "default": "str"}, + {"name": "StrList", "type": "str", "collection_type": "List"}, + {"name": "StrOptionalList", "type": "str", "collection_type": "List", "required": False}, {"name": "Bool", "type": "bool"}, {"name": "BoolDefault", "type": "bool", "default": True}, @@ -47,6 +54,8 @@ {"name": "BoolNotRequired", "type": "bool", "required": False}, {"name": "BoolNotRequiredDefault", "type": "bool", "required": False, "default": True}, {"name": "BoolRequiredDefault", "type": "bool", "required": True, "default": True}, + {"name": "BoolList", "type": "bool", "collection_type": "List"}, + 
{"name": "BoolOptionalList", "type": "bool", "collection_type": "List", "required": False}, ] @@ -56,7 +65,10 @@ def test_make_map_pydantic_columns(): for col_config in src_columns: pred_col_type, pred_field_info = cols_configs.pop(col_config["name"]) - assert pred_col_type is column_types_map[col_config["type"]] + if "collection_type" in col_config: + assert pred_col_type is column_types_map[col_config["collection_type"]][column_types_map[col_config["type"]]] + else: + assert pred_col_type is column_types_map[col_config["type"]] assert pred_field_info.title == col_config["name"] if 'default' in col_config: assert pred_field_info.default is col_config["default"] @@ -81,7 +93,10 @@ def test_make_map_pydantic_validation(): ) # Check valid type - _test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) + if "collection_type" in col_config: + _test_pydantic_model(**{col_config["name"]: [1]}) + else: + _test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) # Check not valid type try: not_valid_value = list @@ -94,12 +109,18 @@ def test_make_map_pydantic_validation(): elif pred_col_type is bool: assert msg_str == 'value could not be parsed to a boolean' else: - assert msg_str.startswith(f'value is not a valid {col_config["type"]}') + if "collection_type" in col_config: + assert msg_str.startswith(f'value is not a valid {col_config["collection_type"].lower()}') + else: + assert msg_str.startswith(f'value is not a valid {col_config["type"]}') # Check required if col_config.get("required", True): # Check valid value - _test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) + if "collection_type" in col_config: + _test_pydantic_model(**{col_config["name"]: [1]}) + else: + _test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) # Check none value try: _test_pydantic_model(**{col_config["name"]: None}) @@ -144,6 +165,8 @@ def test_make_map_pydantic_columns_with_IS_X(model_for_columns): assert pred_col_type is List[model_for_columns] else: assert 
pred_col_type is List[Any] + elif "collection_type" in col_config: + assert pred_col_type is List[column_types_map[col_config["type"]]] else: assert pred_col_type is column_types_map[col_config["type"]] assert pred_field_info.title == col_config["name"] @@ -166,12 +189,12 @@ def test_make_map_pydantic_columns_with_IS_X(model_for_columns): @pytest.mark.parametrize( 'column_validation, columns, data, expected_data', [ - (False, src_columns, [1, 2, 3], [1, 2, 3]), + (False, src_columns, [1, [4, 5], 2, 3], [1, [4, 5], 2, 3]), ( True, - [src_columns[0], src_columns[6], src_columns[12]], - [{'Float': 1.0, 'Int': 1, 'Str': '1', 'NotExistsKey': 10}], - [{'Float': 1.0, 'Int': 1, 'Str': '1'}] + [src_columns[0], src_columns[6], src_columns[8], src_columns[16]], + [{'Float': 1.0, 'FloatList': [1.0, 2.0], 'Int': 1, 'Str': '1', 'NotExistsKey': 10}], + [{'Float': 1.0, 'FloatList': [1.0, 2.0], 'Int': 1, 'Str': '1'}] ), ], ids=['column_validation=False', 'column_validation=True'] diff --git a/tests/unit_tests/web/test_app.py b/tests/unit_tests/web/test_app.py index 27b2e51..9711d57 100644 --- a/tests/unit_tests/web/test_app.py +++ b/tests/unit_tests/web/test_app.py @@ -365,6 +365,32 @@ async def test_predict_model_numpy_returned_valid(web_app_test_client, list_to_n assert response.json() == {"predict_result": [[1, 2, 3]]} +@pytest.mark.asyncio +async def test_predict_model_with_list_fields_column_validation(web_app_test_client, list_to_numpy_array_model): + mlup_model = MLupModel( + ml_model=list_to_numpy_array_model, + conf=ModelConfig( + data_transformer_for_predict=ModelDataTransformerType.NUMPY_ARR, + data_transformer_for_predicted=ModelDataTransformerType.NUMPY_ARR, + columns=[ + {"name": "col1", "type": "int", "collection_type": "List"}, + {"name": "col2", "type": "int", "collection_type": "List"}, + ] + ) + ) + mlup_web_app = MLupWebApp( + ml=mlup_model, + conf=WebAppConfig(mode=WebAppArchitecture.directly_to_predict, column_validation=True) + ) + mlup_model.load() + 
mlup_web_app.load() + with web_app_test_client(mlup_web_app) as api_test_client: + response = await api_test_client.post("/predict", json={'X': [{"col1": [1, 2, 3], "col2": [4, 5, 6]}]}) + assert response.status_code == 200 + assert response.headers['x-predict-id'] + assert response.json() == {"predict_result": [[[1, 2, 3], [4, 5, 6]]]} + + @pytest.mark.asyncio async def test_predict_model_numpy_returned_not_valid(web_app_test_client, list_to_numpy_array_model): mlup_model = MLupModel(