Commit
Add parsing of typing arguments from the model predict method: Optional, Sequence (List, Tuple, Set). (#22)

* Add parsing of typing arguments from the model predict method: Optional, List.
nxexox authored Nov 14, 2024
1 parent 48577ca commit 7482bf1
Showing 11 changed files with 406 additions and 76 deletions.
5 changes: 3 additions & 2 deletions docs/config_file.md
@@ -34,11 +34,12 @@ Default is `ModelLibraryType.SKLEARN`.
`columns (Optional[List[Dict[str, str]]])` - Columns description for model predict method.
Format: List[Dict].

Example [{"name": "col1", "type": "str", "required": True, "default": None}]
Example [{"name": "col1", "type": "str", "required": True, "default": None, "collection_type": "List"}]
name - Column name;
type - Column type in string: int, str, float, bool.
required - bool, optional field. Default is True.
-Default - Any data and optional.
+default - Any data and optional.
+collection_type - Type of collection, optional field. Supported: List. Default is None.

If you do not set columns, then columns validation will be False.
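For example, a minimal sketch of a columns list that mixes scalar and list-valued columns under this format (the names and values are illustrative, not part of the commit):

    columns = [
        # Scalar column: one int per row, must be present.
        {"name": "user_id", "type": "int", "required": True},
        # List-valued column via the new field: a list of floats per row, optional.
        {"name": "scores", "type": "float", "collection_type": "List", "required": False, "default": None},
    ]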

2 changes: 1 addition & 1 deletion mlup/__init__.py
@@ -3,4 +3,4 @@
from mlup.web.app import MLupWebApp, WebAppConfig


__version__ = "0.2.2"
__version__ = "0.2.3"
110 changes: 93 additions & 17 deletions mlup/utils/interspection.py
@@ -2,7 +2,14 @@
import inspect
import logging
from enum import Enum
-from typing import Callable, Any, Union, Optional, Type
+from typing import Callable, Any, Union, Optional, Type, Generic, Sequence, Dict, Tuple

+try:
+    from typing import get_args as typing_get_args, get_origin as typing_get_origin
+# For Python <= 3.7
+except ImportError:
+    typing_get_args = lambda t: getattr(t, '__args__', ()) if t is not Generic else Generic  # noqa: E731
+    typing_get_origin = lambda t: getattr(t, '__origin__', None)  # noqa: E731

from mlup.constants import IS_X, THERE_IS_ARGS, DEFAULT_X_ARG_NAME, BinarizationType, LoadedFile
from mlup.utils.profiling import TimeProfiler
@@ -11,6 +18,77 @@
logger = logging.getLogger('mlup')


+SUPPORTED_PRIMITIVE_TYPES = {
+    int: 'int',
+    float: 'float',
+    bool: 'bool',
+    str: 'str',
+}
+
+
+def _is_optional(field: Any) -> bool:
+    return typing_get_origin(field) is Union and type(None) in typing_get_args(field)
+
+
+def _is_sequence(field: Any) -> bool:
+    _type_for_check = typing_get_origin(field)
+    _collection_types = (list, set, tuple)
+    try:
+        return ((field is not None and field in _collection_types)
+                or (_type_for_check is not None and issubclass(_type_for_check, Sequence)))
+    except TypeError:
+        # If _type_for_check is not a class object. For example Optional[int].
+        return False
+
+
+def parse_attributes_from_generic_type(
+    param: inspect.Parameter,
+) -> Tuple[Dict[str, Any], bool]:
+    """
+    Search for and return a primitive type from a single level of a Generic.
+    If no supported type is found, the default type str is used.
+    :param inspect.Parameter param: Parameter that needs to be parsed.
+    :return: Attributes from the parsed Generic and the parsing result.
+        If the bool is True, parsing failed; if False, parsing succeeded.
+        {"type": "int", "required": False, "collection_type": None}, False
+        Key "type" is optional.
+        Key "collection_type" is optional.
+    :rtype: Tuple[Dict[str, Any], bool]
+    """
+    result = {
+        'required': True,
+    }
+    _types_for_analyze = typing_get_args(param.annotation)
+
+    logger.debug(f"Analyze argument '{param.name}', attempt to determine the primitive type.")
+
+    if _is_optional(param.annotation):
+        result['required'] = False
+    if _is_sequence(param.annotation):
+        result['collection_type'] = 'List'
+    if len(_types_for_analyze) > 0 and _is_sequence(_types_for_analyze[0]):
+        result['collection_type'] = 'List'
+        _types_for_analyze = typing_get_args(_types_for_analyze[0])
+
+    for p in _types_for_analyze:
+        if p in SUPPORTED_PRIMITIVE_TYPES:
+            result['type'] = SUPPORTED_PRIMITIVE_TYPES[p]
+            break
+
+    _parse_error = False
+
+    if 'type' not in result:
+        logger.warning(f"Cannot determine primitive type for '{param.name}'.")
+        _parse_error = True
+
+    logger.debug(f"For argument '{param.name}' parsing result '{result}'")
+
+    return result, _parse_error


def get_class_by_path(path_to_class: Union[str, Enum]) -> Any:
"""
Get class by path to class. Use importlib.import_module.
@@ -66,12 +144,6 @@ def example(a, b = 100, *, c: float = 123):
    sign = inspect.signature(func)
    arg_spec = inspect.getfullargspec(func)
    result = []
-    types = {
-        int: 'int',
-        float: 'float',
-        bool: 'bool',
-        str: 'str',
-    }
    is_there_args = False
    logger.info(f'Analyzing arguments in {func}.')

@@ -93,23 +165,27 @@
            'required': True,
        }
        if param_obj.annotation is not inspect._empty:
-            if param_obj.annotation in types:
-                param_data['type'] = types[param_obj.annotation]
+            if param_obj.annotation in SUPPORTED_PRIMITIVE_TYPES:
+                param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[param_obj.annotation]
            else:
-                logger.warning(
-                    f'For model predict argument writes not supported type {param_obj.annotation}. '
-                    f'Skip added validation'
-                )
+                _param_attributes, _parse_error = parse_attributes_from_generic_type(param_obj)
+                param_data.update(**_param_attributes)
+
+                if _parse_error:
+                    logger.warning(
+                        f'For model predict argument writes not supported type {param_obj.annotation}. '
+                        f'Skip added validation'
+                    )

        if param_obj.default is not inspect._empty:
            param_data['required'] = False
            param_data['default'] = param_obj.default
-            if 'type' not in param_data and type(param_obj.default) in types:
-                param_data['type'] = types[type(param_obj.default)]
+            if 'type' not in param_data and type(param_obj.default) in SUPPORTED_PRIMITIVE_TYPES:
+                param_data['type'] = SUPPORTED_PRIMITIVE_TYPES[type(param_obj.default)]

        if param_name.lower().strip() == 'x' and auto_detect_predict_params:
            logger.info('Found X param in model params. Set List type')
-            param_data['type'] = 'List'
+            param_data['collection_type'] = 'List'
            param_data[IS_X] = True
            _found_X = True

@@ -121,7 +197,7 @@
            f'X argument in predict method not found. '
            f'For predict data use first argument with name "{result[0]["name"]}".'
        )
-        result[0]['type'] = 'List'
+        result[0]['collection_type'] = 'List'
        result[0][IS_X] = True
    else:
        logger.info('Not found arguments in predict method.')
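A usage sketch of the new helper (the predict signature is illustrative; the expected values follow the parsing logic above, assuming Python 3.8+ typing introspection):

    import inspect
    from typing import List, Optional

    from mlup.utils.interspection import parse_attributes_from_generic_type

    def predict(x: Optional[List[int]] = None):
        ...

    param = inspect.signature(predict).parameters['x']
    attrs, parse_error = parse_attributes_from_generic_type(param)
    # Optional[...] clears 'required', the inner List[int] sets the collection
    # type, and int is a supported primitive, so the expected result is:
    # attrs == {'required': False, 'collection_type': 'List', 'type': 'int'}
    # parse_error == False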
13 changes: 11 additions & 2 deletions mlup/web/api_docs.py
@@ -35,10 +35,19 @@ def make_columns_object_openapi_scheme(src_columns: List[Dict]) -> Tuple[Dict, L
    cols_openapi_config = {}
    required_columns = []
    for col_config in src_columns:
-        col_name, col_type = col_config["name"], col_config.get("type", "str")
+        col_name = col_config["name"]
        col_required, col_default = col_config.get("required", True), col_config.get("default", None)

_col_config = {"type": _openapi_types_map[col_type.lower()]}
_col_config = {}
col_type = col_config.get("type", None)
if "collection_type" in col_config:
if col_type is not None:
_col_config["items"] = {"type": _openapi_types_map[col_type.lower()]}
_col_config["type"] = _openapi_types_map[col_config["collection_type"].lower()]
col_type = "list"
else:
_col_config["type"] = _openapi_types_map[col_type].lower() or "string"

        title = []
        if col_default is not None:
            title.append("Default")
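The intended effect: a column such as {"name": "scores", "type": "float", "collection_type": "List"} should now render as an array property in the OpenAPI schema, roughly as below (assuming _openapi_types_map carries the usual mappings, e.g. "float" -> "number" and "list" -> "array"; the map itself is not shown in this diff):

    "scores": {
        "type": "array",
        "items": {"type": "number"}
    }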
6 changes: 6 additions & 0 deletions mlup/web/api_validators.py
@@ -84,6 +84,9 @@ def make_map_pydantic_columns(
        'str': str,
        'list': list,
    }
+    collection_types = {
+        'List': List,
+    }
    __validators__ = {}
    columns_pydantic_format = {}
    # If set None, from ml.columns
@@ -102,6 +105,9 @@
                f'Supported types {", ".join(column_types.keys())}.'
            )
            col_type = Any
+        # Wrap the primitive in its collection type, e.g. "int" -> List[int]
+        if col_config.get('collection_type', None) in collection_types:
+            col_type = collection_types[col_config['collection_type']][col_type]

        # Required
        field_required_default_value = Field(...)
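A short sketch of what the new mapping does before the pydantic model is built: the resolved primitive type is wrapped in typing.List, mirroring the added lines above (the column config is illustrative):

    from typing import List

    collection_types = {'List': List}

    col_config = {'name': 'scores', 'type': 'int', 'collection_type': 'List'}
    col_type = int  # resolved earlier from the column_types map
    if col_config.get('collection_type', None) in collection_types:
        col_type = collection_types[col_config['collection_type']][col_type]
    # col_type is now List[int]; pydantic validates this field as a JSON array of integers.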
102 changes: 82 additions & 20 deletions tests/unit_tests/ml/test_data_transformers.py
@@ -93,18 +93,19 @@ def test_transform_to_model_format_from_list(self):

    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
-            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
-            {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52},
-            {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53},
-            {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54},
-            {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55},
+            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51, 'col6': [61, 71]},
+            {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52, 'col6': [62, 72]},
+            {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53, 'col6': [63, 73]},
+            {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54, 'col6': [64, 74]},
+            {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55, 'col6': [65, 75]},
        ]
        cols = [
            {'name': 'col1', 'type': 'int'},
            {'name': 'col2', 'type': 'int'},
            {'name': 'col3', 'type': 'int'},
            {'name': 'col4', 'type': 'int'},
            {'name': 'col5', 'type': 'int'},
+            {'name': 'col6', 'type': 'int', 'collection_type': 'List'},
        ]
        # Check create
        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
@@ -168,13 +169,14 @@ def test_transform_to_model_format_from_list(self):
            {'name': 'col3', 'type': 'int'},
            {'name': 'colstr', 'type': 'str'},
            {'name': 'colbool', 'type': 'bool'},
+            {'name': 'collistfloat', 'type': 'float', 'collection_type': 'List'},
        ]
        data = [
-            [11, 21, 31, 'colstr', True],
-            [12, 22, 32, 'colstr', True],
-            [13, 23, 33, 'colstr', False],
-            [14, 24, 34, 'colstr', True],
-            [15, 25, 35, 'colstr', True],
+            [11, 21, 31, 'colstr', True, [41.0, 51.0]],
+            [12, 22, 32, 'colstr', True, [42.0, 52.0]],
+            [13, 23, 33, 'colstr', False, [43.0, 53.0]],
+            [14, 24, 34, 'colstr', True, [44.0, 54.0]],
+            [15, 25, 35, 'colstr', True, [45.0, 55.0]],
        ]
        df = self.transformer_class().transform_to_model_format(data, columns=cols)

@@ -187,11 +189,11 @@

    def test_transform_to_json_format(self):
        data = [
-            {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False},
-            {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True},
+            {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]},
+            {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]},
+            {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]},
+            {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]},
+            {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]},
        ]
        df = pd.DataFrame(data=data)
        trans_data = self.transformer_class().transform_to_json_format(df)
@@ -200,11 +202,11 @@ def test_transform_to_json_format_from_list(self):

    def test_transform_to_json_format_from_list(self):
        data = [
-            [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}],
-            [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}],
+            [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]}],
+            [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}],
+            [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}],
+            [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}],
+            [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}],
        ]
        dfs = [pd.DataFrame(data=d) for d in data]
        trans_data = self.transformer_class().transform_to_json_format(dfs)
@@ -253,6 +255,26 @@ def test_transform_to_model_format_from_list(self):
        pred_d = self.transformer_class().transform_to_model_format(data)
        assert np.array_equal(pred_d, np.array(data))

+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        data = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        cols = [
+            {'name': 'col1', 'type': 'int', 'collection_type': 'List'},
+            {'name': 'col2', 'type': 'int', 'collection_type': 'List'},
+        ]
+        # Check create
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
+        assert np.array_equal(pred_d, np.array([list(v.values()) for v in data]))
+
+        # Check order by columns
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
+        assert np.array_equal(pred_d, np.array([list(v.values())[::-1] for v in data]))
+
    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -359,6 +381,26 @@ def test_transform_to_model_format_from_list(self):
        pred_d = self.transformer_class().transform_to_model_format(data)
        assert_tf_tensors(pred_d, tensorflow.convert_to_tensor(data))

+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        data = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        cols = [
+            {'name': 'col1', 'type': 'int', 'collection_type': 'List'},
+            {'name': 'col2', 'type': 'int', 'collection_type': 'List'},
+        ]
+        # Check create
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
+        assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values()) for v in data]))
+
+        # Check order by columns
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
+        assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values())[::-1] for v in data]))
+
    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -467,6 +509,26 @@ def test_transform_to_model_format_from_list(self):
        pred_d = self.transformer_class().transform_to_model_format(data)
        assert is_equal_torch_tensors(pred_d, torch.tensor(data))

+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        data = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        cols = [
+            {'name': 'col1', 'type': 'int', 'collection_type': 'List'},
+            {'name': 'col2', 'type': 'int', 'collection_type': 'List'},
+        ]
+        # Check create
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
+        assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values()) for v in data]))
+
+        # Check order by columns
+        pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1])
+        assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values())[::-1] for v in data]))
+
    def test_transform_to_model_format_from_dict_with_columns(self):
        data = [
            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
