From e59d51b00f6740a4415a98c5b02f5a384e96d9c4 Mon Sep 17 00:00:00 2001
From: Deys Timofey <nxexox@gmail.com>
Date: Tue, 12 Nov 2024 22:51:09 +0500
Subject: [PATCH] Add tests. Add docs

---
 docs/config_file.md                           |   5 +-
 tests/unit_tests/ml/test_data_transformers.py | 102 ++++++++++++++----
 tests/unit_tests/ml/test_model.py             |  18 ++++
 tests/unit_tests/web/test_api_validators.py   |  39 +++++--
 tests/unit_tests/web/test_app.py              |  26 +++++
 5 files changed, 160 insertions(+), 30 deletions(-)

diff --git a/docs/config_file.md b/docs/config_file.md
index e59b385..d8e496d 100644
--- a/docs/config_file.md
+++ b/docs/config_file.md
@@ -34,11 +34,12 @@ Default is `ModelLibraryType.SKLEARN`.
 `columns (Optional[List[Dict[str, str]]])` - Columns description for model predict method.
     Format: List[Dict].
 
-    Example [{"name": "col1", "type": "str", "required": True, "default": None}]
+    Example [{"name": "col1", "type": "str", "required": True, "default": None, "collection_type": "List"}]
     name - Column name;
     type - Column type in string: int, str, float, bool.
     required - bool and optional field. By default is True.
-    Default - Any data and optional.
+    default - Optional default value; any data is allowed.
+    collection_type - Optional collection type that wraps the column type. Supported: List. Default is None.
     
 If you not set columns, then columns validation should be False. 
 
diff --git a/tests/unit_tests/ml/test_data_transformers.py b/tests/unit_tests/ml/test_data_transformers.py
index 100cd85..b083f21 100644
--- a/tests/unit_tests/ml/test_data_transformers.py
+++ b/tests/unit_tests/ml/test_data_transformers.py
@@ -93,11 +93,11 @@ def test_transform_to_model_format_from_list(self):
 
     def test_transform_to_model_format_from_dict_with_columns(self):
         data = [
-            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
-            {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52},
-            {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53},
-            {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54},
-            {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55},
+            {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51, 'col6': [61, 71]},
+            {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52, 'col6': [62, 72]},
+            {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53, 'col6': [63, 73]},
+            {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54, 'col6': [64, 74]},
+            {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55, 'col6': [65, 75]},
         ]
         cols = [
             {'name': 'col1', 'type': 'int'},
@@ -105,6 +105,7 @@ def test_transform_to_model_format_from_dict_with_columns(self):
             {'name': 'col3', 'type': 'int'},
             {'name': 'col4', 'type': 'int'},
             {'name': 'col5', 'type': 'int'},
+            {'name': 'col6', 'type': 'int', 'collection_type': 'List'},
         ]
         # Check create
         pred_d = self.transformer_class().transform_to_model_format(data, columns=cols)
@@ -168,13 +169,14 @@ def test_transform_to_model_format_from_list(self):
             {'name': 'col3', 'type': 'int'},
             {'name': 'colstr', 'type': 'str'},
             {'name': 'colbool', 'type': 'bool'},
+            {'name': 'collistfloat', 'type': 'float', 'collection_type': 'List'},
         ]
         data = [
-            [11, 21, 31, 'colstr', True],
-            [12, 22, 32, 'colstr', True],
-            [13, 23, 33, 'colstr', False],
-            [14, 24, 34, 'colstr', True],
-            [15, 25, 35, 'colstr', True],
+            [11, 21, 31, 'colstr', True, [41.0, 51.0]],
+            [12, 22, 32, 'colstr', True, [42.0, 52.0]],
+            [13, 23, 33, 'colstr', False, [43.0, 53.0]],
+            [14, 24, 34, 'colstr', True, [44.0, 54.0]],
+            [15, 25, 35, 'colstr', True, [45.0, 55.0]],
         ]
         df = self.transformer_class().transform_to_model_format(data, columns=cols)
 
@@ -187,11 +189,11 @@ def test_transform_to_model_format_from_list(self):
 
     def test_transform_to_json_format(self):
         data = [
-            {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False},
-            {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True},
-            {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True},
+            {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]},
+            {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]},
+            {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]},
+            {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]},
+            {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]},
         ]
         df = pd.DataFrame(data=data)
         trans_data = self.transformer_class().transform_to_json_format(df)
@@ -200,11 +202,11 @@ def test_transform_to_json_format(self):
 
     def test_transform_to_json_format_from_list(self):
         data = [
-            [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}],
-            [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}],
-            [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}],
+            [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]}],
+            [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}],
+            [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}],
+            [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}],
+            [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}],
         ]
         dfs = [pd.DataFrame(data=d) for d in data]
         trans_data = self.transformer_class().transform_to_json_format(dfs)
@@ -253,6 +255,26 @@ def test_transform_to_model_format_from_list(self):
         pred_d = self.transformer_class().transform_to_model_format(data)
         assert np.array_equal(pred_d, np.array(data))
 
+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        """Dict rows holding list values are laid out in the order given by `columns`."""
+        rows = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        list_cols = [
+            {'name': name, 'type': 'int', 'collection_type': 'List'} for name in ('col1', 'col2')
+        ]
+        # Declared column order: expect each row's values in dict order.
+        result = self.transformer_class().transform_to_model_format(rows, columns=list_cols)
+        assert np.array_equal(result, np.array([list(row.values()) for row in rows]))
+
+        # Reversed column order: expect each row's values reversed as well.
+        result = self.transformer_class().transform_to_model_format(rows, columns=list_cols[::-1])
+        assert np.array_equal(result, np.array([list(row.values())[::-1] for row in rows]))
+
     def test_transform_to_model_format_from_dict_with_columns(self):
         data = [
             {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -359,6 +381,26 @@ def test_transform_to_model_format_from_list(self):
         pred_d = self.transformer_class().transform_to_model_format(data)
         assert_tf_tensors(pred_d, tensorflow.convert_to_tensor(data))
 
+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        """Dict rows holding list values become a tensor ordered by `columns`."""
+        rows = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        list_cols = [
+            {'name': name, 'type': 'int', 'collection_type': 'List'} for name in ('col1', 'col2')
+        ]
+        # Declared column order: expect each row's values in dict order.
+        result = self.transformer_class().transform_to_model_format(rows, columns=list_cols)
+        assert_tf_tensors(result, tensorflow.convert_to_tensor([list(row.values()) for row in rows]))
+
+        # Reversed column order: expect each row's values reversed as well.
+        result = self.transformer_class().transform_to_model_format(rows, columns=list_cols[::-1])
+        assert_tf_tensors(result, tensorflow.convert_to_tensor([list(row.values())[::-1] for row in rows]))
+
     def test_transform_to_model_format_from_dict_with_columns(self):
         data = [
             {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
@@ -467,6 +509,26 @@ def test_transform_to_model_format_from_list(self):
         pred_d = self.transformer_class().transform_to_model_format(data)
         assert is_equal_torch_tensors(pred_d, torch.tensor(data))
 
+    def test_transform_to_model_format_from_dict_with_list_columns(self):
+        """Dict rows holding list values become a tensor ordered by `columns`."""
+        rows = [
+            {'col1': [11, 111], 'col2': [21, 221]},
+            {'col1': [12, 112], 'col2': [22, 222]},
+            {'col1': [13, 113], 'col2': [23, 223]},
+            {'col1': [14, 114], 'col2': [24, 224]},
+            {'col1': [15, 115], 'col2': [25, 225]},
+        ]
+        list_cols = [
+            {'name': name, 'type': 'int', 'collection_type': 'List'} for name in ('col1', 'col2')
+        ]
+        # Declared column order: expect each row's values in dict order.
+        result = self.transformer_class().transform_to_model_format(rows, columns=list_cols)
+        assert is_equal_torch_tensors(result, torch.tensor([list(row.values()) for row in rows]))
+
+        # Reversed column order: expect each row's values reversed as well.
+        result = self.transformer_class().transform_to_model_format(rows, columns=list_cols[::-1])
+        assert is_equal_torch_tensors(result, torch.tensor([list(row.values())[::-1] for row in rows]))
+
     def test_transform_to_model_format_from_dict_with_columns(self):
         data = [
             {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51},
diff --git a/tests/unit_tests/ml/test_model.py b/tests/unit_tests/ml/test_model.py
index ddc26bd..de2a4a7 100644
--- a/tests/unit_tests/ml/test_model.py
+++ b/tests/unit_tests/ml/test_model.py
@@ -800,3 +800,21 @@ async def test_predict_from_numpy(self):
 
         predicted_data = await mlup_model.predict_from(X=[1, 2, 3, 4, 5, 6, 7, 8, 9])
         np.array_equal(predicted_data, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9]))
+
+    @pytest.mark.asyncio
+    async def test_predict_with_list_columns(self):
+        """Predict with List-typed columns and assert the result equals the nested input rows."""
+        mlup_model = MLupModel(
+            ml_model=ModelWithX(),
+            conf=ModelConfig(
+                data_transformer_for_predict=ModelDataTransformerType.NUMPY_ARR,
+                data_transformer_for_predicted=ModelDataTransformerType.SRC_TYPES,
+                columns=[
+                    {"name": "col1", "type": "int", "collection_type": "List"},
+                    {"name": "col2", "type": "int", "collection_type": "List"},
+                ],
+            )
+        )
+        mlup_model.load()
+        predicted_data = await mlup_model.predict_from(X=[[1, 2], [3, 4], [5, 6]])
+        assert np.array_equal(predicted_data, np.array([[1, 2], [3, 4], [5, 6]]))
diff --git a/tests/unit_tests/web/test_api_validators.py b/tests/unit_tests/web/test_api_validators.py
index 39eda74..1b7bf71 100644
--- a/tests/unit_tests/web/test_api_validators.py
+++ b/tests/unit_tests/web/test_api_validators.py
@@ -18,6 +18,7 @@
     'bool': bool,
     'str': str,
     'list': list,
+    'List': List,
 }
 src_columns = [
     {"name": "Float", "type": "float"},
@@ -26,6 +27,8 @@
     {"name": "FloatNotRequired", "type": "float", "required": False},
     {"name": "FloatNotRequiredDefault", "type": "float", "required": False, "default": 1.4},
     {"name": "FloatRequiredDefault", "type": "float", "required": True, "default": 1.4},
+    {"name": "FloatList", "type": "float", "collection_type": "List"},
+    {"name": "FloatOptionalList", "type": "float", "collection_type": "List", "required": False},
 
     {"name": "Int", "type": "int"},
     {"name": "IntDefault", "type": "int", "default": 4},
@@ -33,6 +36,8 @@
     {"name": "IntNotRequired", "type": "int", "required": False},
     {"name": "IntNotRequiredDefault", "type": "int", "required": False, "default": 4},
     {"name": "IntRequiredDefault", "type": "int", "required": True, "default": 4},
+    {"name": "IntList", "type": "int", "collection_type": "List"},
+    {"name": "IntOptionalList", "type": "int", "collection_type": "List", "required": False},
 
     {"name": "Str", "type": "str"},
     {"name": "StrDefault", "type": "str", "default": "str"},
@@ -40,6 +45,8 @@
     {"name": "StrNotRequired", "type": "str", "required": False},
     {"name": "StrNotRequiredDefault", "type": "str", "required": False, "default": "str"},
     {"name": "StrRequiredDefault", "type": "str", "required": True, "default": "str"},
+    {"name": "StrList", "type": "str", "collection_type": "List"},
+    {"name": "StrOptionalList", "type": "str", "collection_type": "List", "required": False},
 
     {"name": "Bool", "type": "bool"},
     {"name": "BoolDefault", "type": "bool", "default": True},
@@ -47,6 +54,8 @@
     {"name": "BoolNotRequired", "type": "bool", "required": False},
     {"name": "BoolNotRequiredDefault", "type": "bool", "required": False, "default": True},
     {"name": "BoolRequiredDefault", "type": "bool", "required": True, "default": True},
+    {"name": "BoolList", "type": "bool", "collection_type": "List"},
+    {"name": "BoolOptionalList", "type": "bool", "collection_type": "List", "required": False},
 ]
 
 
@@ -56,7 +65,10 @@ def test_make_map_pydantic_columns():
     for col_config in src_columns:
         pred_col_type, pred_field_info = cols_configs.pop(col_config["name"])
 
-        assert pred_col_type is column_types_map[col_config["type"]]
+        if "collection_type" in col_config:
+            assert pred_col_type is column_types_map[col_config["collection_type"]][column_types_map[col_config["type"]]]
+        else:
+            assert pred_col_type is column_types_map[col_config["type"]]
         assert pred_field_info.title == col_config["name"]
         if 'default' in col_config:
             assert pred_field_info.default is col_config["default"]
@@ -81,7 +93,10 @@ def test_make_map_pydantic_validation():
         )
 
         # Check valid type
-        _test_pydantic_model(**{col_config["name"]: pred_col_type(1)})
+        if "collection_type" in col_config:
+            _test_pydantic_model(**{col_config["name"]: [1]})
+        else:
+            _test_pydantic_model(**{col_config["name"]: pred_col_type(1)})
         # Check not valid type
         try:
             not_valid_value = list
@@ -94,12 +109,18 @@ def test_make_map_pydantic_validation():
             elif pred_col_type is bool:
                 assert msg_str == 'value could not be parsed to a boolean'
             else:
-                assert msg_str.startswith(f'value is not a valid {col_config["type"]}')
+                if "collection_type" in col_config:
+                    assert msg_str.startswith(f'value is not a valid {col_config["collection_type"].lower()}')
+                else:
+                    assert msg_str.startswith(f'value is not a valid {col_config["type"]}')
 
         # Check required
         if col_config.get("required", True):
             # Check valid value
-            _test_pydantic_model(**{col_config["name"]: pred_col_type(1)})
+            if "collection_type" in col_config:
+                _test_pydantic_model(**{col_config["name"]: [1]})
+            else:
+                _test_pydantic_model(**{col_config["name"]: pred_col_type(1)})
             # Check none value
             try:
                 _test_pydantic_model(**{col_config["name"]: None})
@@ -144,6 +165,8 @@ def test_make_map_pydantic_columns_with_IS_X(model_for_columns):
                 assert pred_col_type is List[model_for_columns]
             else:
                 assert pred_col_type is List[Any]
+        elif "collection_type" in col_config:
+            assert pred_col_type is List[column_types_map[col_config["type"]]]
         else:
             assert pred_col_type is column_types_map[col_config["type"]]
         assert pred_field_info.title == col_config["name"]
@@ -166,12 +189,12 @@ def test_make_map_pydantic_columns_with_IS_X(model_for_columns):
 @pytest.mark.parametrize(
     'column_validation, columns, data, expected_data',
     [
-        (False, src_columns, [1, 2, 3], [1, 2, 3]),
+        (False, src_columns, [1, [4, 5], 2, 3], [1, [4, 5], 2, 3]),
         (
             True,
-            [src_columns[0], src_columns[6], src_columns[12]],
-            [{'Float': 1.0, 'Int': 1, 'Str': '1', 'NotExistsKey': 10}],
-            [{'Float': 1.0, 'Int': 1, 'Str': '1'}]
+            [src_columns[0], src_columns[6], src_columns[8], src_columns[16]],
+            [{'Float': 1.0, 'FloatList': [1.0, 2.0], 'Int': 1, 'Str': '1', 'NotExistsKey': 10}],
+            [{'Float': 1.0, 'FloatList': [1.0, 2.0], 'Int': 1, 'Str': '1'}]
         ),
     ],
     ids=['column_validation=False', 'column_validation=True']
diff --git a/tests/unit_tests/web/test_app.py b/tests/unit_tests/web/test_app.py
index 27b2e51..9711d57 100644
--- a/tests/unit_tests/web/test_app.py
+++ b/tests/unit_tests/web/test_app.py
@@ -365,6 +365,32 @@ async def test_predict_model_numpy_returned_valid(web_app_test_client, list_to_n
         assert response.json() == {"predict_result": [[1, 2, 3]]}
 
 
+@pytest.mark.asyncio
+async def test_predict_model_with_list_fields_column_validation(web_app_test_client, list_to_numpy_array_model):
+    """POST /predict with column validation on and List-typed columns; expect 200 and the lists back."""
+    list_columns = [
+        {"name": column_name, "type": "int", "collection_type": "List"}
+        for column_name in ("col1", "col2")
+    ]
+    model_conf = ModelConfig(
+        data_transformer_for_predict=ModelDataTransformerType.NUMPY_ARR,
+        data_transformer_for_predicted=ModelDataTransformerType.NUMPY_ARR,
+        columns=list_columns,
+    )
+    model = MLupModel(ml_model=list_to_numpy_array_model, conf=model_conf)
+    web_conf = WebAppConfig(mode=WebAppArchitecture.directly_to_predict, column_validation=True)
+    web_app = MLupWebApp(ml=model, conf=web_conf)
+    model.load()
+    web_app.load()
+
+    payload = {'X': [{"col1": [1, 2, 3], "col2": [4, 5, 6]}]}
+    with web_app_test_client(web_app) as api_test_client:
+        response = await api_test_client.post("/predict", json=payload)
+        assert response.status_code == 200
+        assert response.headers['x-predict-id']
+        assert response.json() == {"predict_result": [[[1, 2, 3], [4, 5, 6]]]}
+
+
 @pytest.mark.asyncio
 async def test_predict_model_numpy_returned_not_valid(web_app_test_client, list_to_numpy_array_model):
     mlup_model = MLupModel(