From e59d51b00f6740a4415a98c5b02f5a384e96d9c4 Mon Sep 17 00:00:00 2001 From: Deys Timofey Date: Tue, 12 Nov 2024 22:51:09 +0500 Subject: [PATCH] Add tests. Add docs --- docs/config_file.md | 5 +- tests/unit_tests/ml/test_data_transformers.py | 102 ++++++++++++++---- tests/unit_tests/ml/test_model.py | 18 ++++ tests/unit_tests/web/test_api_validators.py | 39 +++++-- tests/unit_tests/web/test_app.py | 26 +++++ 5 files changed, 160 insertions(+), 30 deletions(-) diff --git a/docs/config_file.md b/docs/config_file.md index e59b385..d8e496d 100644 --- a/docs/config_file.md +++ b/docs/config_file.md @@ -34,11 +34,12 @@ Default is `ModelLibraryType.SKLEARN`. `columns (Optional[List[Dict[str, str]]])` - Columns description for model predict method. Format: List[Dict]. - Example [{"name": "col1", "type": "str", "required": True, "default": None}] + Example [{"name": "col1", "type": "str", "required": True, "default": None, "collection_type": "List"}] name - Column name; type - Column type in string: int, str, float, bool. required - bool and optional field. By default is True. - Default - Any data and optional. + default - Optional default value for the column; may be any data. + collection_type - Optional collection type that wraps the column type. Supported: List. Default is None. If you not set columns, then columns validation should be False. 
diff --git a/tests/unit_tests/ml/test_data_transformers.py b/tests/unit_tests/ml/test_data_transformers.py index 100cd85..b083f21 100644 --- a/tests/unit_tests/ml/test_data_transformers.py +++ b/tests/unit_tests/ml/test_data_transformers.py @@ -93,11 +93,11 @@ def test_transform_to_model_format_from_list(self): def test_transform_to_model_format_from_dict_with_columns(self): data = [ - {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, - {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52}, - {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53}, - {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54}, - {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55}, + {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51, 'col6': [61, 71]}, + {'col1': 12, 'col2': 22, 'col3': 32, 'col4': 42, 'col5': 52, 'col6': [62, 72]}, + {'col1': 13, 'col2': 23, 'col3': 33, 'col4': 43, 'col5': 53, 'col6': [63, 73]}, + {'col1': 14, 'col2': 24, 'col3': 34, 'col4': 44, 'col5': 54, 'col6': [64, 74]}, + {'col1': 15, 'col2': 25, 'col3': 35, 'col4': 45, 'col5': 55, 'col6': [65, 75]}, ] cols = [ {'name': 'col1', 'type': 'int'}, @@ -105,6 +105,7 @@ def test_transform_to_model_format_from_dict_with_columns(self): {'name': 'col3', 'type': 'int'}, {'name': 'col4', 'type': 'int'}, {'name': 'col5', 'type': 'int'}, + {'name': 'col6', 'type': 'int', 'collection_type': 'List'}, ] # Check create pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) @@ -168,13 +169,14 @@ def test_transform_to_model_format_from_list(self): {'name': 'col3', 'type': 'int'}, {'name': 'colstr', 'type': 'str'}, {'name': 'colbool', 'type': 'bool'}, + {'name': 'collistfloat', 'type': 'float', 'collection_type': 'List'}, ] data = [ - [11, 21, 31, 'colstr', True], - [12, 22, 32, 'colstr', True], - [13, 23, 33, 'colstr', False], - [14, 24, 34, 'colstr', True], - [15, 25, 35, 'colstr', True], + [11, 21, 31, 'colstr', True, [41.0, 51.0]], + [12, 22, 32, 'colstr', 
True, [42.0, 52.0]], + [13, 23, 33, 'colstr', False, [43.0, 53.0]], + [14, 24, 34, 'colstr', True, [44.0, 54.0]], + [15, 25, 35, 'colstr', True, [45.0, 55.0]], ] df = self.transformer_class().transform_to_model_format(data, columns=cols) @@ -187,11 +189,11 @@ def test_transform_to_model_format_from_list(self): def test_transform_to_json_format(self): data = [ - {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}, - {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}, - {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}, - {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}, - {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}, + {'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [41.0, 51.0]}, + {'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}, + {'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}, + {'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}, + {'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}, ] df = pd.DataFrame(data=data) trans_data = self.transformer_class().transform_to_json_format(df) @@ -200,11 +202,11 @@ def test_transform_to_json_format(self): def test_transform_to_json_format_from_list(self): data = [ - [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True}], - [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True}], - [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False}], - [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True}], - [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True}], + [{'col1': 11, 'col2': 21, 'col3': 31, 'colstr': 'colstr', 'colbool': True, 'collistfloat': 
[41.0, 51.0]}], + [{'col1': 12, 'col2': 22, 'col3': 32, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [42.0, 52.0]}], + [{'col1': 13, 'col2': 23, 'col3': 33, 'colstr': 'colstr', 'colbool': False, 'collistfloat': [43.0, 53.0]}], + [{'col1': 14, 'col2': 24, 'col3': 34, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [44.0, 54.0]}], + [{'col1': 15, 'col2': 25, 'col3': 35, 'colstr': 'colstr', 'colbool': True, 'collistfloat': [45.0, 55.0]}], ] dfs = [pd.DataFrame(data=d) for d in data] trans_data = self.transformer_class().transform_to_json_format(dfs) @@ -253,6 +255,26 @@ def test_transform_to_model_format_from_list(self): pred_d = self.transformer_class().transform_to_model_format(data) assert np.array_equal(pred_d, np.array(data)) + def test_transform_to_model_format_from_dict_with_list_columns(self): + data = [ + {'col1': [11, 111], 'col2': [21, 221]}, + {'col1': [12, 112], 'col2': [22, 222]}, + {'col1': [13, 113], 'col2': [23, 223]}, + {'col1': [14, 114], 'col2': [24, 224]}, + {'col1': [15, 115], 'col2': [25, 225]}, + ] + cols = [ + {'name': 'col1', 'type': 'int', 'collection_type': 'List'}, + {'name': 'col2', 'type': 'int', 'collection_type': 'List'}, + ] + # Check create + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) + assert np.array_equal(pred_d, np.array([list(v.values()) for v in data])) + + # Check order by columns + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1]) + assert np.array_equal(pred_d, np.array([list(v.values())[::-1] for v in data])) + def test_transform_to_model_format_from_dict_with_columns(self): data = [ {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, @@ -359,6 +381,26 @@ def test_transform_to_model_format_from_list(self): pred_d = self.transformer_class().transform_to_model_format(data) assert_tf_tensors(pred_d, tensorflow.convert_to_tensor(data)) + def test_transform_to_model_format_from_dict_with_list_columns(self): + data = [ + {'col1': [11, 
111], 'col2': [21, 221]}, + {'col1': [12, 112], 'col2': [22, 222]}, + {'col1': [13, 113], 'col2': [23, 223]}, + {'col1': [14, 114], 'col2': [24, 224]}, + {'col1': [15, 115], 'col2': [25, 225]}, + ] + cols = [ + {'name': 'col1', 'type': 'int', 'collection_type': 'List'}, + {'name': 'col2', 'type': 'int', 'collection_type': 'List'}, + ] + # Check create + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) + assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values()) for v in data])) + + # Check order by columns + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1]) + assert_tf_tensors(pred_d, tensorflow.convert_to_tensor([list(v.values())[::-1] for v in data])) + def test_transform_to_model_format_from_dict_with_columns(self): data = [ {'col1': 11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, @@ -467,6 +509,26 @@ def test_transform_to_model_format_from_list(self): pred_d = self.transformer_class().transform_to_model_format(data) assert is_equal_torch_tensors(pred_d, torch.tensor(data)) + def test_transform_to_model_format_from_dict_with_list_columns(self): + data = [ + {'col1': [11, 111], 'col2': [21, 221]}, + {'col1': [12, 112], 'col2': [22, 222]}, + {'col1': [13, 113], 'col2': [23, 223]}, + {'col1': [14, 114], 'col2': [24, 224]}, + {'col1': [15, 115], 'col2': [25, 225]}, + ] + cols = [ + {'name': 'col1', 'type': 'int', 'collection_type': 'List'}, + {'name': 'col2', 'type': 'int', 'collection_type': 'List'}, + ] + # Check create + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols) + assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values()) for v in data])) + + # Check order by columns + pred_d = self.transformer_class().transform_to_model_format(data, columns=cols[::-1]) + assert is_equal_torch_tensors(pred_d, torch.tensor([list(v.values())[::-1] for v in data])) + def test_transform_to_model_format_from_dict_with_columns(self): data = [ {'col1': 
11, 'col2': 21, 'col3': 31, 'col4': 41, 'col5': 51}, diff --git a/tests/unit_tests/ml/test_model.py b/tests/unit_tests/ml/test_model.py index ddc26bd..de2a4a7 100644 --- a/tests/unit_tests/ml/test_model.py +++ b/tests/unit_tests/ml/test_model.py @@ -800,3 +800,21 @@ async def test_predict_from_numpy(self): predicted_data = await mlup_model.predict_from(X=[1, 2, 3, 4, 5, 6, 7, 8, 9]) np.array_equal(predicted_data, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])) + + @pytest.mark.asyncio + async def test_predict_with_list_columns(self): + mlup_model = MLupModel( + ml_model=ModelWithX(), + conf=ModelConfig( + data_transformer_for_predict=ModelDataTransformerType.NUMPY_ARR, + data_transformer_for_predicted=ModelDataTransformerType.SRC_TYPES, + columns=[ + {"name": "col1", "type": "int", "collection_type": "List"}, + {"name": "col2", "type": "int", "collection_type": "List"}, + ], + ) + ) + mlup_model.load() + + predicted_data = await mlup_model.predict_from(X=[[1, 2], [3, 4], [5, 6]]) + assert np.array_equal(predicted_data, np.array([[1, 2], [3, 4], [5, 6]])) diff --git a/tests/unit_tests/web/test_api_validators.py b/tests/unit_tests/web/test_api_validators.py index 39eda74..1b7bf71 100644 --- a/tests/unit_tests/web/test_api_validators.py +++ b/tests/unit_tests/web/test_api_validators.py @@ -18,6 +18,7 @@ 'bool': bool, 'str': str, 'list': list, + 'List': List, } src_columns = [ {"name": "Float", "type": "float"}, @@ -26,6 +27,8 @@ {"name": "FloatNotRequired", "type": "float", "required": False}, {"name": "FloatNotRequiredDefault", "type": "float", "required": False, "default": 1.4}, {"name": "FloatRequiredDefault", "type": "float", "required": True, "default": 1.4}, + {"name": "FloatList", "type": "float", "collection_type": "List"}, + {"name": "FloatOptionalList", "type": "float", "collection_type": "List", "required": False}, {"name": "Int", "type": "int"}, {"name": "IntDefault", "type": "int", "default": 4}, @@ -33,6 +36,8 @@ {"name": "IntNotRequired", "type": "int", "required": 
False}, {"name": "IntNotRequiredDefault", "type": "int", "required": False, "default": 4}, {"name": "IntRequiredDefault", "type": "int", "required": True, "default": 4}, + {"name": "IntList", "type": "int", "collection_type": "List"}, + {"name": "IntOptionalList", "type": "int", "collection_type": "List", "required": False}, {"name": "Str", "type": "str"}, {"name": "StrDefault", "type": "str", "default": "str"}, @@ -40,6 +45,8 @@ {"name": "StrNotRequired", "type": "str", "required": False}, {"name": "StrNotRequiredDefault", "type": "str", "required": False, "default": "str"}, {"name": "StrRequiredDefault", "type": "str", "required": True, "default": "str"}, + {"name": "StrList", "type": "str", "collection_type": "List"}, + {"name": "StrOptionalList", "type": "str", "collection_type": "List", "required": False}, {"name": "Bool", "type": "bool"}, {"name": "BoolDefault", "type": "bool", "default": True}, @@ -47,6 +54,8 @@ {"name": "BoolNotRequired", "type": "bool", "required": False}, {"name": "BoolNotRequiredDefault", "type": "bool", "required": False, "default": True}, {"name": "BoolRequiredDefault", "type": "bool", "required": True, "default": True}, + {"name": "BoolList", "type": "bool", "collection_type": "List"}, + {"name": "BoolOptionalList", "type": "bool", "collection_type": "List", "required": False}, ] @@ -56,7 +65,10 @@ def test_make_map_pydantic_columns(): for col_config in src_columns: pred_col_type, pred_field_info = cols_configs.pop(col_config["name"]) - assert pred_col_type is column_types_map[col_config["type"]] + if "collection_type" in col_config: + assert pred_col_type is column_types_map[col_config["collection_type"]][column_types_map[col_config["type"]]] + else: + assert pred_col_type is column_types_map[col_config["type"]] assert pred_field_info.title == col_config["name"] if 'default' in col_config: assert pred_field_info.default is col_config["default"] @@ -81,7 +93,10 @@ def test_make_map_pydantic_validation(): ) # Check valid type - 
_test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) + if "collection_type" in col_config: + _test_pydantic_model(**{col_config["name"]: [1]}) + else: + _test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) # Check not valid type try: not_valid_value = list @@ -94,12 +109,18 @@ def test_make_map_pydantic_validation(): elif pred_col_type is bool: assert msg_str == 'value could not be parsed to a boolean' else: - assert msg_str.startswith(f'value is not a valid {col_config["type"]}') + if "collection_type" in col_config: + assert msg_str.startswith(f'value is not a valid {col_config["collection_type"].lower()}') + else: + assert msg_str.startswith(f'value is not a valid {col_config["type"]}') # Check required if col_config.get("required", True): # Check valid value - _test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) + if "collection_type" in col_config: + _test_pydantic_model(**{col_config["name"]: [1]}) + else: + _test_pydantic_model(**{col_config["name"]: pred_col_type(1)}) # Check none value try: _test_pydantic_model(**{col_config["name"]: None}) @@ -144,6 +165,8 @@ def test_make_map_pydantic_columns_with_IS_X(model_for_columns): assert pred_col_type is List[model_for_columns] else: assert pred_col_type is List[Any] + elif "collection_type" in col_config: + assert pred_col_type is List[column_types_map[col_config["type"]]] else: assert pred_col_type is column_types_map[col_config["type"]] assert pred_field_info.title == col_config["name"] @@ -166,12 +189,12 @@ def test_make_map_pydantic_columns_with_IS_X(model_for_columns): @pytest.mark.parametrize( 'column_validation, columns, data, expected_data', [ - (False, src_columns, [1, 2, 3], [1, 2, 3]), + (False, src_columns, [1, [4, 5], 2, 3], [1, [4, 5], 2, 3]), ( True, - [src_columns[0], src_columns[6], src_columns[12]], - [{'Float': 1.0, 'Int': 1, 'Str': '1', 'NotExistsKey': 10}], - [{'Float': 1.0, 'Int': 1, 'Str': '1'}] + [src_columns[0], src_columns[6], src_columns[8], 
src_columns[16]], + [{'Float': 1.0, 'FloatList': [1.0, 2.0], 'Int': 1, 'Str': '1', 'NotExistsKey': 10}], + [{'Float': 1.0, 'FloatList': [1.0, 2.0], 'Int': 1, 'Str': '1'}] ), ], ids=['column_validation=False', 'column_validation=True'] diff --git a/tests/unit_tests/web/test_app.py b/tests/unit_tests/web/test_app.py index 27b2e51..9711d57 100644 --- a/tests/unit_tests/web/test_app.py +++ b/tests/unit_tests/web/test_app.py @@ -365,6 +365,32 @@ async def test_predict_model_numpy_returned_valid(web_app_test_client, list_to_n assert response.json() == {"predict_result": [[1, 2, 3]]} +@pytest.mark.asyncio +async def test_predict_model_with_list_fields_column_validation(web_app_test_client, list_to_numpy_array_model): + mlup_model = MLupModel( + ml_model=list_to_numpy_array_model, + conf=ModelConfig( + data_transformer_for_predict=ModelDataTransformerType.NUMPY_ARR, + data_transformer_for_predicted=ModelDataTransformerType.NUMPY_ARR, + columns=[ + {"name": "col1", "type": "int", "collection_type": "List"}, + {"name": "col2", "type": "int", "collection_type": "List"}, + ] + ) + ) + mlup_web_app = MLupWebApp( + ml=mlup_model, + conf=WebAppConfig(mode=WebAppArchitecture.directly_to_predict, column_validation=True) + ) + mlup_model.load() + mlup_web_app.load() + with web_app_test_client(mlup_web_app) as api_test_client: + response = await api_test_client.post("/predict", json={'X': [{"col1": [1, 2, 3], "col2": [4, 5, 6]}]}) + assert response.status_code == 200 + assert response.headers['x-predict-id'] + assert response.json() == {"predict_result": [[[1, 2, 3], [4, 5, 6]]]} + + @pytest.mark.asyncio async def test_predict_model_numpy_returned_not_valid(web_app_test_client, list_to_numpy_array_model): mlup_model = MLupModel(