diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cc0a47..ed982db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,8 +3,17 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). + +## [1.0.0] - 2024-08-28 -## [v0.0.2] - 2024-07-25 +### Added +- Test for TextGenerationChatModel. +### Changed + +### Fixed +- Added @abstractmethod decorators to all abstract functions in function_models and aixplain_models. + +## [0.0.2] - 2024-07-25 ### Added - Added support for script nodes. diff --git a/aixplain/model_interfaces/__version__.py b/aixplain/model_interfaces/__version__.py index 4d64870..6aa2bf2 100644 --- a/aixplain/model_interfaces/__version__.py +++ b/aixplain/model_interfaces/__version__.py @@ -1,7 +1,7 @@ __title__ = "model-interfaces" __description__ = "model-interfaces is the interface to host your models on aiXplain" __url__ = "https://github.com/aixplain/aixplain-models/tree/main/docs" -__version__ = "0.0.2" +__version__ = "1.0.0" __author__ = "Duraikrishna Selvaraju and Michael Lam" __author_email__ = "krishna.durai@aixplain.com" __license__ = "http://www.apache.org/licenses/LICENSE-2.0" diff --git a/aixplain/model_interfaces/interfaces/aixplain_model.py b/aixplain/model_interfaces/interfaces/aixplain_model.py index 80b0910..a81cf22 100644 --- a/aixplain/model_interfaces/interfaces/aixplain_model.py +++ b/aixplain/model_interfaces/interfaces/aixplain_model.py @@ -1,12 +1,16 @@ from kserve.model import Model from typing import Dict, List +from abc import abstractmethod + from aixplain.model_interfaces.schemas.function.function_input import APIInput from aixplain.model_interfaces.schemas.function.function_output import APIOutput class AixplainModel(Model): + @abstractmethod def run_model(self, api_input: Dict[str, List[APIInput]], headers: Dict[str, str] = None) -> Dict[str, List[APIOutput]]: pass + 
@abstractmethod def predict(self, request: Dict[str, List[APIInput]], headers: Dict[str, str] = None) -> Dict[str, List[APIOutput]]: pass \ No newline at end of file diff --git a/aixplain/model_interfaces/interfaces/function_models.py b/aixplain/model_interfaces/interfaces/function_models.py index 2678dc9..6a23815 100644 --- a/aixplain/model_interfaces/interfaces/function_models.py +++ b/aixplain/model_interfaces/interfaces/function_models.py @@ -4,6 +4,7 @@ from typing import Dict, List, Union, Optional, Text from enum import Enum from pydantic import BaseModel, validate_call +from abc import abstractmethod from aixplain.model_interfaces.schemas.function.function_input import ( SegmentationInput, @@ -41,163 +42,125 @@ from aixplain.model_interfaces.schemas.modality.modality_output import TextListOutput from aixplain.model_interfaces.interfaces.aixplain_model import AixplainModel +class TranslationPredictInput(BaseModel): + instances: List[TranslationInput] + class TranslationModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[TranslationInput]], headers: Dict[str, str] = None) -> Dict[str, List[TranslationOutput]]: - pass + @abstractmethod + @validate_call + def run_model(self, api_input: List[TranslationInput], headers: Dict[str, str] = None) -> List[TranslationOutput]: + raise NotImplementedError + + @validate_call + def predict(self, request: TranslationPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - translation_input_list = [] - # Convert JSON serializables into TranslationInputs - for instance in instances: - translation_input = TranslationInput(**instance) - translation_input_list.append(translation_input) - translation_output = self.run_model({"instances": translation_input_list}, headers) - - # Convert 
JSON serializables into TranslationOutputs - for i in range(len(translation_output["predictions"])): - translation_output_dict = translation_output["predictions"][i].dict() - TranslationOutput(**translation_output_dict) - translation_output["predictions"][i] = translation_output_dict - return translation_output +class SpeechRecognitionPredictInput(BaseModel): + instances: List[SpeechRecognitionInput] class SpeechRecognitionModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[SpeechRecognitionInput]], headers: Dict[str, str] = None) -> Dict[str, List[SpeechRecognitionOutput]]: + @abstractmethod + @validate_call + def run_model(self, api_input: List[SpeechRecognitionInput], headers: Dict[str, str] = None) -> List[SpeechRecognitionOutput]: pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - - instances = request['instances'] - sr_input_list = [] - # Convert JSON serializables into SpeechRecognitionInputs - for instance in instances: - sr_input = SpeechRecognitionInput(**instance) - sr_input_list.append(sr_input) - sr_output = self.run_model({"instances": sr_input_list}, headers) - + @validate_call + def predict(self, request: SpeechRecognitionPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output - # Convert JSON serializables into SpeechRecognitionOutputs - for i in range(len(sr_output["predictions"])): - sr_output_dict = sr_output["predictions"][i].dict() - SpeechRecognitionOutput(**sr_output_dict) - sr_output["predictions"][i] = sr_output_dict - return sr_output +class DiacritizationPredictInput(BaseModel): + instances: List[DiacritizationInput] class DiacritizationModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[DiacritizationInput]], headers: Dict[str, str] = None) -> Dict[str, List[DiacritizationOutput]]: + @abstractmethod + @validate_call + def run_model(self, api_input: 
List[DiacritizationInput], headers: Dict[str, str] = None) -> List[DiacritizationOutput]: pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - diacritiztn_input_list = [] - # Convert JSON serializables into DiacritizationInputs - for instance in instances: - diacritiztn_input = DiacritizationInput(**instance) - diacritiztn_input_list.append(diacritiztn_input) - diacritiztn_output = self.run_model({"instances": diacritiztn_input_list}, headers) - - # Convert JSON serializables into DiacritizationOutputs - for i in range(len(diacritiztn_output["predictions"])): - diacritiztn_output_dict = diacritiztn_output["predictions"][i].dict() - DiacritizationOutput(**diacritiztn_output_dict) - diacritiztn_output["predictions"][i] = diacritiztn_output_dict - return diacritiztn_output + @validate_call + def predict(self, request: DiacritizationPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output + +class ClassificationPredictInput(BaseModel): + instances: List[ClassificationInput] class ClassificationModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[ClassificationInput]], headers: Dict[str, str] = None) -> Dict[str, List[ClassificationOutput]]: - pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - classification_input_list = [] - # Convert JSON serializables into ClassificationInputs - for instance in instances: - classification_input = ClassificationInput(**instance) - classification_input_list.append(classification_input) - classification_output = self.run_model({"instances": classification_input_list}, headers) - - # Convert JSON serializables into ClassificationOutputs - for i in range(len(classification_output["predictions"])): - classification_output_dict = 
classification_output["predictions"][i].dict() - ClassificationOutput(**classification_output_dict) - classification_output["predictions"][i] = classification_output_dict - return classification_output + @abstractmethod + @validate_call + def run_model(self, api_input: List[ClassificationInput], headers: Dict[str, str] = None) -> List[ClassificationOutput]: + raise NotImplementedError + + @validate_call + def predict(self, request: ClassificationPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output + +class SpeechEnhancementPredictInput(BaseModel): + instances: List[SpeechEnhancementInput] class SpeechEnhancementModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[SpeechEnhancementInput]], headers: Dict[str, str] = None) -> Dict[str, List[SpeechEnhancementOutput]]: + @abstractmethod + @validate_call + def run_model(self, api_input: List[SpeechEnhancementInput], headers: Dict[str, str] = None) -> List[SpeechEnhancementOutput]: pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - speech_enhancement_input_list = [] - # Convert JSON serializables into SpeechEnhancementInputs - for instance in instances: - speech_enhancement_input = SpeechEnhancementInput(**instance) - speech_enhancement_input_list.append(speech_enhancement_input) - speech_enhancement_output = self.run_model({"instances": speech_enhancement_input_list}, headers) - - # Convert JSON serializables into SpeechEnhancementOutputs - for i in range(len(speech_enhancement_output["predictions"])): - speech_enhancement_dict = speech_enhancement_output["predictions"][i].dict() - SpeechEnhancementOutput(**speech_enhancement_dict) - try: - speech_enhancement_dict["audio_config"]["audio_encoding"] = speech_enhancement_dict["audio_config"]["audio_encoding"].value - except AttributeError as e: - raise 
tornado.web.HTTPError( - status_code=HTTPStatus.UNPROCESSABLE_ENTITY, - reason="The user request, although correct, is generating unacceptable output from the server." - ) - - speech_enhancement_output["predictions"][i] = speech_enhancement_dict - return speech_enhancement_output + @validate_call + def predict(self, request: SpeechEnhancementPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output -class SpeechSynthesis(AixplainModel): - def run_model(self, api_input: Dict[str, List[SpeechSynthesisInput]], headers: Dict[str, str] = None) -> Dict[str, List[SpeechSynthesisOutput]]: - pass +class SpeechSynthesisPredictInput(BaseModel): + instances: List[SpeechSynthesisInput] - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - speech_synthesis_input_list = [] - # Convert JSON serializables into SpeechEnhancementInputs - for instance in instances: - speech_synthesis_input = SpeechSynthesisInput(**instance) - speech_synthesis_input_list.append(speech_synthesis_input) +class SpeechSynthesis(AixplainModel): - speech_synthesis_output = self.run_model({"instances": speech_synthesis_input_list}, headers) + @abstractmethod + @validate_call + def run_model(self, api_input: List[SpeechSynthesisInput], headers: Dict[str, str] = None) -> List[SpeechSynthesisOutput]: + pass + @validate_call + def predict(self, request: SpeechSynthesisPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output - # Convert JSON serializables into SpeechEnhancementOutputs - for i in range(len(speech_synthesis_output["instances"])): - speech_synthesis_dict = speech_synthesis_output["instances"][i].dict() - SpeechSynthesisOutput(**speech_synthesis_dict) - speech_synthesis_output["instances"][i] = speech_synthesis_dict - 
return speech_synthesis_output +class TextToImageGenerationPredictInput(BaseModel): + instances: List[TextToImageGenerationInput] class TextToImageGeneration(AixplainModel): - def run_model(self, api_input: Dict[str, List[TextToImageGenerationInput]], headers: Dict[str, str] = None) -> Dict[str, List[TextToImageGenerationOutput]]: + + @abstractmethod + @validate_call + def run_model(self, api_input: List[TextToImageGenerationInput], headers: Dict[str, str] = None) -> List[TextToImageGenerationOutput]: pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - text_to_image_generation_input_list = [] - # Convert JSON serializables into TextToImageGenerationInputs - for instance in instances: - text_to_image_generation_input = TextToImageGenerationInput(**instance) - text_to_image_generation_input_list.append(text_to_image_generation_input) - - text_to_image_generation_output = self.run_model({"instances": text_to_image_generation_input_list}) - - # Convert JSON serializables into TextToImageGenerationOutputs - for i in range(len(text_to_image_generation_output["predictions"])): - text_to_image_generation_dict = text_to_image_generation_output["predictions"][i].dict() - TextToImageGenerationOutput(**text_to_image_generation_dict) - text_to_image_generation_output["predictions"][i] = text_to_image_generation_dict - return text_to_image_generation_output + @validate_call + def predict(self, request: TextToImageGenerationPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output class TextGenerationChatTemplatizeInput(BaseModel): data: List[Dict] @@ -213,6 +176,7 @@ class TextGenerationTokenizeOutput(BaseModel): token_counts: List[List[int]] class TextGenerationModel(AixplainModel): + @validate_call def predict(self, request: TextGenerationPredictInput, headers: Dict[str, str] = None) -> Dict: 
instances = request.instances @@ -229,15 +193,19 @@ def predict(self, request: TextGenerationPredictInput, headers: Dict[str, str] = else: raise ValueError("Invalid function.") + @abstractmethod @validate_call def run_model(self, api_input: List[TextGenerationInput], headers: Dict[str, str] = None) -> List[TextGenerationOutput]: raise NotImplementedError + @abstractmethod @validate_call def tokenize(self, api_input: List[TextListInput], headers: Dict[str, str] = None) -> List[List[int]]: raise NotImplementedError class TextGenerationChatModel(TextGenerationModel): + + @abstractmethod @validate_call def run_model(self, api_input: List[TextInput], headers: Dict[str, str] = None) -> List[TextGenerationOutput]: raise NotImplementedError @@ -263,134 +231,117 @@ def predict(self, request: TextGenerationPredictInput, headers: Dict[str, str] = else: raise ValueError("Invalid function.") + @abstractmethod @validate_call def templatize(self, api_input: List[TextGenerationChatTemplatizeInput], headers: Dict[str, str] = None) -> List[Text]: - pass + raise NotImplementedError + + # NOTE: TOKENIZE is inherited from TextGenerationModel and must also be + # implemented. See method signature above. 
+ +class TextSummarizationPredictInput(BaseModel): + instances: List[TextSummarizationInput] class TextSummarizationModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[TextSummarizationInput]], headers: Dict[str, str] = None) -> Dict[str, List[TextSummarizationOutput]]: - pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - text_summarization_input_list = [] - # Convert JSON serializables into TextSummarizationInputs - for instance in instances: - text_summarization_input = TextSummarizationInput(**instance) - text_summarization_input_list.append(text_summarization_input) - - text_summarization_output = self.run_model({"instances": text_summarization_input_list}) - - # Convert JSON serializables into TextSummarizationOutputs - for i in range(len(text_summarization_output["predictions"])): - text_summarization_dict = text_summarization_output["predictions"][i].dict() - TextSummarizationOutput(**text_summarization_dict) - text_summarization_output["predictions"][i] = text_summarization_dict - return text_summarization_output + @abstractmethod + @validate_call + def run_model(self, api_input: List[TextSummarizationInput], headers: Dict[str, str] = None) -> List[TextSummarizationOutput]: + raise NotImplementedError + + @validate_call + def predict(self, request: TextSummarizationPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output +class SearchModelPredictInput(BaseModel): + instances: List[SearchInput] + class SearchModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[SearchInput]], headers: Dict[str, str] = None) -> Dict[str, List[SearchOutput]]: + + @abstractmethod + @validate_call + def run_model(self, api_input: List[SearchInput], headers: Dict[str, str] = None) -> List[SearchOutput]: pass - def predict(self, request: Dict[str, str], headers: 
Dict[str, str] = None) -> Dict: - instances = request['instances'] - search_input_list = [] - # Convert JSON serializables into SearchInputs - for instance in instances: - search_input = SearchInput(**instance) - search_input_list.append(search_input) - - search_output = self.run_model({"instances": search_input_list}) - - # Convert JSON serializables into SearchOutputs - for i in range(len(search_output["predictions"])): - search_dict = search_output["predictions"][i].dict() - SearchOutput(**search_dict) - search_output["predictions"][i] = search_dict - return search_output + @validate_call + def predict(self, request: SearchModelPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output +class TextReconstructionPredictInput(BaseModel): + instances: List[TextReconstructionInput] + class TextReconstructionModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[TextReconstructionInput]], headers: Dict[str, str] = None) -> Dict[str, List[TextReconstructionInput]]: - pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - text_reconstruction_input_list = [] - # Convert JSON serializables into TextReconstructionInputs - for instance in instances: - text_reconstruction_input = TextReconstructionInput(**instance) - text_reconstruction_input_list.append(text_reconstruction_input) - - text_reconstruction_output = self.run_model({"instances": text_reconstruction_input_list}) - - # Convert JSON serializables into TextReconstructionOutputs - for i in range(len(text_reconstruction_output["predictions"])): - text_reconstruction_dict = text_reconstruction_output["predictions"][i].dict() - TextReconstructionOutput(**text_reconstruction_dict) - text_reconstruction_output["predictions"][i] = text_reconstruction_dict - return text_reconstruction_output - + @abstractmethod + @validate_call + def 
run_model(self, api_input: List[TextReconstructionInput], headers: Dict[str, str] = None) -> List[TextReconstructionOutput]: + raise NotImplementedError + + @validate_call + def predict(self, request: TextReconstructionPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output + +class FillTextMaskPredictInput(BaseModel): + instances: List[FillTextMaskInput] + class FillTextMaskModel(AixplainModel): - def run_model(self, api_input: Dict[str, List[FillTextMaskInput]], headers: Dict[str, str] = None) -> Dict[str, List[FillTextMaskOutput]]: - pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - fill_text_mask_input_list = [] - # Convert JSON serializables into FillTextMaskInputs - for instance in instances: - fill_text_mask_input = FillTextMaskInput(**instance) - fill_text_mask_input_list.append(fill_text_mask_input) - - fill_text_mask_output = self.run_model({"instances": fill_text_mask_input_list}) - - # Convert JSON serializables into FillTextMaskOutputs - for i in range(len(fill_text_mask_output["predictions"])): - fill_text_mask_dict = fill_text_mask_output["predictions"][i].dict() - FillTextMaskOutput(**fill_text_mask_dict) - fill_text_mask_output["predictions"][i] = fill_text_mask_dict - return fill_text_mask_output + @abstractmethod + @validate_call + def run_model(self, api_input: List[FillTextMaskInput], headers: Dict[str, str] = None) -> List[FillTextMaskOutput]: + raise NotImplementedError + + @validate_call + def predict(self, request: FillTextMaskPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output +class SubtitleTranslationPredictInput(BaseModel): + instances: List[SubtitleTranslationInput] + class SubtitleTranslationModel(AixplainModel): - def run_model(self, 
api_input: Dict[str, List[SubtitleTranslationInput]], headers: Dict[str, str] = None) -> Dict[str, List[SubtitleTranslationOutput]]: - pass - def predict(self, request: Dict[str, str], headers: Dict[str, str] = None) -> Dict: - instances = request['instances'] - subtitle_translation_input_list = [] - # Convert JSON serializables into SubtitleTranslationInputs - for instance in instances: - subtitle_translation_input = SubtitleTranslationInput(**instance) - subtitle_translation_input_list.append(subtitle_translation_input) - - subtitle_translation_output = self.run_model({"instances": subtitle_translation_input_list}) - - # Convert JSON serializables into SubtitleTranslationOutput - for i in range(len(subtitle_translation_output["predictions"])): - subtitle_translation_dict = subtitle_translation_output["predictions"][i].dict() - SubtitleTranslationOutput(**subtitle_translation_dict) - subtitle_translation_output["predictions"][i] = subtitle_translation_dict - return subtitle_translation_output + @abstractmethod + @validate_call + def run_model(self, api_input: List[SubtitleTranslationInput], headers: Dict[str, str] = None) -> List[SubtitleTranslationOutput]: + raise NotImplementedError + + @validate_call + def predict(self, request: SubtitleTranslationPredictInput, headers: Dict[str, str] = None) -> Dict: + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output + +class SegmentationPredictInput(BaseModel): + instances: List[SegmentationInput] class SegmentationModel(AixplainModel): + + @abstractmethod + @validate_call def run_model( self, - api_input: Dict[str, List[SegmentationInput]], + api_input: List[SegmentationInput], headers: Dict[str, str] = None - ) -> Dict[str, List[SegmentationOutput]]: - pass + ) -> List[SegmentationOutput]: + raise NotImplementedError - def predict(self, request: Dict[str, str], + @validate_call + def predict(self, request: SegmentationPredictInput, headers: Dict[str, str] = None) -> 
dict: - instances = [] - - for instance in request['instances']: - segmentation_input = SegmentationInput(**instance) - instances.append(segmentation_input) - - output = self.run_model({"instances": instances}, headers) - - for i, prediction in enumerate(output["predictions"]): - output["predictions"][i] = prediction.dict() - - return output \ No newline at end of file + predict_output = { + "predictions": self.run_model(request.instances, headers) + } + return predict_output \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 1f3121c..d230cf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ includes = ["aixplain"] [project] name = "model-interfaces" -version = "0.0.2" +version = "1.0.0" description = "A package specifying the model interfaces supported by aiXplain" license = { text = "Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0" } dependencies = [ diff --git a/tests/unit_tests/models/test_mock_classification.py b/tests/unit_tests/models/test_mock_classification.py index 3367108..ded0c01 100644 --- a/tests/unit_tests/models/test_mock_classification.py +++ b/tests/unit_tests/models/test_mock_classification.py @@ -26,13 +26,13 @@ def test_predict(self): predict_output = mock_model.predict(predict_input) output_dict = predict_output["predictions"][0] - assert output_dict["data"] == "positive" - assert output_dict["predicted_labels"][0]["label"] == "positive" - assert output_dict["predicted_labels"][0]["confidence"] == 0.7 + assert output_dict.data == "positive" + assert output_dict.predicted_labels[0].label == "positive" + assert output_dict.predicted_labels[0].confidence == 0.7 class MockModel(ClassificationModel): - def run_model(self, api_input: Dict[str, List[ClassificationInput]], headers: Dict[str, str] = None) -> Dict[str, List[ClassificationOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[ClassificationInput], headers: Dict[str, str] = None) -> 
List[ClassificationOutput]: + instances = api_input predictions_list = [] # There's only 1 instance in this case. for instance in instances: @@ -50,7 +50,6 @@ def run_model(self, api_input: Dict[str, List[ClassificationInput]], headers: Di "data": data, "predicted_labels": labels } - speech_recognition_output = ClassificationOutput(**output_dict) - predictions_list.append(speech_recognition_output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + classification_output = ClassificationOutput(**output_dict) + predictions_list.append(classification_output) + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_diacritization.py b/tests/unit_tests/models/test_mock_diacritization.py index c9986bf..2812857 100644 --- a/tests/unit_tests/models/test_mock_diacritization.py +++ b/tests/unit_tests/models/test_mock_diacritization.py @@ -26,16 +26,15 @@ def test_predict(self): predict_output = mock_model.predict(predict_input) output_dict = predict_output["predictions"][0] - assert output_dict["data"] == "السَّلَامُ عَلَيْكُمْ" - assert output_dict["details"]["text"] == "السَّلَامُ عَلَيْكُمْ" - assert output_dict["details"]["confidence"] == 0.7 + assert output_dict.data == "السَّلَامُ عَلَيْكُمْ" + assert output_dict.details.text == "السَّلَامُ عَلَيْكُمْ" + assert output_dict.details.confidence == 0.7 class MockModel(DiacritizationModel): - def run_model(self, api_input: Dict[str, List[DiacritizationInput]], headers: Dict[str, str] = None) -> Dict[str, List[DiacritizationOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[DiacritizationInput], headers: Dict[str, str] = None) -> List[DiacritizationOutput]: predictions_list = [] # There's only 1 instance in this case. 
- for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = ("السَّلَامُ عَلَيْكُمْ", 0.7) @@ -52,5 +51,4 @@ def run_model(self, api_input: Dict[str, List[DiacritizationInput]], headers: Di } speech_recognition_output = DiacritizationOutput(**output_dict) predictions_list.append(speech_recognition_output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_fill_text_mask.py b/tests/unit_tests/models/test_mock_fill_text_mask.py index bf77535..88219d5 100644 --- a/tests/unit_tests/models/test_mock_fill_text_mask.py +++ b/tests/unit_tests/models/test_mock_fill_text_mask.py @@ -31,14 +31,13 @@ def test_predict(self): predict_output = mock_model.predict(predict_input) output_dict = predict_output["predictions"][0] - assert output_dict["data"] == "We are filling a text mask." + assert output_dict.data == "We are filling a text mask." class MockModel(FillTextMaskModel): - def run_model(self, api_input: Dict[str, List[FillTextMaskInput]], headers: Dict[str, str] = None) -> Dict[str, List[FillTextMaskOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[FillTextMaskInput], headers: Dict[str, str] = None) -> List[FillTextMaskOutput]: predictions_list = [] # There's only 1 instance in this case. - for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = "We are filling a text mask." 
@@ -53,5 +52,4 @@ def run_model(self, api_input: Dict[str, List[FillTextMaskInput]], headers: Dict } output = FillTextMaskOutput(**output_dict) predictions_list.append(output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_speech_enhancement.py b/tests/unit_tests/models/test_mock_speech_enhancement.py index b383c88..0f512d8 100644 --- a/tests/unit_tests/models/test_mock_speech_enhancement.py +++ b/tests/unit_tests/models/test_mock_speech_enhancement.py @@ -35,14 +35,13 @@ def test_predict(self): predict_output = mock_model.predict(predict_input) output_dict = predict_output["predictions"][0] - assert output_dict["data"] == "VGhpcyBpcyBhbiBhdWRpbyBvdXRwdXQ=" + assert output_dict.data == "VGhpcyBpcyBhbiBhdWRpbyBvdXRwdXQ=" class MockModel(SpeechEnhancementModel): - def run_model(self, api_input: Dict[str, List[SpeechEnhancementInput]], headers: Dict[str, str] = None) -> Dict[str, List[SpeechEnhancementOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[SpeechEnhancementInput], headers: Dict[str, str] = None) -> List[SpeechEnhancementOutput]: predictions_list = [] # There's only 1 instance in this case. 
- for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = encode(b"This is an audio output") @@ -59,5 +58,4 @@ def run_model(self, api_input: Dict[str, List[SpeechEnhancementInput]], headers: } speech_recognition_output = SpeechEnhancementOutput(**output_dict) predictions_list.append(speech_recognition_output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_speech_recognition.py b/tests/unit_tests/models/test_mock_speech_recognition.py index 5aaa599..e673bb4 100644 --- a/tests/unit_tests/models/test_mock_speech_recognition.py +++ b/tests/unit_tests/models/test_mock_speech_recognition.py @@ -35,16 +35,15 @@ def test_predict(self): predict_output = mock_model.predict(predict_input) output_dict = predict_output["predictions"][0] - assert output_dict["data"] == "This is a test transcription" - assert output_dict["details"]["text"] == "This is a test transcription" - assert output_dict["details"]["confidence"] == 0.7 + assert output_dict.data == "This is a test transcription" + assert output_dict.details.text == "This is a test transcription" + assert output_dict.details.confidence == 0.7 class MockModel(SpeechRecognitionModel): - def run_model(self, api_input: Dict[str, List[SpeechRecognitionInput]], headers: Dict[str, str] = None) -> Dict[str, List[SpeechRecognitionOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[SpeechRecognitionInput], headers: Dict[str, str] = None) -> List[SpeechRecognitionOutput]: predictions_list = [] # There's only 1 instance in this case. 
- for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = ("This is a test transcription", 0.7) @@ -61,5 +60,4 @@ def run_model(self, api_input: Dict[str, List[SpeechRecognitionInput]], headers: } speech_recognition_output = SpeechRecognitionOutput(**output_dict) predictions_list.append(speech_recognition_output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_subtitle_translation.py b/tests/unit_tests/models/test_mock_subtitle_translation.py index 2303d56..e8141f2 100644 --- a/tests/unit_tests/models/test_mock_subtitle_translation.py +++ b/tests/unit_tests/models/test_mock_subtitle_translation.py @@ -33,14 +33,13 @@ def test_predict(self): predict_output = mock_model.predict(predict_input) output_dict = predict_output["predictions"][0] - assert output_dict["data"] == "This is a subtitle translation." + assert output_dict.data == "This is a subtitle translation." class MockModel(SubtitleTranslationModel): - def run_model(self, api_input: Dict[str, List[SubtitleTranslationInput]], headers: Dict[str, str] = None) -> Dict[str, List[SubtitleTranslationOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[SubtitleTranslationInput], headers: Dict[str, str] = None) -> List[SubtitleTranslationOutput]: predictions_list = [] # There's only 1 instance in this case. - for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = "This is a subtitle translation." 
@@ -55,5 +54,4 @@ def run_model(self, api_input: Dict[str, List[SubtitleTranslationInput]], header } search_output = SubtitleTranslationOutput(**output_dict) predictions_list.append(search_output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_text_generation.py b/tests/unit_tests/models/test_mock_text_generation.py index 8aa318c..bdf2877 100644 --- a/tests/unit_tests/models/test_mock_text_generation.py +++ b/tests/unit_tests/models/test_mock_text_generation.py @@ -2,6 +2,7 @@ from aixplain.model_interfaces.schemas.function.function_input import TextGenerationInput from aixplain.model_interfaces.schemas.function.function_output import TextGenerationOutput from aixplain.model_interfaces.interfaces.function_models import TextGenerationModel +from aixplain.model_interfaces.schemas.modality.modality_input import TextListInput from typing import Dict, List class TestMockTextGeneration(): @@ -25,6 +26,21 @@ def test_predict(self): assert predictions.data == "I am a text generation model." 
+ def test_tokenize(self): + tokenize_input = { + # provide a list of test instances + "instances": [ + { + "data": ["Hello world", "Hello world again"] + } + ], + "function": "TOKENIZE" + } + mock_model = MockModel("Mock") + token_counts_list = mock_model.predict(tokenize_input) + + assert token_counts_list["token_counts"][0] == [11, 17] + class MockModel(TextGenerationModel): def run_model(self, api_input: List[TextGenerationInput], headers: Dict[str, str] = None) -> List[TextGenerationOutput]: print(f"API INPUT: {api_input}") @@ -47,4 +63,11 @@ def run_model(self, api_input: List[TextGenerationInput], headers: Dict[str, str text_generation_output = TextGenerationOutput(**output_dict) predictions_list.append(text_generation_output) predict_output = predictions_list - return predict_output \ No newline at end of file + return predict_output + + def tokenize(self, api_input: List[TextListInput], headers: Dict[str, str] = None) -> List[List[int]]: + token_counts_list = [] + for instance in api_input: + token_counts = [len(message) for message in instance.data] + token_counts_list.append(token_counts) + return token_counts_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_text_generation_chat.py b/tests/unit_tests/models/test_mock_text_generation_chat.py new file mode 100644 index 0000000..fb80423 --- /dev/null +++ b/tests/unit_tests/models/test_mock_text_generation_chat.py @@ -0,0 +1,118 @@ +from unittest.mock import Mock +from aixplain.model_interfaces.schemas.function.function_input import TextGenerationInput +from aixplain.model_interfaces.schemas.function.function_output import TextGenerationOutput +from aixplain.model_interfaces.interfaces.function_models import TextGenerationChatModel, TextGenerationChatTemplatizeInput +from aixplain.model_interfaces.schemas.modality.modality_input import TextListInput +from typing import Dict, List, Text + +class TestMockTextGenerationChat(): + def test_predict(self): + predict_input = { + 
"instances": [ + { + "data": "How many cups in a liter?", + "max_new_tokens": 200, + "top_p": 0.92, + "top_k": 1, + "num_return_sequences": 1 + } + ], + "function": "predict" + } + + mock_model = MockModel("Mock") + predict_output = mock_model.predict(predict_input) + predictions = predict_output["predictions"][0] + + assert predictions.data == "I am a text generation model." + + def test_tokenize(self): + tokenize_input = { + # provide a list of test instances + "instances": [ + { + "data": ["Hello world", "Hello world again"] + } + ], + "function": "TOKENIZE" + } + mock_model = MockModel("Mock") + token_counts_list = mock_model.predict(tokenize_input) + print(f"Token counts: {token_counts_list}") + + assert token_counts_list["token_counts"][0] == [11, 17] + + def test_templatize(self): + data_to_be_templatized = [ + { + "role": "user", + "content": "Hello, how are you?" + }, + { + "role": "assistant", + "content": "I'm doing great. How can I help you today?" + }, + { + "role": "user", + "content": "I'd like to show off how chat templating works!" + }, + { + "role": "system", + "content": "I'd like to show off how chat templating works!" + } + ] + templatize_input = { + "instances": [ + { + "data": data_to_be_templatized + } + ], + "function": "TEMPLATIZE" + } + + mock_model = MockModel("Mock") + templatized_text = mock_model.predict(templatize_input) + + assert templatized_text["prompts"][0] == f"Mock template: {str(data_to_be_templatized)}" + # for i in range(len(data_to_be_templatized)): + # print(f"templatized_text: {templatized_text}") + # assert templatized_text["prompts"][i] == f"Mock template: {str(data_to_be_templatized[i])}" + + +class MockModel(TextGenerationChatModel): + def run_model(self, api_input: List[TextGenerationInput], headers: Dict[str, str] = None) -> List[TextGenerationOutput]: + print(f"API INPUT: {api_input}") + instances = api_input + predictions_list = [] + # There's only 1 instance in this case. 
+ for instance in instances: + instance_data = instance.dict() + model_instance = Mock() + model_instance.process_data.return_value = "I am a text generation model." + result = model_instance.process_data(instance_data["data"]) + model_instance.delete() + + # Map back onto TextGenerationOutput + data = result + + output_dict = { + "data": data, + } + text_generation_output = TextGenerationOutput(**output_dict) + predictions_list.append(text_generation_output) + predict_output = predictions_list + return predict_output + + def tokenize(self, api_input: List[TextListInput], headers: Dict[str, str] = None) -> List[List[int]]: + token_counts_list = [] + for instance in api_input: + token_counts = [len(message) for message in instance.data] + token_counts_list.append(token_counts) + return token_counts_list + + def templatize(self, api_input: List[TextGenerationChatTemplatizeInput], headers: Dict[str, str] = None) -> List[Text]: + template_text_list = [] + for instance in api_input: + templatized_text = f"Mock template: {str(instance.data)}" + template_text_list.append(templatized_text) + return template_text_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_text_reconstruction.py b/tests/unit_tests/models/test_mock_text_reconstruction.py index 536283e..8ee395e 100644 --- a/tests/unit_tests/models/test_mock_text_reconstruction.py +++ b/tests/unit_tests/models/test_mock_text_reconstruction.py @@ -27,14 +27,13 @@ def test_predict(self): predict_output = mock_model.predict(predict_input) output_dict = predict_output["predictions"][0] - assert output_dict["data"] == "This is a text reconstruction." + assert output_dict.data == "This is a text reconstruction." 
class MockModel(TextReconstructionModel): - def run_model(self, api_input: Dict[str, List[TextReconstructionInput]], headers: Dict[str, str] = None) -> Dict[str, List[TextReconstructionOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[TextReconstructionInput], headers: Dict[str, str] = None) -> List[TextReconstructionOutput]: predictions_list = [] # There's only 1 instance in this case. - for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = "This is a text reconstruction." @@ -49,5 +48,4 @@ def run_model(self, api_input: Dict[str, List[TextReconstructionInput]], headers } output = TextReconstructionOutput(**output_dict) predictions_list.append(output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/test_mock_text_summarization b/tests/unit_tests/models/test_mock_text_summarization index d9e91cf..ea7fabc 100644 --- a/tests/unit_tests/models/test_mock_text_summarization +++ b/tests/unit_tests/models/test_mock_text_summarization @@ -31,7 +31,7 @@ class TestMockTextSummarization(): predict_output = mock_model.predict(predict_input) summarization_output_dict = predict_output["predictions"][0] - assert summarization_output_dict["data"] == "This is a summary" + assert summarization_output_dict.data == "This is a summary" class MockModel(TextSummarizationModel): def run_model(self, api_input: Dict[str, List[TextSummarizationInput]], headers: Dict[str, str] = None) -> Dict[str, List[TextSummarizationOutput]]: diff --git a/tests/unit_tests/models/test_mock_translation.py b/tests/unit_tests/models/test_mock_translation.py index 7f60da1..e5b2e70 100644 --- a/tests/unit_tests/models/test_mock_translation.py +++ b/tests/unit_tests/models/test_mock_translation.py @@ -34,16 +34,15 @@ def 
test_predict(self): predict_output = mock_model.predict(predict_input) translation_output_dict = predict_output["predictions"][0] - assert translation_output_dict["data"] == "Hola, como estas?" - assert translation_output_dict["details"]["text"] == "Hola, como estas?" - assert translation_output_dict["details"]["confidence"] == 0.7 + assert translation_output_dict.data == "Hola, como estas?" + assert translation_output_dict.details.text == "Hola, como estas?" + assert translation_output_dict.details.confidence == 0.7 class MockModel(TranslationModel): - def run_model(self, api_input: Dict[str, List[TranslationInput]], headers: Dict[str, str] = None) -> Dict[str, List[TranslationOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[TranslationInput], headers: Dict[str, str] = None) -> List[TranslationOutput]: predictions_list = [] # There's only 1 instance in this case. - for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = ("Hola, como estas?", 0.7) @@ -60,5 +59,4 @@ def run_model(self, api_input: Dict[str, List[TranslationInput]], headers: Dict[ } translation_output = TranslationOutput(**output_dict) predictions_list.append(translation_output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file diff --git a/tests/unit_tests/models/text_mock_search.py b/tests/unit_tests/models/text_mock_search.py index 55a4687..163b893 100644 --- a/tests/unit_tests/models/text_mock_search.py +++ b/tests/unit_tests/models/text_mock_search.py @@ -29,16 +29,15 @@ def test_predict(self): mock_model = MockModel("Mock") predict_output = mock_model.predict(predict_input) - output_dict = predict_output["predictions"][0] + output_dict = predict_output.predictions[0] - assert output_dict["data"] == "This is a search output." 
+ assert output_dict.data == "This is a search output." class MockModel(SearchModel): - def run_model(self, api_input: Dict[str, List[SearchInput]], headers: Dict[str, str] = None) -> Dict[str, List[SearchOutput]]: - instances = api_input["instances"] + def run_model(self, api_input: List[SearchInput], headers: Dict[str, str] = None) -> List[SearchOutput]: predictions_list = [] # There's only 1 instance in this case. - for instance in instances: + for instance in api_input: instance_data = instance.dict() model_instance = Mock() model_instance.process_data.return_value = "This is a search output." @@ -53,5 +52,4 @@ def run_model(self, api_input: Dict[str, List[SearchInput]], headers: Dict[str, } search_output = SearchOutput(**output_dict) predictions_list.append(search_output) - predict_output = {"predictions": predictions_list} - return predict_output \ No newline at end of file + return predictions_list \ No newline at end of file