diff --git a/nusantara/nusa_datasets/titml_idn/titml_idn.py b/nusantara/nusa_datasets/titml_idn/titml_idn.py index 47415a00..9efccd99 100644 --- a/nusantara/nusa_datasets/titml_idn/titml_idn.py +++ b/nusantara/nusa_datasets/titml_idn/titml_idn.py @@ -53,10 +53,10 @@ class TitmlIdn(datasets.GeneratorBasedBuilder): subset_id="titml_idn", ), NusantaraConfig( - name="titml_idn_nusantara_asr", + name="titml_idn_nusantara_sptext", version=datasets.Version(_NUSANTARA_VERSION), description="TITML-IDN Nusantara schema", - schema="nusantara_asr", + schema="nusantara_sptext", subset_id="titml_idn", ), ] @@ -74,8 +74,8 @@ def _info(self): "text": datasets.Value("string"), } ) - elif self.config.schema == "nusantara_asr": - features = schemas.asr_features + elif self.config.schema == "nusantara_sptext": + features = schemas.speech_text_features return datasets.DatasetInfo( description=_DESCRIPTION, @@ -98,7 +98,7 @@ def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datase def _generate_examples(self, filepath: Path, n_speakers=20): - if self.config.schema == "source" or self.config.schema == "nusantara_asr": + if self.config.schema == "source" or self.config.schema == "nusantara_sptext": for speaker_id in range(1, n_speakers + 1): speaker_id = str(speaker_id).zfill(2) @@ -121,7 +121,7 @@ def _generate_examples(self, filepath: Path, n_speakers=20): "text": text, } yield audio_id, ex - elif self.config.schema == "nusantara_asr": + elif self.config.schema == "nusantara_sptext": ex = { "id": audio_id, "speaker_id": speaker_id, diff --git a/nusantara/utils/constants.py b/nusantara/utils/constants.py index b1f532be..db4a0f42 100644 --- a/nusantara/utils/constants.py +++ b/nusantara/utils/constants.py @@ -33,6 +33,7 @@ class Tasks(Enum): TEXTUAL_ENTAILMENT = "TE" SEMANTIC_SIMILARITY = "STS" NEXT_SENTENCE_PREDICTION = "NSP" + SHORT_ANSWER_GRADING = "SAG" # Single Text Generation MACHINE_TRANSLATION = "MT" @@ -45,8 +46,10 @@ class Tasks(Enum): # Self Supervised Pretraining SELF_SUPERVISED_PRETRAINING = "SSP" - # Speech Recognition + # SpeechText SPEECH_RECOGNITION = "ASR" + SPEECH_TO_TEXT_TRANSLATION = "STTT" + TEXT_TO_SPEECH = "TTS" # ImageText IMAGE_CAPTIONING = "IC" diff --git a/nusantara/utils/schemas/__init__.py b/nusantara/utils/schemas/__init__.py index 67af2e0e..4eca1e02 100644 --- a/nusantara/utils/schemas/__init__.py +++ b/nusantara/utils/schemas/__init__.py @@ -7,7 +7,7 @@ from .text_to_text import features as text2text_features from .seq_label import features as seq_label_features from .self_supervised_pretraining import features as ssp_features -from .speech_recognition import features as asr_features +from .speech_text import features as speech_text_features from .image_text import features as image_text_features -__all__ = ["kb_features", "qa_features", "text2text_features", "text_features", "text_multi_features", "pairs_features", "pairs_features_score", "seq_label_features", "ssp_features", "asr_features", "image_text_features"] +__all__ = ["kb_features", "qa_features", "text2text_features", "text_features", "text_multi_features", "pairs_features", "pairs_features_score", "seq_label_features", "ssp_features", "speech_text_features", "image_text_features"] diff --git a/nusantara/utils/schemas/speech_recognition.py b/nusantara/utils/schemas/speech_text.py similarity index 94% rename from nusantara/utils/schemas/speech_recognition.py rename to nusantara/utils/schemas/speech_text.py index 964da7a1..93ab9b65 100644 --- a/nusantara/utils/schemas/speech_recognition.py +++ b/nusantara/utils/schemas/speech_text.py @@ -1,5 +1,5 @@ """ -Speech Recognition Schema +SpeechText Schema """ import datasets diff --git a/tests/test_nusantara.py b/tests/test_nusantara.py index c484b5c6..1578794e 100644 --- a/tests/test_nusantara.py +++ b/tests/test_nusantara.py @@ -12,7 +12,7 @@ import datasets from datasets import DatasetDict, Features from nusantara.utils.constants import Tasks -from nusantara.utils.schemas import kb_features, pairs_features, pairs_features_score, qa_features, text2text_features, text_features, text_multi_features, seq_label_features, ssp_features, asr_features, image_text_features +from nusantara.utils.schemas import kb_features, pairs_features, pairs_features_score, qa_features, text2text_features, text_features, text_multi_features, seq_label_features, ssp_features, speech_text_features, image_text_features sys.path.append(str(Path(__file__).parent.parent)) @@ -35,6 +35,7 @@ Tasks.TEXTUAL_ENTAILMENT: "PAIRS", Tasks.SEMANTIC_SIMILARITY: "PAIRS_SCORE", Tasks.NEXT_SENTENCE_PREDICTION: "PAIRS", + Tasks.SHORT_ANSWER_GRADING: "PAIRS_SCORE", Tasks.PARAPHRASING: "T2T", Tasks.MACHINE_TRANSLATION: "T2T", Tasks.SUMMARIZATION: "T2T", @@ -42,10 +43,12 @@ Tasks.ASPECT_BASED_SENTIMENT_ANALYSIS: "TEXT_MULTI", Tasks.EMOTION_CLASSIFICATION: "TEXT", Tasks.SELF_SUPERVISED_PRETRAINING: "SSP", - Tasks.SPEECH_RECOGNITION: "ASR", - Tasks.IMAGE_CAPTIONING: "IC", - Tasks.STYLIZED_IMAGE_CAPTIONING: "SIC", - Tasks.VISUALLY_GROUNDED_REASONING: "VGR", + Tasks.SPEECH_RECOGNITION: "SPTEXT", + Tasks.SPEECH_TO_TEXT_TRANSLATION: "SPTEXT", + Tasks.TEXT_TO_SPEECH: "SPTEXT", + Tasks.IMAGE_CAPTIONING: "IMTEXT", + Tasks.STYLIZED_IMAGE_CAPTIONING: "IMTEXT", + Tasks.VISUALLY_GROUNDED_REASONING: "IMTEXT", } _VALID_TASKS = set(_TASK_TO_SCHEMA.keys()) @@ -61,10 +64,8 @@ "PAIRS_SCORE": pairs_features_score(), "SEQ_LABEL": seq_label_features(), "SSP": ssp_features, - "ASR": asr_features, - "IC": image_text_features(), - "SIC": image_text_features(), - "VGR": image_text_features(), + "SPTEXT": speech_text_features, + "IMTEXT": image_text_features(), } _TASK_TO_FEATURES = {