Rename SpeechRecognition (asr) schema to SpeechText (sptext); Add tasks: SPEECH_TO_TEXT_TRANSLATION (using sptext), TEXT_TO_SPEECH (sptext), SHORT_ANSWER_GRADING (pairs_score); Remove redundancy for ImageText (imtext) schema; Modify TITML-IDN to use sptext schema;
IndoNLP · Aug 19, 2022 · 6b7899a · 6b7899a
1 parent eed2b41
commit 6b7899a
Show file tree

Hide file tree

Showing 5 changed files with 23 additions and 19 deletions.
diff --git a/nusantara/nusa_datasets/titml_idn/titml_idn.py b/nusantara/nusa_datasets/titml_idn/titml_idn.py
@@ -53,10 +53,10 @@ class TitmlIdn(datasets.GeneratorBasedBuilder):
             subset_id="titml_idn",
         ),
         NusantaraConfig(
-            name="titml_idn_nusantara_asr",
+            name="titml_idn_nusantara_sptext",
             version=datasets.Version(_NUSANTARA_VERSION),
             description="TITML-IDN Nusantara schema",
-            schema="nusantara_asr",
+            schema="nusantara_sptext",
             subset_id="titml_idn",
         ),
     ]
@@ -74,8 +74,8 @@ def _info(self):
                     "text": datasets.Value("string"),
                 }
             )
-        elif self.config.schema == "nusantara_asr":
-            features = schemas.asr_features
+        elif self.config.schema == "nusantara_sptext":
+            features = schemas.speech_text_features
 
         return datasets.DatasetInfo(
             description=_DESCRIPTION,
@@ -98,7 +98,7 @@ def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datase
 
     def _generate_examples(self, filepath: Path, n_speakers=20):
 
-        if self.config.schema == "source" or self.config.schema == "nusantara_asr":
+        if self.config.schema == "source" or self.config.schema == "nusantara_sptext":
 
             for speaker_id in range(1, n_speakers + 1):
                 speaker_id = str(speaker_id).zfill(2)
@@ -121,7 +121,7 @@ def _generate_examples(self, filepath: Path, n_speakers=20):
                                     "text": text,
                                 }
                                 yield audio_id, ex
-                            elif self.config.schema == "nusantara_asr":
+                            elif self.config.schema == "nusantara_sptext":
                                 ex = {
                                     "id": audio_id,
                                     "speaker_id": speaker_id,

diff --git a/nusantara/utils/constants.py b/nusantara/utils/constants.py
@@ -33,6 +33,7 @@ class Tasks(Enum):
     TEXTUAL_ENTAILMENT = "TE"
     SEMANTIC_SIMILARITY = "STS"
     NEXT_SENTENCE_PREDICTION = "NSP"
+    SHORT_ANSWER_GRADING = "SAG"
 
     # Single Text Generation
     MACHINE_TRANSLATION = "MT"
@@ -45,8 +46,10 @@ class Tasks(Enum):
     # Self Supervised Pretraining
     SELF_SUPERVISED_PRETRAINING = "SSP"
 
-    # Speech Recognition
+    # SpeechText
     SPEECH_RECOGNITION = "ASR"
+    SPEECH_TO_TEXT_TRANSLATION = "STTT"
+    TEXT_TO_SPEECH = "TTS"
 
     # ImageText
     IMAGE_CAPTIONING = "IC"

diff --git a/nusantara/utils/schemas/__init__.py b/nusantara/utils/schemas/__init__.py
@@ -7,7 +7,7 @@
 from .text_to_text import features as text2text_features
 from .seq_label import features as seq_label_features
 from .self_supervised_pretraining import features as ssp_features
-from .speech_recognition import features as asr_features
+from .speech_text import features as speech_text_features
 from .image_text import features as image_text_features
 
-__all__ = ["kb_features", "qa_features", "text2text_features", "text_features", "text_multi_features", "pairs_features", "pairs_features_score", "seq_label_features", "ssp_features", "asr_features", "image_text_features"]
+__all__ = ["kb_features", "qa_features", "text2text_features", "text_features", "text_multi_features", "pairs_features", "pairs_features_score", "seq_label_features", "ssp_features", "speech_text_features", "image_text_features"]
diff --git a/nusantara/utils/schemas/speech_recognition.py b/nusantara/utils/schemas/speech_text.py
@@ -1,5 +1,5 @@
 """
-Speech Recognition Schema
+SpeechText Schema
 """
 import datasets
 

diff --git a/tests/test_nusantara.py b/tests/test_nusantara.py
@@ -12,7 +12,7 @@
 import datasets
 from datasets import DatasetDict, Features
 from nusantara.utils.constants import Tasks
-from nusantara.utils.schemas import kb_features, pairs_features, pairs_features_score, qa_features, text2text_features, text_features, text_multi_features, seq_label_features, ssp_features, asr_features, image_text_features
+from nusantara.utils.schemas import kb_features, pairs_features, pairs_features_score, qa_features, text2text_features, text_features, text_multi_features, seq_label_features, ssp_features, speech_text_features, image_text_features
 
 sys.path.append(str(Path(__file__).parent.parent))
 
@@ -35,17 +35,20 @@
     Tasks.TEXTUAL_ENTAILMENT: "PAIRS",
     Tasks.SEMANTIC_SIMILARITY: "PAIRS_SCORE",
     Tasks.NEXT_SENTENCE_PREDICTION: "PAIRS",
+    Tasks.SHORT_ANSWER_GRADING: "PAIRS_SCORE",
     Tasks.PARAPHRASING: "T2T",
     Tasks.MACHINE_TRANSLATION: "T2T",
     Tasks.SUMMARIZATION: "T2T",
     Tasks.SENTIMENT_ANALYSIS: "TEXT",
     Tasks.ASPECT_BASED_SENTIMENT_ANALYSIS: "TEXT_MULTI",
     Tasks.EMOTION_CLASSIFICATION: "TEXT",
     Tasks.SELF_SUPERVISED_PRETRAINING: "SSP",
-    Tasks.SPEECH_RECOGNITION: "ASR",
-    Tasks.IMAGE_CAPTIONING: "IC",
-    Tasks.STYLIZED_IMAGE_CAPTIONING: "SIC",
-    Tasks.VISUALLY_GROUNDED_REASONING: "VGR",
+    Tasks.SPEECH_RECOGNITION: "SPTEXT",
+    Tasks.SPEECH_TO_TEXT_TRANSLATION: "SPTEXT",
+    Tasks.TEXT_TO_SPEECH: "SPTEXT",
+    Tasks.IMAGE_CAPTIONING: "IMTEXT",
+    Tasks.STYLIZED_IMAGE_CAPTIONING: "IMTEXT",
+    Tasks.VISUALLY_GROUNDED_REASONING: "IMTEXT",
 }
 
 _VALID_TASKS = set(_TASK_TO_SCHEMA.keys())
@@ -61,10 +64,8 @@
     "PAIRS_SCORE": pairs_features_score(),
     "SEQ_LABEL": seq_label_features(),
     "SSP": ssp_features,
-    "ASR": asr_features,
-    "IC": image_text_features(),
-    "SIC": image_text_features(),
-    "VGR": image_text_features(),
+    "SPTEXT": speech_text_features,
+    "IMTEXT": image_text_features(),
 }
 
 _TASK_TO_FEATURES = {