From c0d003d1add381089492eb0782f4c3ae44595b4a Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Sat, 24 Sep 2022 21:52:33 +0700 Subject: [PATCH 01/12] data loader sundanese twitter emotions --- .../sunda_twitter_emotions.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py new file mode 100644 index 00000000..9dfbb741 --- /dev/null +++ b/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py @@ -0,0 +1,125 @@ +import os +from pathlib import Path +from typing import Dict, List, Tuple + +import datasets +import pandas as pd + +from nusantara.utils import schemas +from nusantara.utils.configs import NusantaraConfig +from nusantara.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks + +# TODO: Add BibTeX citation +_CITATION = """\ +@INPROCEEDINGS{ +9297929, +author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, +booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, +title={Sundanese Twitter Dataset for Emotion Classification}, +year={2020}, +volume={}, +number={}, +pages={391-395}, +doi={10.1109/CENIM51130.2020.9297929} +} +""" + +_DATASETNAME = "sunda_twitter_emotions" + +_DESCRIPTION = """\ +This dataset is designed for Emotion Classification NLP task. +""" +_HOMEPAGE = "" + +_LICENSE = "UNKNOWN" + +_URLS = { + "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" +} + +_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] + + +_SOURCE_VERSION = "1.0.0" + +_NUSANTARA_VERSION = "1.0.0" + + +class NewDataset(datasets.GeneratorBasedBuilder): + """TODO: Short description of my dataset.""" + + SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) + NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) + + BUILDER_CONFIGS = [ + NusantaraConfig( + name="sunda_twitter_emotions_source", + version=SOURCE_VERSION, + description="Sundanese Twitter Dataset for Emotion source schema", + schema="source", + subset_id="sunda_twitter_emotions", + ), + NusantaraConfig( + name="sunda_twitter_emotions_nusantara_text", + version=NUSANTARA_VERSION, + description="Sundanese Twitter Dataset for Emotion Nusantara schema", + schema="nusantara_text", + subset_id="sunda_twitter_emotions", + ), + ] + + DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" + + def _info(self) -> datasets.DatasetInfo: + + if self.config.schema == "source": + features = datasets.Features({ + "index": datasets.Value("string"), + "data": datasets.Value("string"), + "label": datasets.Value("string")}) + + # For example nusantara_kb, nusantara_t2t + elif self.config.schema == "nusantara_text": + features = schemas.text_features(["anger", "joy", "fear", "sadness"]) + + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=features, + homepage=_HOMEPAGE, + license=_LICENSE, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: + urls = _URLS + data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) + data_files = {"train":data_dir} + + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + gen_kwargs={ + "filepath": data_files['train'], + "split": "train", + }, + ) + ] 
+ + def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: + + df = pd.read_csv(filepath, sep=",", header="infer").reset_index() + df.columns = ["index","label", "data"] + + if self.config.schema == "source": + for row in df.itertuples(): + ex = {"index": str(row.index), "data": row.data, "label": row.label} + yield row.index, ex + elif self.config.schema == "nusantara_text": + for row in df.itertuples(): + ex = {"id": str(row.index), "text": row.data, "label": row.label} + yield row.index, ex + else: + raise ValueError(f"Invalid config: {self.config.name}") + +if __name__ == "__main__": + datasets.load_dataset(__file__) From 5d2311436d6048ff6aedfb43a14552cadf77fe57 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Mon, 26 Sep 2022 19:02:36 +0700 Subject: [PATCH 02/12] data loader for sunda twitter emotions dataset --- .../sunda_twitter_emotions.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py new file mode 100644 index 00000000..4071f6d2 --- /dev/null +++ b/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py @@ -0,0 +1,125 @@ +import os +from pathlib import Path +from typing import Dict, List, Tuple + +import datasets +import pandas as pd + +from nusacrowd.utils import schemas +from nusacrowd.utils.configs import NusantaraConfig +from nusacrowd.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks + +# TODO: Add BibTeX citation +_CITATION = """\ +@INPROCEEDINGS{ +9297929, +author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, +booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, +title={Sundanese Twitter Dataset for Emotion Classification}, +year={2020}, +volume={}, +number={}, +pages={391-395}, +doi={10.1109/CENIM51130.2020.9297929} +} +""" + +_DATASETNAME = "sunda_twitter_emotions" + +_DESCRIPTION = """\ +This dataset is designed for Emotion Classification NLP task. 
+""" +_HOMEPAGE = "" + +_LICENSE = "UNKNOWN" + +_URLS = { + "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" +} + +_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] + + +_SOURCE_VERSION = "1.0.0" + +_NUSANTARA_VERSION = "1.0.0" + + +class NewDataset(datasets.GeneratorBasedBuilder): + """TODO: Short description of my dataset.""" + + SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) + NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) + + BUILDER_CONFIGS = [ + NusantaraConfig( + name="sunda_twitter_emotions_source", + version=SOURCE_VERSION, + description="Sundanese Twitter Dataset for Emotion source schema", + schema="source", + subset_id="sunda_twitter_emotions", + ), + NusantaraConfig( + name="sunda_twitter_emotions_nusantara_text", + version=NUSANTARA_VERSION, + description="Sundanese Twitter Dataset for Emotion Nusantara schema", + schema="nusantara_text", + subset_id="sunda_twitter_emotions", + ), + ] + + DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" + + def _info(self) -> datasets.DatasetInfo: + + if self.config.schema == "source": + features = datasets.Features({ + "index": datasets.Value("string"), + "data": datasets.Value("string"), + "label": datasets.Value("string")}) + + # For example nusantara_kb, nusantara_t2t + elif self.config.schema == "nusantara_text": + features = schemas.text_features(["anger", "joy", "fear", "sadness"]) + + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=features, + homepage=_HOMEPAGE, + license=_LICENSE, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: + urls = _URLS + data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) + data_files = {"train":data_dir} + + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + gen_kwargs={ + "filepath": data_files['train'], + "split": "train", + }, + ) + ] + + def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: + + df = pd.read_csv(filepath, sep=",", header="infer").reset_index() + df.columns = ["index","label", "data"] + + if self.config.schema == "source": + for row in df.itertuples(): + ex = {"index": str(row.index), "data": row.data, "label": row.label} + yield row.index, ex + elif self.config.schema == "nusantara_text": + for row in df.itertuples(): + ex = {"id": str(row.index), "text": row.data, "label": row.label} + yield row.index, ex + else: + raise ValueError(f"Invalid config: {self.config.name}") + +if __name__ == "__main__": + datasets.load_dataset(__file__) From 1cdc881efc34d8fb222b34a7ea1bdd076ba56686 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Mon, 26 Sep 2022 19:06:58 +0700 Subject: [PATCH 03/12] Delete sunda_twitter_emotions.py --- .../sunda_twitter_emotions.py | 125 ------------------ 1 file changed, 125 deletions(-) delete mode 100644 nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py deleted file mode 100644 index 9dfbb741..00000000 --- a/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -from pathlib import Path -from typing import Dict, List, Tuple - -import datasets -import pandas as pd - -from nusantara.utils import schemas -from nusantara.utils.configs import NusantaraConfig -from nusantara.utils.constants import 
DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks - -# TODO: Add BibTeX citation -_CITATION = """\ -@INPROCEEDINGS{ -9297929, -author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, -booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, -title={Sundanese Twitter Dataset for Emotion Classification}, -year={2020}, -volume={}, -number={}, -pages={391-395}, -doi={10.1109/CENIM51130.2020.9297929} -} -""" - -_DATASETNAME = "sunda_twitter_emotions" - -_DESCRIPTION = """\ -This dataset is designed for Emotion Classification NLP task. -""" -_HOMEPAGE = "" - -_LICENSE = "UNKNOWN" - -_URLS = { - "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" -} - -_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] - - -_SOURCE_VERSION = "1.0.0" - -_NUSANTARA_VERSION = "1.0.0" - - -class NewDataset(datasets.GeneratorBasedBuilder): - """TODO: Short description of my dataset.""" - - SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) - NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) - - BUILDER_CONFIGS = [ - NusantaraConfig( - name="sunda_twitter_emotions_source", - version=SOURCE_VERSION, - description="Sundanese Twitter Dataset for Emotion source schema", - schema="source", - subset_id="sunda_twitter_emotions", - ), - NusantaraConfig( - name="sunda_twitter_emotions_nusantara_text", - version=NUSANTARA_VERSION, - description="Sundanese Twitter Dataset for Emotion Nusantara schema", - schema="nusantara_text", - subset_id="sunda_twitter_emotions", - ), - ] - - DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" - - def _info(self) -> datasets.DatasetInfo: - - if self.config.schema == "source": - features = datasets.Features({ - "index": datasets.Value("string"), - "data": datasets.Value("string"), - "label": datasets.Value("string")}) - - # For example nusantara_kb, nusantara_t2t - elif self.config.schema == "nusantara_text": - features = schemas.text_features(["anger", "joy", "fear", "sadness"]) - - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=features, - homepage=_HOMEPAGE, - license=_LICENSE, - citation=_CITATION, - ) - - def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: - urls = _URLS - data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) - data_files = {"train":data_dir} - - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, - gen_kwargs={ - "filepath": data_files['train'], - "split": "train", - }, - ) - ] - - def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: - - df = pd.read_csv(filepath, sep=",", header="infer").reset_index() - df.columns = ["index","label", "data"] - - if self.config.schema == "source": - for row in df.itertuples(): - ex = {"index": str(row.index), "data": row.data, "label": row.label} - yield row.index, ex - elif self.config.schema == "nusantara_text": - for row in df.itertuples(): - ex = {"id": str(row.index), "text": row.data, "label": row.label} - yield row.index, ex - else: - raise ValueError(f"Invalid config: {self.config.name}") - -if __name__ == "__main__": - datasets.load_dataset(__file__) From b3e311981f098b9ca07054dd8d15643b9725f913 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Mon, 26 Sep 2022 20:01:43 +0700 Subject: [PATCH 04/12] Delete sunda_twitter_emotions.py --- .../sunda_twitter_emotions.py | 125 ------------------ 1 file changed, 125 deletions(-) delete mode 
100644 nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py deleted file mode 100644 index 4071f6d2..00000000 --- a/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -from pathlib import Path -from typing import Dict, List, Tuple - -import datasets -import pandas as pd - -from nusacrowd.utils import schemas -from nusacrowd.utils.configs import NusantaraConfig -from nusacrowd.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks - -# TODO: Add BibTeX citation -_CITATION = """\ -@INPROCEEDINGS{ -9297929, -author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, -booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, -title={Sundanese Twitter Dataset for Emotion Classification}, -year={2020}, -volume={}, -number={}, -pages={391-395}, -doi={10.1109/CENIM51130.2020.9297929} -} -""" - -_DATASETNAME = "sunda_twitter_emotions" - -_DESCRIPTION = """\ -This dataset is designed for Emotion Classification NLP task. -""" -_HOMEPAGE = "" - -_LICENSE = "UNKNOWN" - -_URLS = { - "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" -} - -_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] - - -_SOURCE_VERSION = "1.0.0" - -_NUSANTARA_VERSION = "1.0.0" - - -class NewDataset(datasets.GeneratorBasedBuilder): - """TODO: Short description of my dataset.""" - - SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) - NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) - - BUILDER_CONFIGS = [ - NusantaraConfig( - name="sunda_twitter_emotions_source", - version=SOURCE_VERSION, - description="Sundanese Twitter Dataset for Emotion source schema", - schema="source", - subset_id="sunda_twitter_emotions", - ), - NusantaraConfig( - name="sunda_twitter_emotions_nusantara_text", - version=NUSANTARA_VERSION, - description="Sundanese Twitter Dataset for Emotion Nusantara schema", - schema="nusantara_text", - subset_id="sunda_twitter_emotions", - ), - ] - - DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" - - def _info(self) -> datasets.DatasetInfo: - - if self.config.schema == "source": - features = datasets.Features({ - "index": datasets.Value("string"), - "data": datasets.Value("string"), - "label": datasets.Value("string")}) - - # For example nusantara_kb, nusantara_t2t - elif self.config.schema == "nusantara_text": - features = schemas.text_features(["anger", "joy", "fear", "sadness"]) - - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=features, - homepage=_HOMEPAGE, - license=_LICENSE, - citation=_CITATION, - ) - - def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: - urls = _URLS - data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) - data_files = {"train":data_dir} - - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, - gen_kwargs={ - "filepath": data_files['train'], - "split": "train", - }, - ) - ] - - def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: - - df = pd.read_csv(filepath, sep=",", header="infer").reset_index() - df.columns = ["index","label", "data"] - - if self.config.schema == "source": - for row in df.itertuples(): - ex = {"index": 
str(row.index), "data": row.data, "label": row.label} - yield row.index, ex - elif self.config.schema == "nusantara_text": - for row in df.itertuples(): - ex = {"id": str(row.index), "text": row.data, "label": row.label} - yield row.index, ex - else: - raise ValueError(f"Invalid config: {self.config.name}") - -if __name__ == "__main__": - datasets.load_dataset(__file__) From c8648a8affe607215ac2adfc182de4be03c11d9b Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Mon, 26 Sep 2022 21:06:15 +0700 Subject: [PATCH 05/12] rename files sundanese twitter emotions --- nusacrowd/nusa_datasets/su_emot/su_emot.py | 125 +++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 nusacrowd/nusa_datasets/su_emot/su_emot.py diff --git a/nusacrowd/nusa_datasets/su_emot/su_emot.py b/nusacrowd/nusa_datasets/su_emot/su_emot.py new file mode 100644 index 00000000..dc53ed5e --- /dev/null +++ b/nusacrowd/nusa_datasets/su_emot/su_emot.py @@ -0,0 +1,125 @@ +import os +from pathlib import Path +from typing import Dict, List, Tuple + +import datasets +import pandas as pd + +from nusacrowd.utils import schemas +from nusacrowd.utils.configs import NusantaraConfig +from nusacrowd.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks + +# TODO: Add BibTeX citation +_CITATION = """\ +@INPROCEEDINGS{ +9297929, +author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, +booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, +title={Sundanese Twitter Dataset for Emotion Classification}, +year={2020}, +volume={}, +number={}, +pages={391-395}, +doi={10.1109/CENIM51130.2020.9297929} +} +""" + +_DATASETNAME = "su_emot" + +_DESCRIPTION = """\ +This dataset is designed for Emotion Classification NLP task. 
+""" +_HOMEPAGE = "" + +_LICENSE = "UNKNOWN" + +_URLS = { + "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" +} + +_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] + + +_SOURCE_VERSION = "1.0.0" + +_NUSANTARA_VERSION = "1.0.0" + + +class NewDataset(datasets.GeneratorBasedBuilder): + """TODO: Short description of my dataset.""" + + SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) + NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) + + BUILDER_CONFIGS = [ + NusantaraConfig( + name="su_emot_source", + version=SOURCE_VERSION, + description="Sundanese Twitter Dataset for Emotion source schema", + schema="source", + subset_id="su_emot", + ), + NusantaraConfig( + name="su_emot_nusantara_text", + version=NUSANTARA_VERSION, + description="Sundanese Twitter Dataset for Emotion Nusantara schema", + schema="nusantara_text", + subset_id="su_emot", + ), + ] + + DEFAULT_CONFIG_NAME = "su_emot_source" + + def _info(self) -> datasets.DatasetInfo: + + if self.config.schema == "source": + features = datasets.Features({ + "index": datasets.Value("string"), + "data": datasets.Value("string"), + "label": datasets.Value("string")}) + + # For example nusantara_kb, nusantara_t2t + elif self.config.schema == "nusantara_text": + features = schemas.text_features(["anger", "joy", "fear", "sadness"]) + + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=features, + homepage=_HOMEPAGE, + license=_LICENSE, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: + urls = _URLS + data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) + data_files = {"train":data_dir} + + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + gen_kwargs={ + "filepath": data_files['train'], + "split": "train", + }, + ) + ] + + def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: + + df = pd.read_csv(filepath, sep=",", header="infer").reset_index() + df.columns = ["index","label", "data"] + + if self.config.schema == "source": + for row in df.itertuples(): + ex = {"index": str(row.index), "data": row.data, "label": row.label} + yield row.index, ex + elif self.config.schema == "nusantara_text": + for row in df.itertuples(): + ex = {"id": str(row.index), "text": row.data, "label": row.label} + yield row.index, ex + else: + raise ValueError(f"Invalid config: {self.config.name}") + +if __name__ == "__main__": + datasets.load_dataset(__file__) From 2b7174ca5d6cda465f741e7ca52ffee3d737e16c Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Fri, 30 Sep 2022 20:22:58 +0700 Subject: [PATCH 06/12] Updated change sundanese twitter emotions --- nusacrowd/nusa_datasets/su_emot/__init__.py | 0 nusacrowd/nusa_datasets/su_emot/su_emot.py | 22 +++++++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) create mode 100644 nusacrowd/nusa_datasets/su_emot/__init__.py diff --git a/nusacrowd/nusa_datasets/su_emot/__init__.py b/nusacrowd/nusa_datasets/su_emot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/nusacrowd/nusa_datasets/su_emot/su_emot.py b/nusacrowd/nusa_datasets/su_emot/su_emot.py index dc53ed5e..1c0f823c 100644 --- a/nusacrowd/nusa_datasets/su_emot/su_emot.py +++ b/nusacrowd/nusa_datasets/su_emot/su_emot.py @@ -9,7 +9,12 @@ from nusacrowd.utils.configs import NusantaraConfig from nusacrowd.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks -# TODO: Add BibTeX citation 
+_DATASETNAME = "su_emot" +_SOURCE_VIEW_NAME = DEFAULT_SOURCE_VIEW_NAME +_UNIFIED_VIEW_NAME = DEFAULT_NUSANTARA_VIEW_NAME + +_LANGUAGES = ["sun"] +_LOCAL = False _CITATION = """\ @INPROCEEDINGS{ 9297929, @@ -19,17 +24,18 @@ year={2020}, volume={}, number={}, -pages={391-395}, +pages={391--395}, doi={10.1109/CENIM51130.2020.9297929} } """ -_DATASETNAME = "su_emot" - _DESCRIPTION = """\ -This dataset is designed for Emotion Classification NLP task. +This is a dataset for emotion classification of Sundanese text. The dataset is gathered from Twitter API between January and March 2019 with 2518 tweets in total. +The tweets filtered by using some hashtags which are represented Sundanese emotion, for instance, #persib, #corona, #saredih, #nyakakak, #garoblog, #sangsara, #gumujeng, #bungah, #sararieun, #ceurik, and #hariwang. +This dataset contains four distinctive emotions: anger, joy, fear, and sadness. Each tweet is annotated using related emotion. For data +validation, the authors consulted a Sundanese language teacher for expert validation. """ -_HOMEPAGE = "" +_HOMEPAGE = "https://github.com/virgantara/sundanese-twitter-dataset" _LICENSE = "UNKNOWN" @@ -45,8 +51,8 @@ _NUSANTARA_VERSION = "1.0.0" -class NewDataset(datasets.GeneratorBasedBuilder): - """TODO: Short description of my dataset.""" +class suEmot(datasets.GeneratorBasedBuilder): + """This is a dataset for emotion classification of Sundanese text. The dataset is gathered from Twitter API between January and March 2019 with 2518 tweets in total.""" SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) From dac6a12226699107a623e1e597de43ea2b65f495 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Sat, 24 Sep 2022 21:52:33 +0700 Subject: [PATCH 07/12] data loader sundanese twitter emotions --- .../sunda_twitter_emotions.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py new file mode 100644 index 00000000..9dfbb741 --- /dev/null +++ b/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py @@ -0,0 +1,125 @@ +import os +from pathlib import Path +from typing import Dict, List, Tuple + +import datasets +import pandas as pd + +from nusantara.utils import schemas +from nusantara.utils.configs import NusantaraConfig +from nusantara.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks + +# TODO: Add BibTeX citation +_CITATION = """\ +@INPROCEEDINGS{ +9297929, +author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, +booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, +title={Sundanese Twitter Dataset for Emotion Classification}, +year={2020}, +volume={}, +number={}, +pages={391-395}, +doi={10.1109/CENIM51130.2020.9297929} +} +""" + +_DATASETNAME = "sunda_twitter_emotions" + +_DESCRIPTION = """\ +This dataset is designed for Emotion Classification NLP task. 
+""" +_HOMEPAGE = "" + +_LICENSE = "UNKNOWN" + +_URLS = { + "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" +} + +_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] + + +_SOURCE_VERSION = "1.0.0" + +_NUSANTARA_VERSION = "1.0.0" + + +class NewDataset(datasets.GeneratorBasedBuilder): + """TODO: Short description of my dataset.""" + + SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) + NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) + + BUILDER_CONFIGS = [ + NusantaraConfig( + name="sunda_twitter_emotions_source", + version=SOURCE_VERSION, + description="Sundanese Twitter Dataset for Emotion source schema", + schema="source", + subset_id="sunda_twitter_emotions", + ), + NusantaraConfig( + name="sunda_twitter_emotions_nusantara_text", + version=NUSANTARA_VERSION, + description="Sundanese Twitter Dataset for Emotion Nusantara schema", + schema="nusantara_text", + subset_id="sunda_twitter_emotions", + ), + ] + + DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" + + def _info(self) -> datasets.DatasetInfo: + + if self.config.schema == "source": + features = datasets.Features({ + "index": datasets.Value("string"), + "data": datasets.Value("string"), + "label": datasets.Value("string")}) + + # For example nusantara_kb, nusantara_t2t + elif self.config.schema == "nusantara_text": + features = schemas.text_features(["anger", "joy", "fear", "sadness"]) + + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=features, + homepage=_HOMEPAGE, + license=_LICENSE, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: + urls = _URLS + data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) + data_files = {"train":data_dir} + + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + gen_kwargs={ + "filepath": data_files['train'], + "split": "train", + }, + ) + ] + + def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: + + df = pd.read_csv(filepath, sep=",", header="infer").reset_index() + df.columns = ["index","label", "data"] + + if self.config.schema == "source": + for row in df.itertuples(): + ex = {"index": str(row.index), "data": row.data, "label": row.label} + yield row.index, ex + elif self.config.schema == "nusantara_text": + for row in df.itertuples(): + ex = {"id": str(row.index), "text": row.data, "label": row.label} + yield row.index, ex + else: + raise ValueError(f"Invalid config: {self.config.name}") + +if __name__ == "__main__": + datasets.load_dataset(__file__) From edcac302249658a6b6ce5e9e3133b71763182d47 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Mon, 26 Sep 2022 19:02:36 +0700 Subject: [PATCH 08/12] data loader for sunda twitter emotions dataset --- .../sunda_twitter_emotions.py | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py new file mode 100644 index 00000000..4071f6d2 --- /dev/null +++ b/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py @@ -0,0 +1,125 @@ +import os +from pathlib import Path +from typing import Dict, List, Tuple + +import datasets +import pandas as pd + +from nusacrowd.utils import schemas +from nusacrowd.utils.configs import NusantaraConfig +from nusacrowd.utils.constants 
import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks + +# TODO: Add BibTeX citation +_CITATION = """\ +@INPROCEEDINGS{ +9297929, +author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, +booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, +title={Sundanese Twitter Dataset for Emotion Classification}, +year={2020}, +volume={}, +number={}, +pages={391-395}, +doi={10.1109/CENIM51130.2020.9297929} +} +""" + +_DATASETNAME = "sunda_twitter_emotions" + +_DESCRIPTION = """\ +This dataset is designed for Emotion Classification NLP task. +""" +_HOMEPAGE = "" + +_LICENSE = "UNKNOWN" + +_URLS = { + "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" +} + +_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] + + +_SOURCE_VERSION = "1.0.0" + +_NUSANTARA_VERSION = "1.0.0" + + +class NewDataset(datasets.GeneratorBasedBuilder): + """TODO: Short description of my dataset.""" + + SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) + NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) + + BUILDER_CONFIGS = [ + NusantaraConfig( + name="sunda_twitter_emotions_source", + version=SOURCE_VERSION, + description="Sundanese Twitter Dataset for Emotion source schema", + schema="source", + subset_id="sunda_twitter_emotions", + ), + NusantaraConfig( + name="sunda_twitter_emotions_nusantara_text", + version=NUSANTARA_VERSION, + description="Sundanese Twitter Dataset for Emotion Nusantara schema", + schema="nusantara_text", + subset_id="sunda_twitter_emotions", + ), + ] + + DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" + + def _info(self) -> datasets.DatasetInfo: + + if self.config.schema == "source": + features = datasets.Features({ + "index": datasets.Value("string"), + "data": datasets.Value("string"), + "label": datasets.Value("string")}) + + # For example nusantara_kb, nusantara_t2t + elif self.config.schema == "nusantara_text": + features = schemas.text_features(["anger", "joy", "fear", "sadness"]) + + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=features, + homepage=_HOMEPAGE, + license=_LICENSE, + citation=_CITATION, + ) + + def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: + urls = _URLS + data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) + data_files = {"train":data_dir} + + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, + gen_kwargs={ + "filepath": data_files['train'], + "split": "train", + }, + ) + ] + + def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: + + df = pd.read_csv(filepath, sep=",", header="infer").reset_index() + df.columns = ["index","label", "data"] + + if self.config.schema == "source": + for row in df.itertuples(): + ex = {"index": str(row.index), "data": row.data, "label": row.label} + yield row.index, ex + elif self.config.schema == "nusantara_text": + for row in df.itertuples(): + ex = {"id": str(row.index), "text": row.data, "label": row.label} + yield row.index, ex + else: + raise ValueError(f"Invalid config: {self.config.name}") + +if __name__ == "__main__": + datasets.load_dataset(__file__) From b83862659bcf52dc226c462732c467d0d290006d Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Mon, 26 Sep 2022 19:06:58 +0700 Subject: [PATCH 09/12] Delete sunda_twitter_emotions.py --- .../sunda_twitter_emotions.py | 125 ------------------ 1 file changed, 125 deletions(-) 
delete mode 100644 nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py deleted file mode 100644 index 9dfbb741..00000000 --- a/nusantara/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -from pathlib import Path -from typing import Dict, List, Tuple - -import datasets -import pandas as pd - -from nusantara.utils import schemas -from nusantara.utils.configs import NusantaraConfig -from nusantara.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks - -# TODO: Add BibTeX citation -_CITATION = """\ -@INPROCEEDINGS{ -9297929, -author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, -booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, -title={Sundanese Twitter Dataset for Emotion Classification}, -year={2020}, -volume={}, -number={}, -pages={391-395}, -doi={10.1109/CENIM51130.2020.9297929} -} -""" - -_DATASETNAME = "sunda_twitter_emotions" - -_DESCRIPTION = """\ -This dataset is designed for Emotion Classification NLP task. -""" -_HOMEPAGE = "" - -_LICENSE = "UNKNOWN" - -_URLS = { - "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" -} - -_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] - - -_SOURCE_VERSION = "1.0.0" - -_NUSANTARA_VERSION = "1.0.0" - - -class NewDataset(datasets.GeneratorBasedBuilder): - """TODO: Short description of my dataset.""" - - SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) - NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) - - BUILDER_CONFIGS = [ - NusantaraConfig( - name="sunda_twitter_emotions_source", - version=SOURCE_VERSION, - description="Sundanese Twitter Dataset for Emotion source schema", - schema="source", - subset_id="sunda_twitter_emotions", - ), - NusantaraConfig( - name="sunda_twitter_emotions_nusantara_text", - version=NUSANTARA_VERSION, - description="Sundanese Twitter Dataset for Emotion Nusantara schema", - schema="nusantara_text", - subset_id="sunda_twitter_emotions", - ), - ] - - DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" - - def _info(self) -> datasets.DatasetInfo: - - if self.config.schema == "source": - features = datasets.Features({ - "index": datasets.Value("string"), - "data": datasets.Value("string"), - "label": datasets.Value("string")}) - - # For example nusantara_kb, nusantara_t2t - elif self.config.schema == "nusantara_text": - features = schemas.text_features(["anger", "joy", "fear", "sadness"]) - - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=features, - homepage=_HOMEPAGE, - license=_LICENSE, - citation=_CITATION, - ) - - def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: - urls = _URLS - data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) - data_files = {"train":data_dir} - - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, - gen_kwargs={ - "filepath": data_files['train'], - "split": "train", - }, - ) - ] - - def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: - - df = pd.read_csv(filepath, sep=",", header="infer").reset_index() - df.columns = ["index","label", "data"] - - if self.config.schema == "source": - for row in df.itertuples(): - ex = {"index": 
str(row.index), "data": row.data, "label": row.label} - yield row.index, ex - elif self.config.schema == "nusantara_text": - for row in df.itertuples(): - ex = {"id": str(row.index), "text": row.data, "label": row.label} - yield row.index, ex - else: - raise ValueError(f"Invalid config: {self.config.name}") - -if __name__ == "__main__": - datasets.load_dataset(__file__) From 7135a82c33b3ebdedf4ba304e116bc48692526c3 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Mon, 26 Sep 2022 20:01:43 +0700 Subject: [PATCH 10/12] Delete sunda_twitter_emotions.py --- .../sunda_twitter_emotions.py | 125 ------------------ 1 file changed, 125 deletions(-) delete mode 100644 nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py diff --git a/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py b/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py deleted file mode 100644 index 4071f6d2..00000000 --- a/nusacrowd/nusa_datasets/sunda_twitter_emotions/sunda_twitter_emotions.py +++ /dev/null @@ -1,125 +0,0 @@ -import os -from pathlib import Path -from typing import Dict, List, Tuple - -import datasets -import pandas as pd - -from nusacrowd.utils import schemas -from nusacrowd.utils.configs import NusantaraConfig -from nusacrowd.utils.constants import DEFAULT_NUSANTARA_VIEW_NAME, DEFAULT_SOURCE_VIEW_NAME, Tasks - -# TODO: Add BibTeX citation -_CITATION = """\ -@INPROCEEDINGS{ -9297929, -author={Putra, Oddy Virgantara and Wasmanson, Fathin Muhammad and Harmini, Triana and Utama, Shoffin Nahwa}, -booktitle={2020 International Conference on Computer Engineering, Network, and Intelligent Multimedia (CENIM)}, -title={Sundanese Twitter Dataset for Emotion Classification}, -year={2020}, -volume={}, -number={}, -pages={391-395}, -doi={10.1109/CENIM51130.2020.9297929} -} -""" - -_DATASETNAME = "sunda_twitter_emotions" - -_DESCRIPTION = """\ -This dataset is designed for Emotion Classification NLP task. 
-""" -_HOMEPAGE = "" - -_LICENSE = "UNKNOWN" - -_URLS = { - "datasets": "https://raw.githubusercontent.com/virgantara/sundanese-twitter-dataset/master/newdataset.csv" -} - -_SUPPORTED_TASKS = [Tasks.EMOTION_CLASSIFICATION] - - -_SOURCE_VERSION = "1.0.0" - -_NUSANTARA_VERSION = "1.0.0" - - -class NewDataset(datasets.GeneratorBasedBuilder): - """TODO: Short description of my dataset.""" - - SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) - NUSANTARA_VERSION = datasets.Version(_NUSANTARA_VERSION) - - BUILDER_CONFIGS = [ - NusantaraConfig( - name="sunda_twitter_emotions_source", - version=SOURCE_VERSION, - description="Sundanese Twitter Dataset for Emotion source schema", - schema="source", - subset_id="sunda_twitter_emotions", - ), - NusantaraConfig( - name="sunda_twitter_emotions_nusantara_text", - version=NUSANTARA_VERSION, - description="Sundanese Twitter Dataset for Emotion Nusantara schema", - schema="nusantara_text", - subset_id="sunda_twitter_emotions", - ), - ] - - DEFAULT_CONFIG_NAME = "sunda_twitter_emotions_source" - - def _info(self) -> datasets.DatasetInfo: - - if self.config.schema == "source": - features = datasets.Features({ - "index": datasets.Value("string"), - "data": datasets.Value("string"), - "label": datasets.Value("string")}) - - # For example nusantara_kb, nusantara_t2t - elif self.config.schema == "nusantara_text": - features = schemas.text_features(["anger", "joy", "fear", "sadness"]) - - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=features, - homepage=_HOMEPAGE, - license=_LICENSE, - citation=_CITATION, - ) - - def _split_generators(self, dl_manager: datasets.DownloadManager) -> List[datasets.SplitGenerator]: - urls = _URLS - data_dir = Path(dl_manager.download_and_extract(urls['datasets'])) - data_files = {"train":data_dir} - - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, - gen_kwargs={ - "filepath": data_files['train'], - "split": "train", - }, - ) - ] - - def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: - - df = pd.read_csv(filepath, sep=",", header="infer").reset_index() - df.columns = ["index","label", "data"] - - if self.config.schema == "source": - for row in df.itertuples(): - ex = {"index": str(row.index), "data": row.data, "label": row.label} - yield row.index, ex - elif self.config.schema == "nusantara_text": - for row in df.itertuples(): - ex = {"id": str(row.index), "text": row.data, "label": row.label} - yield row.index, ex - else: - raise ValueError(f"Invalid config: {self.config.name}") - -if __name__ == "__main__": - datasets.load_dataset(__file__) From 2f5c4d8514e48308dea28909fdb8bf2181d384d0 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Tue, 4 Oct 2022 11:38:10 +0700 Subject: [PATCH 11/12] minor change su_emot --- nusacrowd/nusa_datasets/su_emot/su_emot.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/nusacrowd/nusa_datasets/su_emot/su_emot.py b/nusacrowd/nusa_datasets/su_emot/su_emot.py index 1c0f823c..91c120cb 100644 --- a/nusacrowd/nusa_datasets/su_emot/su_emot.py +++ b/nusacrowd/nusa_datasets/su_emot/su_emot.py @@ -51,7 +51,7 @@ _NUSANTARA_VERSION = "1.0.0" -class suEmot(datasets.GeneratorBasedBuilder): +class SuEmot(datasets.GeneratorBasedBuilder): """This is a dataset for emotion classification of Sundanese text. 
The dataset is gathered from Twitter API between January and March 2019 with 2518 tweets in total.""" SOURCE_VERSION = datasets.Version(_SOURCE_VERSION) @@ -118,14 +118,12 @@ def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: if self.config.schema == "source": for row in df.itertuples(): - ex = {"index": str(row.index), "data": row.data, "label": row.label} + ex = {"index": str(row.index+1), "data": row.data, "label": row.label} yield row.index, ex elif self.config.schema == "nusantara_text": for row in df.itertuples(): - ex = {"id": str(row.index), "text": row.data, "label": row.label} + ex = {"id": str(row.index+1), "text": row.data, "label": row.label} yield row.index, ex - else: - raise ValueError(f"Invalid config: {self.config.name}") if __name__ == "__main__": datasets.load_dataset(__file__) From 773e9731fd0ee4f46a98d759654dd135ae3870b3 Mon Sep 17 00:00:00 2001 From: Nadya Aditama Date: Tue, 4 Oct 2022 14:17:35 +0700 Subject: [PATCH 12/12] minor change su_emot.py --- nusacrowd/nusa_datasets/su_emot/su_emot.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nusacrowd/nusa_datasets/su_emot/su_emot.py b/nusacrowd/nusa_datasets/su_emot/su_emot.py index 91c120cb..b7582a08 100644 --- a/nusacrowd/nusa_datasets/su_emot/su_emot.py +++ b/nusacrowd/nusa_datasets/su_emot/su_emot.py @@ -124,6 +124,3 @@ def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]: for row in df.itertuples(): ex = {"id": str(row.index+1), "text": row.data, "label": row.label} yield row.index, ex - -if __name__ == "__main__": - datasets.load_dataset(__file__)
-
-if __name__ == "__main__":
-    datasets.load_dataset(__file__)
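
A quick way to sanity-check the final loader state (after patches 11 and 12) is to load both configs directly from the script. The following is a minimal smoke-test sketch, not part of the patch series: the script path assumes the in-repo location introduced in patch 05, and running it needs network access to fetch the raw CSV from GitHub.

    # Smoke-test sketch for the final su_emot loader (assumed local path;
    # adjust to wherever the script lives on disk).
    import datasets

    for config_name in ("su_emot_source", "su_emot_nusantara_text"):
        ds = datasets.load_dataset(
            "nusacrowd/nusa_datasets/su_emot/su_emot.py",
            name=config_name,
            split="train",
        )
        # Per the dataset description: 2518 tweets, labels drawn from
        # anger / joy / fear / sadness. Source schema uses "index"/"data",
        # nusantara_text uses "id"/"text".
        print(config_name, ds.num_rows, ds[0])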
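
Patch 11's `row.index + 1` change is easy to misread: `reset_index()` materialises a 0-based integer column that the loader renames to "index", and `itertuples()` then exposes it as `row.index` (distinct from the namedtuple's frame-index field `row.Index`). A standalone sketch of that mechanic, using synthetic rows rather than the real CSV:

    import pandas as pd

    # Synthetic stand-in for newdataset.csv (two columns: label, then text).
    df = pd.DataFrame({"label": ["joy", "anger"],
                       "tweet": ["bungah pisan", "ambek pisan"]})

    # Mirror _generate_examples: reset_index() adds a 0-based "index" column,
    # then the three columns are renamed positionally.
    df = df.reset_index()
    df.columns = ["index", "label", "data"]

    for row in df.itertuples():
        # row.index is the 0-based column from reset_index(); the emitted id
        # is shifted to 1-based while the generator key stays 0-based.
        example = {"id": str(row.index + 1), "text": row.data, "label": row.label}
        print(row.index, example)
    # -> 0 {'id': '1', 'text': 'bungah pisan', 'label': 'joy'}
    #    1 {'id': '2', 'text': 'ambek pisan', 'label': 'anger'}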