Skip to content

Commit

Permalink
minor change su_emot
Browse files: browse the repository at this point in the history
  • Loading branch information
nadyadtm committed Oct 4, 2022
1 parent 7135a82 commit 2f5c4d8
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions nusacrowd/nusa_datasets/su_emot/su_emot.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@
_NUSANTARA_VERSION = "1.0.0"


class suEmot(datasets.GeneratorBasedBuilder):
class SuEmot(datasets.GeneratorBasedBuilder):
"""This is a dataset for emotion classification of Sundanese text. The dataset is gathered from Twitter API between January and March 2019 with 2518 tweets in total."""

SOURCE_VERSION = datasets.Version(_SOURCE_VERSION)
Expand Down Expand Up @@ -118,14 +118,12 @@ def _generate_examples(self, filepath: Path, split: str) -> Tuple[int, Dict]:

if self.config.schema == "source":
for row in df.itertuples():
ex = {"index": str(row.index), "data": row.data, "label": row.label}
ex = {"index": str(row.index+1), "data": row.data, "label": row.label}
yield row.index, ex
elif self.config.schema == "nusantara_text":
for row in df.itertuples():
ex = {"id": str(row.index), "text": row.data, "label": row.label}
ex = {"id": str(row.index+1), "text": row.data, "label": row.label}
yield row.index, ex
else:
raise ValueError(f"Invalid config: {self.config.name}")

# Script entry point: when this file is executed directly (not imported),
# pass this file's own path to `datasets.load_dataset`.
if __name__ == "__main__":
datasets.load_dataset(__file__)

0 comments on commit 2f5c4d8

Please sign in to comment.