From 09bfa88b7d4b61c12aaf6bcd924e166e15c4962f Mon Sep 17 00:00:00 2001 From: Samuel Cahyawijaya Date: Fri, 15 Jul 2022 15:00:09 +0800 Subject: [PATCH] update test script - change label checking from an implicit truthiness (empty value) check to an explicit `is not None` check, change indo_nli to indonli, add jsonlines to requirements --- .../{indo_nli/indo_nli.py => indonli/indonli.py} | 16 ++++++++-------- requirements.txt | 1 + tests/test_nusantara.py | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) rename nusantara/nusa_datasets/{indo_nli/indo_nli.py => indonli/indonli.py} (96%) diff --git a/nusantara/nusa_datasets/indo_nli/indo_nli.py b/nusantara/nusa_datasets/indonli/indonli.py similarity index 96% rename from nusantara/nusa_datasets/indo_nli/indo_nli.py rename to nusantara/nusa_datasets/indonli/indonli.py index a096b588..b98640bd 100644 --- a/nusantara/nusa_datasets/indo_nli/indo_nli.py +++ b/nusantara/nusa_datasets/indonli/indonli.py @@ -53,7 +53,7 @@ } """ -_DATASETNAME = "indo_nli" +_DATASETNAME = "indonli" _DESCRIPTION = """\ This dataset is designed for Natural Language Inference NLP task. 
It is designed to provide a challenging test-bed @@ -100,22 +100,22 @@ class IndoNli(datasets.GeneratorBasedBuilder): BUILDER_CONFIGS = [ NusantaraConfig( - name="indo_nli_source", + name="indonli_source", version=SOURCE_VERSION, - description="indo_nli source schema", + description="indonli source schema", schema="source", - subset_id="indo_nli", + subset_id="indonli", ), NusantaraConfig( - name="indo_nli_nusantara_pairs", + name="indonli_nusantara_pairs", version=NUSANTARA_VERSION, - description="indo_nli Nusantara schema", + description="indonli Nusantara schema", schema="nusantara_pairs", - subset_id="indo_nli", + subset_id="indonli", ), ] - DEFAULT_CONFIG_NAME = "indo_nli_source" + DEFAULT_CONFIG_NAME = "indonli_source" labels = ["c", "e", "n"] def _info(self) -> datasets.DatasetInfo: diff --git a/requirements.txt b/requirements.txt index 23a71f07..a0217966 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,4 @@ aiohttp==3.8.1 pre-commit==2.19.0 soundfile librosa +jsonlines>=3.1.0 \ No newline at end of file diff --git a/tests/test_nusantara.py b/tests/test_nusantara.py index e8e19a89..0e0a357f 100644 --- a/tests/test_nusantara.py +++ b/tests/test_nusantara.py @@ -224,7 +224,7 @@ def get_feature_statistics(self, features: Features, schema: str) -> Dict: for feature_name, feature in features.items(): if example.get(feature_name, None) is not None: if isinstance(feature, datasets.ClassLabel) or isinstance(feature, datasets.Value): - if example[feature_name]: + if example[feature_name] is not None: counter[feature_name] += 1 else: counter[feature_name] += len(example[feature_name])