-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
343 additions
and
0 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
from os import path | ||
|
||
from arekit.common.experiment.data_type import DataType | ||
from arekit.contrib.source.nerel.folding.fixed import create_fixed_folding | ||
from arekit.contrib.source.nerel.io_utils import NerelIOUtils | ||
from arekit.contrib.source.nerel.utils import iter_filename_and_splittype | ||
|
||
|
||
class NerelBioIOUtils(NerelIOUtils):
    """Archive-path and split helpers for the ``nerel-bio-<version>.zip`` archive.

    Extends ``NerelIOUtils``; ``get_data_root`` and ``iter_filenames_from_zip``
    are not defined here and are expected to come from that base class.
    """

    # Maps each DataType onto the name of the top-level archive directory
    # that holds the documents of that split.
    splits = {
        DataType.Train: "train",
        DataType.Dev: "dev",
        DataType.Test: "test"
    }

    @staticmethod
    def get_archive_filepath(version):
        """Return the filesystem path of ``nerel-bio-<version>.zip`` under the data root."""
        return path.join(NerelBioIOUtils.get_data_root(), "nerel-bio-{}.zip".format(version))

    @staticmethod
    def _get_innerpath(folding_data_type, filename, extension):
        """Build ``<split dir>/<filename><extension>`` for a document inside the archive.

        Raises ``KeyError`` when ``folding_data_type`` is not a key of ``splits``.
        """
        # Local import keeps this block self-contained (no file-level import change).
        import posixpath

        assert(isinstance(filename, str))
        # NOTE(review): the result is presumably used as a zip member name (the
        # archive is a .zip). Member names always use "/", so the host-dependent
        # os.path.join would produce an unmatched "train\\x.ann" on Windows.
        # posixpath.join yields the same result as before on POSIX systems.
        return posixpath.join(NerelBioIOUtils.splits[folding_data_type],
                              "{}{}".format(filename, extension))

    @staticmethod
    def get_annotation_innerpath(folding_data_type, filename):
        """Return the in-archive path of the ``.ann`` file of ``filename``.

        ``folding_data_type`` must be a key of ``splits``; ``filename`` must be a str.
        """
        return NerelBioIOUtils._get_innerpath(folding_data_type, filename, ".ann")

    @staticmethod
    def get_news_innerpath(folding_data_type, filename):
        """Return the in-archive path of the ``.txt`` file of ``filename``.

        ``folding_data_type`` must be a key of ``splits``; ``filename`` must be a str.
        """
        return NerelBioIOUtils._get_innerpath(folding_data_type, filename, ".txt")

    @staticmethod
    def _iter_filename_and_splittype(version):
        """Iterate ``(filename, split_type)`` pairs for the archive of ``version``.

        Thin wrapper over ``iter_filename_and_splittype`` shared by the two
        public methods below, which previously duplicated this call.
        """
        return iter_filename_and_splittype(
            filenames_it=NerelBioIOUtils.iter_filenames_from_zip(version),
            splits=NerelBioIOUtils.splits.items())

    @staticmethod
    def map_doc_to_fold_type(version):
        """Return a dict mapping every filename of the archive to its split type."""
        d2f = {}
        for filename, split_type in NerelBioIOUtils._iter_filename_and_splittype(version):
            d2f[filename] = split_type

        return d2f

    @staticmethod
    def read_dataset_split(version, docs_limit=None):
        """Group the archive filenames by split and build the fixed folding.

        ``docs_limit`` is forwarded as ``limit`` to ``create_fixed_folding``.
        Returns the ``(filenames_by_ids, data_folding)`` pair produced by it.
        """
        # Seed every known split with an empty list so that an archive lacking
        # one of the splits yields an empty fold instead of a bare KeyError.
        f2d = {data_type: [] for data_type in NerelBioIOUtils.splits}
        for filename, split_type in NerelBioIOUtils._iter_filename_and_splittype(version):
            f2d.setdefault(split_type, []).append(filename)

        filenames_by_ids, data_folding = create_fixed_folding(train_filenames=f2d[DataType.Train],
                                                              test_filenames=f2d[DataType.Test],
                                                              dev_filenames=f2d[DataType.Dev],
                                                              limit=docs_limit)

        return filenames_by_ids, data_folding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,265 @@ | ||
from arekit.common.labels.base import Label | ||
|
||
|
||
class ABBREVIATION(Label):
    """``Label`` subclass named ABBREVIATION; it declares no members of its own."""
|
||
|
||
class ALTERNATIVE_NAME(Label):
    """``Label`` subclass named ALTERNATIVE_NAME; it declares no members of its own."""
|
||
|
||
class KNOWS(Label):
    """``Label`` subclass named KNOWS; it declares no members of its own."""
|
||
|
||
class AGE_IS(Label):
    """``Label`` subclass named AGE_IS; it declares no members of its own."""
|
||
|
||
class AGE_DIED_AT(Label):
    """``Label`` subclass named AGE_DIED_AT; it declares no members of its own."""
|
||
|
||
class AWARDED_WITH(Label):
    """``Label`` subclass named AWARDED_WITH; it declares no members of its own."""
|
||
|
||
class PLACE_OF_BIRTH(Label):
    """``Label`` subclass named PLACE_OF_BIRTH; it declares no members of its own."""
|
||
|
||
class DATE_DEFUNCT_IN(Label):
    """``Label`` subclass named DATE_DEFUNCT_IN; it declares no members of its own."""
|
||
|
||
class DATE_FOUNDED_IN(Label):
    """``Label`` subclass named DATE_FOUNDED_IN; it declares no members of its own."""
|
||
|
||
class DATE_OF_BIRTH(Label):
    """``Label`` subclass named DATE_OF_BIRTH; it declares no members of its own."""
|
||
|
||
class DATE_OF_CREATION(Label):
    """``Label`` subclass named DATE_OF_CREATION; it declares no members of its own."""
|
||
|
||
class DATE_OF_DEATH(Label):
    """``Label`` subclass named DATE_OF_DEATH; it declares no members of its own."""
|
||
|
||
class POINT_IN_TIME(Label):
    """``Label`` subclass named POINT_IN_TIME; it declares no members of its own."""
|
||
|
||
class PLACE_OF_DEATH(Label):
    """``Label`` subclass named PLACE_OF_DEATH; it declares no members of its own."""
|
||
|
||
class FOUNDED_BY(Label):
    """``Label`` subclass named FOUNDED_BY; it declares no members of its own."""
|
||
|
||
class HEADQUARTERED_IN(Label):
    """``Label`` subclass named HEADQUARTERED_IN; it declares no members of its own."""
|
||
|
||
class IDEOLOGY_OF(Label):
    """``Label`` subclass named IDEOLOGY_OF; it declares no members of its own."""
|
||
|
||
class SPOUSE(Label):
    """``Label`` subclass named SPOUSE; it declares no members of its own."""
|
||
|
||
class MEMBER_OF(Label):
    """``Label`` subclass named MEMBER_OF; it declares no members of its own."""
|
||
|
||
class ORGANIZES(Label):
    """``Label`` subclass named ORGANIZES; it declares no members of its own."""
|
||
|
||
class OWNER_OF(Label):
    """``Label`` subclass named OWNER_OF; it declares no members of its own."""
|
||
|
||
class PARENT_OF(Label):
    """``Label`` subclass named PARENT_OF; it declares no members of its own."""
|
||
|
||
class PARTICIPANT_IN(Label):
    """``Label`` subclass named PARTICIPANT_IN; it declares no members of its own."""
|
||
|
||
class PLACE_RESIDES_IN(Label):
    """``Label`` subclass named PLACE_RESIDES_IN; it declares no members of its own."""
|
||
|
||
class PRICE_OF(Label):
    """``Label`` subclass named PRICE_OF; it declares no members of its own."""
|
||
|
||
class PRODUCES(Label):
    """``Label`` subclass named PRODUCES; it declares no members of its own."""
|
||
|
||
class RELATIVE(Label):
    """``Label`` subclass named RELATIVE; it declares no members of its own."""
|
||
|
||
class RELIGION_OF(Label):
    """``Label`` subclass named RELIGION_OF; it declares no members of its own."""
|
||
|
||
class SCHOOLS_ATTENDED(Label):
    """``Label`` subclass named SCHOOLS_ATTENDED; it declares no members of its own."""
|
||
|
||
class SIBLING(Label):
    """``Label`` subclass named SIBLING; it declares no members of its own."""
|
||
|
||
class SUBEVENT_OF(Label):
    """``Label`` subclass named SUBEVENT_OF; it declares no members of its own."""
|
||
|
||
class SUBORDINATE_OF(Label):
    """``Label`` subclass named SUBORDINATE_OF; it declares no members of its own."""
|
||
|
||
class TAKES_PLACE_IN(Label):
    """``Label`` subclass named TAKES_PLACE_IN; it declares no members of its own."""
|
||
|
||
class WORKPLACE(Label):
    """``Label`` subclass named WORKPLACE; it declares no members of its own."""
|
||
|
||
class WORKS_AS(Label):
    """``Label`` subclass named WORKS_AS; it declares no members of its own."""
|
||
|
||
class CONVICTED_OF(Label):
    """``Label`` subclass named CONVICTED_OF; it declares no members of its own."""
|
||
|
||
class PENALIZED_AS(Label):
    """``Label`` subclass named PENALIZED_AS; it declares no members of its own."""
|
||
|
||
class START_TIME(Label):
    """``Label`` subclass named START_TIME; it declares no members of its own."""
|
||
|
||
class END_TIME(Label):
    """``Label`` subclass named END_TIME; it declares no members of its own."""
|
||
|
||
class EXPENDITURE(Label):
    """``Label`` subclass named EXPENDITURE; it declares no members of its own."""
|
||
|
||
class AGENT(Label):
    """``Label`` subclass named AGENT; it declares no members of its own."""
|
||
|
||
class INANIMATE_INVOLVED(Label):
    """``Label`` subclass named INANIMATE_INVOLVED; it declares no members of its own."""
|
||
|
||
class INCOME(Label):
    """``Label`` subclass named INCOME; it declares no members of its own."""
|
||
|
||
class SUBCLASS_OF(Label):
    """``Label`` subclass named SUBCLASS_OF; it declares no members of its own."""
|
||
|
||
class PART_OF(Label):
    """``Label`` subclass named PART_OF; it declares no members of its own."""
|
||
|
||
class LOCATED_IN(Label):
    """``Label`` subclass named LOCATED_IN; it declares no members of its own."""
|
||
|
||
class TREATED_USING(Label):
    """``Label`` subclass named TREATED_USING; it declares no members of its own."""
|
||
|
||
class ORIGINS_FROM(Label):
    """``Label`` subclass named ORIGINS_FROM; it declares no members of its own."""
|
||
|
||
class TO_DETECT_OR_STUDY(Label):
    """``Label`` subclass named TO_DETECT_OR_STUDY; it declares no members of its own."""
|
||
|
||
class AFFECTS(Label):
    """``Label`` subclass named AFFECTS; it declares no members of its own."""
|
||
|
||
class HAS_CAUSE(Label):
    """``Label`` subclass named HAS_CAUSE; it declares no members of its own."""
|
||
|
||
class APPLIED_TO(Label):
    """``Label`` subclass named APPLIED_TO; it declares no members of its own."""
|
||
|
||
class USED_IN(Label):
    """``Label`` subclass named USED_IN; it declares no members of its own."""
|
||
|
||
class ASSOCIATED_WITH(Label):
    """``Label`` subclass named ASSOCIATED_WITH; it declares no members of its own."""
|
||
|
||
class HAS_ADMINISTRATION_ROUTE(Label):
    """``Label`` subclass named HAS_ADMINISTRATION_ROUTE; it declares no members of its own."""
|
||
|
||
class HAS_STRENGTH(Label):
    """``Label`` subclass named HAS_STRENGTH; it declares no members of its own."""
|
||
|
||
class DURATION_OF(Label):
    """``Label`` subclass named DURATION_OF; it declares no members of its own."""
|
||
|
||
class VALUE_IS(Label):
    """``Label`` subclass named VALUE_IS; it declares no members of its own."""
|
||
|
||
class PHYSIOLOGY_OF(Label):
    """``Label`` subclass named PHYSIOLOGY_OF; it declares no members of its own."""
|
||
|
||
class PROCEDURE_PERFORMED(Label):
    """``Label`` subclass named PROCEDURE_PERFORMED; it declares no members of its own."""
|
||
|
||
class MENTAL_PROCESS_OF(Label):
    """``Label`` subclass named MENTAL_PROCESS_OF; it declares no members of its own."""
|
||
|
||
class MEDICAL_CONDITION(Label):
    """``Label`` subclass named MEDICAL_CONDITION; it declares no members of its own."""
|
||
|
||
class DOSE_IS(Label):
    """``Label`` subclass named DOSE_IS; it declares no members of its own."""
|
||
|
||
class FINDING_OF(Label):
    """``Label`` subclass named FINDING_OF; it declares no members of its own."""
|
||
|
||
class CAUSE_OF_DEATH(Label):
    """``Label`` subclass named CAUSE_OF_DEATH; it declares no members of its own."""
|
||
|
||
class CONSUME(Label):
    """``Label`` subclass named CONSUME; it declares no members of its own."""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from arekit.contrib.source.nerel.reader import NerelDocReader | ||
from arekit.contrib.source.nerelbio.io_utils import NerelBioIOUtils | ||
|
||
|
||
class NerelBioDocReader(NerelDocReader):
    """``NerelDocReader`` specialisation that is always wired to ``NerelBioIOUtils``."""

    def __init__(self, version):
        """Initialise the parent reader for ``version`` with a new ``NerelBioIOUtils`` instance."""
        bio_io_utils = NerelBioIOUtils()
        super().__init__(version=version, io_utils=bio_io_utils)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
import enum | ||
|
||
|
||
class NerelBioVersions(enum.Enum):
    """Enumeration of the dataset versions declared in this module."""

    # The only declared version; its value is the string "v1_0".
    V1 = "v1_0"
|
||
|
||
# Module-level default version: the V1 member of NerelBioVersions (value "v1_0").
DEFAULT_VERSION = NerelBioVersions.V1