Skip to content

Commit

Permalink
#508 update
Browse files (browse the repository at this point in the history)
  • Loading branch information
nicolay-r committed Aug 18, 2023
1 parent ddd5ccf commit 2b2dd19
Show file tree
Hide file tree
Showing 5 changed files with 343 additions and 0 deletions.
Empty file.
62 changes: 62 additions & 0 deletions arekit/contrib/source/nerelbio/io_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from os import path

from arekit.common.experiment.data_type import DataType
from arekit.contrib.source.nerel.folding.fixed import create_fixed_folding
from arekit.contrib.source.nerel.io_utils import NerelIOUtils
from arekit.contrib.source.nerel.utils import iter_filename_and_splittype


class NerelBioIOUtils(NerelIOUtils):
    """Path helpers and split discovery for the ``nerel-bio`` archive.

    Subclass of ``NerelIOUtils``. Every member defined here is static and
    refers to ``NerelBioIOUtils`` by name.
    """

    # Folding data type -> first component of the inner path used for the
    # documents of that split.
    splits = {
        DataType.Train: "train",
        DataType.Dev: "dev",
        DataType.Test: "test"
    }

    @staticmethod
    def get_archive_filepath(version):
        """Return ``<data root>/nerel-bio-<version>.zip``.

        ``version`` is inserted into the file name with ``str.format``.
        """
        data_root = NerelBioIOUtils.get_data_root()
        return path.join(data_root, "nerel-bio-{}.zip".format(version))

    @staticmethod
    def get_annotation_innerpath(folding_data_type, filename):
        """Return ``<split>/<filename>.ann`` for the given split.

        ``folding_data_type`` must be a key of ``splits``; ``filename`` must
        be a ``str`` (checked with an assertion).
        """
        assert isinstance(filename, str)
        split_dir = NerelBioIOUtils.splits[folding_data_type]
        return path.join(split_dir, "{}.ann".format(filename))

    @staticmethod
    def get_news_innerpath(folding_data_type, filename):
        """Return ``<split>/<filename>.txt`` for the given split.

        ``folding_data_type`` must be a key of ``splits``; ``filename`` must
        be a ``str`` (checked with an assertion).
        """
        assert isinstance(filename, str)
        split_dir = NerelBioIOUtils.splits[folding_data_type]
        return path.join(split_dir, "{}.txt".format(filename))

    @staticmethod
    def map_doc_to_fold_type(version):
        """Return a dict mapping every filename to its split type.

        The (filename, split type) pairs come from
        ``iter_filename_and_splittype``; when a filename is yielded more
        than once, the last split type wins.
        """
        pairs = iter_filename_and_splittype(
            filenames_it=NerelBioIOUtils.iter_filenames_from_zip(version),
            splits=NerelBioIOUtils.splits.items())

        return {filename: split_type for filename, split_type in pairs}

    @staticmethod
    def read_dataset_split(version, docs_limit=None):
        """Group filenames by split type and build the fixed folding.

        Returns the ``(filenames_by_ids, data_folding)`` pair produced by
        ``create_fixed_folding``; ``docs_limit`` is forwarded to it as
        ``limit``. Raises ``KeyError`` if no filename was found for the
        train, test or dev split.
        """
        pairs = iter_filename_and_splittype(
            filenames_it=NerelBioIOUtils.iter_filenames_from_zip(version),
            splits=NerelBioIOUtils.splits.items())

        # Filenames are kept in the order in which they were yielded.
        names_by_split = {}
        for filename, split_type in pairs:
            names_by_split.setdefault(split_type, []).append(filename)

        filenames_by_ids, data_folding = create_fixed_folding(
            train_filenames=names_by_split[DataType.Train],
            test_filenames=names_by_split[DataType.Test],
            dev_filenames=names_by_split[DataType.Dev],
            limit=docs_limit)

        return filenames_by_ids, data_folding
265 changes: 265 additions & 0 deletions arekit/contrib/source/nerelbio/labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,265 @@
from arekit.common.labels.base import Label


class ABBREVIATION(Label):
    """Marker label ``ABBREVIATION``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class ALTERNATIVE_NAME(Label):
    """Marker label ``ALTERNATIVE_NAME``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class KNOWS(Label):
    """Marker label ``KNOWS``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class AGE_IS(Label):
    """Marker label ``AGE_IS``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class AGE_DIED_AT(Label):
    """Marker label ``AGE_DIED_AT``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class AWARDED_WITH(Label):
    """Marker label ``AWARDED_WITH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PLACE_OF_BIRTH(Label):
    """Marker label ``PLACE_OF_BIRTH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class DATE_DEFUNCT_IN(Label):
    """Marker label ``DATE_DEFUNCT_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class DATE_FOUNDED_IN(Label):
    """Marker label ``DATE_FOUNDED_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class DATE_OF_BIRTH(Label):
    """Marker label ``DATE_OF_BIRTH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class DATE_OF_CREATION(Label):
    """Marker label ``DATE_OF_CREATION``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class DATE_OF_DEATH(Label):
    """Marker label ``DATE_OF_DEATH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class POINT_IN_TIME(Label):
    """Marker label ``POINT_IN_TIME``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PLACE_OF_DEATH(Label):
    """Marker label ``PLACE_OF_DEATH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class FOUNDED_BY(Label):
    """Marker label ``FOUNDED_BY``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class HEADQUARTERED_IN(Label):
    """Marker label ``HEADQUARTERED_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class IDEOLOGY_OF(Label):
    """Marker label ``IDEOLOGY_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class SPOUSE(Label):
    """Marker label ``SPOUSE``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class MEMBER_OF(Label):
    """Marker label ``MEMBER_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class ORGANIZES(Label):
    """Marker label ``ORGANIZES``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class OWNER_OF(Label):
    """Marker label ``OWNER_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PARENT_OF(Label):
    """Marker label ``PARENT_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PARTICIPANT_IN(Label):
    """Marker label ``PARTICIPANT_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PLACE_RESIDES_IN(Label):
    """Marker label ``PLACE_RESIDES_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PRICE_OF(Label):
    """Marker label ``PRICE_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PRODUCES(Label):
    """Marker label ``PRODUCES``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class RELATIVE(Label):
    """Marker label ``RELATIVE``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class RELIGION_OF(Label):
    """Marker label ``RELIGION_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class SCHOOLS_ATTENDED(Label):
    """Marker label ``SCHOOLS_ATTENDED``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class SIBLING(Label):
    """Marker label ``SIBLING``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class SUBEVENT_OF(Label):
    """Marker label ``SUBEVENT_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class SUBORDINATE_OF(Label):
    """Marker label ``SUBORDINATE_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class TAKES_PLACE_IN(Label):
    """Marker label ``TAKES_PLACE_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class WORKPLACE(Label):
    """Marker label ``WORKPLACE``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class WORKS_AS(Label):
    """Marker label ``WORKS_AS``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class CONVICTED_OF(Label):
    """Marker label ``CONVICTED_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PENALIZED_AS(Label):
    """Marker label ``PENALIZED_AS``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class START_TIME(Label):
    """Marker label ``START_TIME``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class END_TIME(Label):
    """Marker label ``END_TIME``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class EXPENDITURE(Label):
    """Marker label ``EXPENDITURE``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class AGENT(Label):
    """Marker label ``AGENT``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class INANIMATE_INVOLVED(Label):
    """Marker label ``INANIMATE_INVOLVED``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class INCOME(Label):
    """Marker label ``INCOME``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class SUBCLASS_OF(Label):
    """Marker label ``SUBCLASS_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PART_OF(Label):
    """Marker label ``PART_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class LOCATED_IN(Label):
    """Marker label ``LOCATED_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class TREATED_USING(Label):
    """Marker label ``TREATED_USING``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class ORIGINS_FROM(Label):
    """Marker label ``ORIGINS_FROM``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class TO_DETECT_OR_STUDY(Label):
    """Marker label ``TO_DETECT_OR_STUDY``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class AFFECTS(Label):
    """Marker label ``AFFECTS``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class HAS_CAUSE(Label):
    """Marker label ``HAS_CAUSE``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class APPLIED_TO(Label):
    """Marker label ``APPLIED_TO``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class USED_IN(Label):
    """Marker label ``USED_IN``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class ASSOCIATED_WITH(Label):
    """Marker label ``ASSOCIATED_WITH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class HAS_ADMINISTRATION_ROUTE(Label):
    """Marker label ``HAS_ADMINISTRATION_ROUTE``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class HAS_STRENGTH(Label):
    """Marker label ``HAS_STRENGTH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class DURATION_OF(Label):
    """Marker label ``DURATION_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class VALUE_IS(Label):
    """Marker label ``VALUE_IS``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PHYSIOLOGY_OF(Label):
    """Marker label ``PHYSIOLOGY_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class PROCEDURE_PERFORMED(Label):
    """Marker label ``PROCEDURE_PERFORMED``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class MENTAL_PROCESS_OF(Label):
    """Marker label ``MENTAL_PROCESS_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class MEDICAL_CONDITION(Label):
    """Marker label ``MEDICAL_CONDITION``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class DOSE_IS(Label):
    """Marker label ``DOSE_IS``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class FINDING_OF(Label):
    """Marker label ``FINDING_OF``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class CAUSE_OF_DEATH(Label):
    """Marker label ``CAUSE_OF_DEATH``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """


class CONSUME(Label):
    """Marker label ``CONSUME``; adds nothing to ``Label``.

    Presumably a NEREL-BIO annotation tag of the same name (unverified).
    """
8 changes: 8 additions & 0 deletions arekit/contrib/source/nerelbio/reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from arekit.contrib.source.nerel.reader import NerelDocReader
from arekit.contrib.source.nerelbio.io_utils import NerelBioIOUtils


class NerelBioDocReader(NerelDocReader):
    """``NerelDocReader`` wired to ``NerelBioIOUtils``."""

    def __init__(self, version):
        """Initialise the base reader for ``version``.

        A new ``NerelBioIOUtils`` instance is created on every call and
        handed to the base class as ``io_utils``.
        """
        bio_io_utils = NerelBioIOUtils()
        super().__init__(version=version, io_utils=bio_io_utils)
8 changes: 8 additions & 0 deletions arekit/contrib/source/nerelbio/versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import enum


class NerelBioVersions(enum.Enum):
    """Enumeration of NEREL-BIO collection versions.

    Only one member is declared so far: ``V1``, whose value is the string
    ``"v1_0"``.
    """

    V1 = "v1_0"


# Version exposed as the module-level default.
DEFAULT_VERSION = NerelBioVersions.V1

0 comments on commit 2b2dd19

Please sign in to comment.