Skip to content

Commit

Permalink
#502 related fix in 0.23.1 version. #501 related
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jul 22, 2023
1 parent ba542d7 commit 3f3c185
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions arekit/contrib/utils/data/writers/json_opennre.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,16 @@ class OpenNREJsonWriter(BaseWriter):
During the dataset reading stage via OpenNRE, these linkages are automatically grouped into bags.
"""

EXTRA_KEYS_TEMPLATE = "_{}"

def __init__(self, text_columns, encoding="utf-8"):
    """ Set up the writer state; no target is opened here.

        text_columns: list
            column names that are expected to be joined into a single (token) column.
        encoding: str
            encoding name kept on the instance (defaults to "utf-8").
    """
    assert(isinstance(text_columns, list))
    assert(isinstance(encoding, str))
    self.__text_columns = text_columns
    # Assigned exactly once (the duplicated assignment was redundant).
    self.__encoding = encoding
    # The target handle stays unset until a target is opened explicitly.
    self.__target_f = None

@staticmethod
Expand All @@ -50,7 +56,7 @@ def __format_row(row, text_columns):
tokens.extend(row[text_col].split())

# Filtering JSON row.
return {
formatted_data = {
"id": bag_id,
"id_orig": sample_id,
"token": tokens,
Expand All @@ -59,6 +65,13 @@ def __format_row(row, text_columns):
"relation": str(int(row[const.LABEL])) if const.LABEL in row else "NA"
}

# Register extra fields.
for key, value in row.items():
if key not in formatted_data and key not in text_columns:
formatted_data[OpenNREJsonWriter.EXTRA_KEYS_TEMPLATE.format(key)] = value

return formatted_data

def open_target(self, target):
os.makedirs(dirname(target), exist_ok=True)
self.__target_f = open(target, "w")
Expand Down

0 comments on commit 3f3c185

Please sign in to comment.