From 63967a3df01fefbcae26f45608ac21684e2645c7 Mon Sep 17 00:00:00 2001 From: lorr1 <57237365+lorr1@users.noreply.github.com> Date: Wed, 28 Apr 2021 22:04:55 -0600 Subject: [PATCH] Merge with embeddings in annotator --- bootleg/end2end/bootleg_annotator.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/bootleg/end2end/bootleg_annotator.py b/bootleg/end2end/bootleg_annotator.py index d549922e..c39a406b 100644 --- a/bootleg/end2end/bootleg_annotator.py +++ b/bootleg/end2end/bootleg_annotator.py @@ -32,10 +32,10 @@ logger = logging.getLogger(__name__) BOOTLEG_MODEL_PATHS = { - "bootleg_cased": "https://bootleg-data.s3.amazonaws.com/models/latest/bootleg_cased.tar.gz", - "bootleg_cased_mini": "https://bootleg-data.s3.amazonaws.com/models/latest/bootleg_cased_mini.tar.gz", - "bootleg_uncased": "https://bootleg-data.s3.amazonaws.com/models/latest/bootleg_uncased.tar.gz", - "bootleg_uncased_mini": "https://bootleg-data.s3.amazonaws.com/models/latest/bootleg_uncased_mini.tar.gz", + "bootleg_cased": "https://bootleg-data.s3-us-west-2.amazonaws.com/models/latest/bootleg_cased.tar.gz", + "bootleg_cased_mini": "https://bootleg-data.s3-us-west-2.amazonaws.com/models/latest/bootleg_cased_mini.tar.gz", + "bootleg_uncased": "https://bootleg-data.s3-us-west-2.amazonaws.com/models/latest/bootleg_uncased.tar.gz", + "bootleg_uncased_mini": "https://bootleg-data.s3-us-west-2.amazonaws.com/models/latest/bootleg_uncased_mini.tar.gz", } @@ -121,7 +121,7 @@ def create_sources(model_path, data_path, model_name): if not (data_path / "entity_db").exists(): print(f"{data_path / 'entity_db'} not found. Downloading..") urllib.request.urlretrieve( - "https://bootleg-data.s3.amazonaws.com/data/latest/entity_db.tar.gz", + "https://bootleg-data.s3-us-west-2.amazonaws.com/data/latest/entity_db.tar.gz", filename=str(data_path / "entity_db.tar.gz"), reporthook=DownloadProgressBar(), ) @@ -337,19 +337,21 @@ def label_mentions( """Extracts mentions and runs disambiguation. If user provides extracted_examples, we will ignore text_list Args: - text_list: list of text to disambiguate (or single sentence) + text_list: list of text to disambiguate (or single string) (can be None if extracted_examples is not None) label_func: mention extraction funciton (optional) - extracted_examples: List of Dicts of keys "sentence", "aliases", "spans", "cands" (QIDs) + extracted_examples: List of Dicts of keys "sentence", "aliases", "spans", "cands" (QIDs) (optional) Returns: Dict of * ``qids``: final predicted QIDs, * ``probs``: final predicted probs, * ``titles``: final predicted titles, - * ``cands``: all entity canddiates, + * ``cands``: all entity candidates, * ``cand_probs``: probabilities of all candidates, * ``spans``: final extracted word spans, * ``aliases``: final extracted aliases, + * ``embs``: final entity contextualized embeddings (if return_embs is True) + * ``cand_embs``: final candidate entity contextualized embeddings (if return_embs is True) """ # Check inputs are sane do_extract_mentions = True