Skip to content

Commit

Permalink
v1.9.1 Release PR (#346)
Browse files Browse the repository at this point in the history
* remove bad merge <p> element

* CU-8692kpchc Fix for Rosalind link not working (#342)

* CU-8692kpchc Add the 403 exception to vocab downloader

* CU-8692kpchc Add the new vocab download link

* Add missing self argument (#343)

To the `_refset_df2dict` method in SNOMED preprocessing

* CU-8692kn0yv Fix issue with fake dict in identifier based config

More specifically, the `get` method was not able to return default values for non-existent keys (#341)

* CU-8692mevx8 Fix issue with filters not taking effect in train_supervised method (#345)

* CU-8692mevx8 Fix issue with filters not taking effect in train_supervised method

* CU-8692mevx8 Fix filter retention in train_supervised method

---------

Co-authored-by: tomolopolis <[email protected]>
  • Loading branch information
mart-r and tomolopolis authored Sep 21, 2023
1 parent be23503 commit ba1dc4a
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 10 deletions.
20 changes: 16 additions & 4 deletions medcat/cat.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,8 @@ def _print_stats(self,
fp_docs: Set = set()
fn_docs: Set = set()

local_filters = self.config.linking.filters.copy_of()
orig_filters = self.config.linking.filters.copy_of()
local_filters = self.config.linking.filters
for pind, project in tqdm(enumerate(data['projects']), desc="Stats project", total=len(data['projects']), leave=False):
local_filters.cuis = set()

Expand Down Expand Up @@ -645,6 +646,8 @@ def _print_stats(self,
except Exception:
traceback.print_exc()

self.config.linking.filters = orig_filters

return fps, fns, tps, cui_prec, cui_rec, cui_f1, cui_counts, examples

def _set_project_filters(self, local_filters: LinkingFilters, project: dict,
Expand Down Expand Up @@ -1033,7 +1036,13 @@ def train_supervised_raw(self,
"""
checkpoint = self._init_ckpts(is_resumed, checkpoint)

local_filters = self.config.linking.filters.copy_of()
# the config.linking.filters stuff is used directly in
# medcat.linking.context_based_linker and medcat.linking.vector_context_model
# as such, they need to be kept up to date with per-project filters
# However, the original state needs to be kept track of
# so that it can be restored after training
orig_filters = self.config.linking.filters.copy_of()
local_filters = self.config.linking.filters

fp = fn = tp = p = r = f1 = examples = {}

Expand Down Expand Up @@ -1094,7 +1103,7 @@ def train_supervised_raw(self,
if retain_filters and extra_cui_filter and not retain_extra_cui_filter:
# adding project filters without extra_cui_filters
self._set_project_filters(local_filters, project, set(), use_filters)
self.config.linking.filters.merge_with(local_filters)
orig_filters.merge_with(local_filters)
# adding extra_cui_filters, but NOT project filters
self._set_project_filters(local_filters, project, extra_cui_filter, False)
# refrain from doing it again for subsequent epochs
Expand Down Expand Up @@ -1140,7 +1149,7 @@ def train_supervised_raw(self,
checkpoint.save(self.cdb, latest_trained_step)
# if retaining MCT filters AND (if they exist) extra_cui_filters
if retain_filters:
self.config.linking.filters.merge_with(local_filters)
orig_filters.merge_with(local_filters)
# refrain from doing it again for subsequent epochs
retain_filters = False

Expand All @@ -1162,6 +1171,9 @@ def train_supervised_raw(self,
use_groups=use_groups,
extra_cui_filter=extra_cui_filter)

# reset the state of filters
self.config.linking.filters = orig_filters

return fp, fn, tp, p, r, f1, cui_counts, examples

def get_entities(self,
Expand Down
6 changes: 5 additions & 1 deletion medcat/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ class FakeDict:
"""FakeDict that allows the use of the __getitem__ and __setitem__ method for legacy access."""

def __getitem__(self, arg: str) -> Any:
return getattr(self, arg)
try:
return getattr(self, arg)
except AttributeError as e:
raise KeyError from e


def __setitem__(self, arg: str, val) -> None:
setattr(self, arg, val)
Expand Down
2 changes: 1 addition & 1 deletion medcat/utils/preprocess_snomed.py
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,7 @@ def _check_path_and_release(self):
raise FileNotFoundError('Incorrect path to SNOMED CT directory')
return paths, snomed_releases

def _refset_df2dict(refset_df: pd.DataFrame) -> dict:
def _refset_df2dict(self, refset_df: pd.DataFrame) -> dict:
"""
This function takes a SNOMED refset DataFrame as an input and converts it into a dictionary.
The DataFrame should contain the columns 'referencedComponentId','mapTarget','mapGroup','mapPriority','mapRule','mapAdvice'.
Expand Down
15 changes: 13 additions & 2 deletions tests/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ async def __call__(self, *args, **kwargs):
</body></html>
"""

ERROR_403 = b"""<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>403 Forbidden</title>
</head><body>
<h1>Forbidden</h1>
<p>You don't have permission to access this resource.</p>
</body></html>
"""

SIMPLE_WORDS = """house 34444 0.3232 0.123213 1.231231
dog 14444 0.76762 0.76767 1.45454"""

Expand All @@ -45,7 +54,7 @@ def generate_simple_vocab():


class VocabDownloader:
url = 'https://medcat.rosalind.kcl.ac.uk/media/vocab.dat'
url = 'https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat'
vocab_path = "./tmp_vocab.dat"
_has_simple = False

Expand All @@ -54,6 +63,8 @@ def is_valid(self):
content = f.read()
if content == ERROR_503:
return False
if content == ERROR_403:
return False
v = Vocab.load(self.vocab_path)
if len(v.vocab) == 2: # simple one
self._has_simple = True
Expand All @@ -64,7 +75,7 @@ def check_or_download(self):
if os.path.exists(self.vocab_path) and self.is_valid():
return
tmp = requests.get(self.url)
if tmp.content == ERROR_503:
if tmp.content == ERROR_503 or tmp.content == ERROR_403:
print('Rosalind server unavailable')
if self._has_simple:
print('Local simple vocab already present')
Expand Down
2 changes: 0 additions & 2 deletions webapp/webapp/demo/templates/train_annotations.html
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ <h5> Disclaimer </h5>
WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED.
</p><a href="mailto:[email protected]">[email protected]</a> for more information.<p>
</p>
</p><a href="mailto:[email protected]">[email protected]</a> for more information.<p>

<br />
<h5> Sample text </h5>
<pre>
Expand Down

0 comments on commit ba1dc4a

Please sign in to comment.