Skip to content

Commit

Permalink
Merge pull request #313 from brunoamaral/testing-classes
Browse files Browse the repository at this point in the history
implements ClinicalTrial class and moves code to functions.py
  • Loading branch information
brunoamaral authored Feb 19, 2023
2 parents 927aeb9 + 06f322f commit 1d964aa
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 15 deletions.
41 changes: 41 additions & 0 deletions django/gregory/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ def clean_abstract(self=None,abstract=None):
del tag[attribute]
return str(soup)

def clean_url(self=None):
    """Strip UTM tracking parameters from ``self.link`` in place.

    When ``self.link`` is ``None`` there is nothing to clean: the message
    ``no url found`` is printed and the link is left untouched.

    Returns:
        None. The cleaned URL is written back to ``self.link``.
    """
    # Guard clause: identity test for None, and bail out before doing any work.
    if self.link is None:
        print('no url found')
        return
    # Imported only when there is a link to clean, so the no-link path does not
    # need the project package. NOTE(review): the function-level import is
    # presumably there to avoid a circular import with gregory.functions - confirm.
    from gregory.functions import remove_utm
    self.link = remove_utm(self.link)


def refresh(self):
from db_maintenance.unpaywall import unpaywall_utils
from crossref.restful import Works, Etiquette
Expand Down Expand Up @@ -135,3 +143,36 @@ def find_doi(self,title=None):
if i == 5:
return None


class ClinicalTrial:
    """Plain container for one clinical-trial entry plus cleaning helpers.

    The constructor stores every argument unchanged on the instance.
    ``clean_summary`` and ``clean_url`` normalise ``summary`` and ``link``.
    """

    def __init__(self, title=None, summary=None, link=None, published_date=None, relevant=None, identifiers=None):
        self.title = title
        self.summary = summary
        self.link = link
        self.published_date = published_date
        self.relevant = relevant
        self.identifiers = identifiers

    def __str__(self):
        return f"{self.title}, {self.identifiers}"

    def __repr__(self):
        return f"{self.title}, \"{self.identifiers}\""

    def clean_summary(self=None, summary=None):
        """Unescape HTML entities and strip presentational attributes.

        The ``class``, ``id``, ``name`` and ``style`` attributes are removed
        from every tag; the tags themselves and their text are kept.

        Args:
            summary: Text to clean. When omitted, the instance's own
                ``summary`` is cleaned instead.

        Returns:
            The cleaned markup as a string, or ``None`` when there is no
            summary to clean.

        When the instance's own summary is cleaned (no explicit ``summary``
        argument), the result is also stored back on ``self.summary`` so that
        callers that ignore the return value still see the cleaned text. An
        explicitly passed ``summary`` never modifies the instance.
        """
        # ``self`` may be None when the method is called unbound with an
        # explicit ``summary``; only touch it when it is actually present.
        cleaning_own = summary is None and self is not None
        if cleaning_own:
            summary = self.summary
        if summary is None:
            return None

        # Imported only once there is text to parse.
        import html
        from bs4 import BeautifulSoup

        soup = BeautifulSoup(html.unescape(summary), 'html.parser')
        for tag in soup():
            for attribute in ("class", "id", "name", "style"):
                del tag[attribute]
        cleaned = str(soup)

        if cleaning_own:
            self.summary = cleaned
        return cleaned

    def clean_url(self=None):
        """Strip UTM tracking parameters from ``self.link`` in place.

        Prints ``no url found`` and leaves the link untouched when
        ``self.link`` is ``None``.
        """
        if self.link is None:
            print('no url found')
            return
        # Imported only when there is a link to clean.
        from gregory.functions import remove_utm
        self.link = remove_utm(self.link)
22 changes: 7 additions & 15 deletions django/gregory/feedreader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,14 @@
import os
import re
import gregory.functions as greg
from gregory.classes import SciencePaper
from gregory.classes import SciencePaper, ClinicalTrial
from django.utils import timezone
import pytz
# Settings row whose related site domain equals the DOMAIN_NAME environment
# variable. NOTE(review): this looks like module-level code, so the lookup
# would run at import time and fail if no matching row exists - confirm.
SITE = CustomSetting.objects.get(site__domain=os.environ.get('DOMAIN_NAME'))
# Base URL of this deployment, built from the site's domain.
CLIENT_WEBSITE = 'https://' + SITE.site.domain + '/'
# Client identification built from the site title, the literal version 'v8',
# the site URL and the admin e-mail. Presumably crossref.restful's Etiquette -
# confirm against this module's imports.
my_etiquette = Etiquette(SITE.title, 'v8', CLIENT_WEBSITE, SITE.admin_email)
# Works client configured with the etiquette above.
works = Works(etiquette=my_etiquette)

def remove_utm(url):
    """Return ``url`` with every ``utm_*`` tracking parameter removed.

    Any query parameter whose name starts with ``utm_`` is dropped. This
    covers ``utm_source``, ``utm_medium``, ``utm_campaign`` and
    ``utm_content`` as well as ``utm_term``, ``utm_id`` and other variants.
    All other parameters are kept in their original order, including
    repeated keys and parameters with blank values.

    Args:
        url: The URL string to clean.

    Returns:
        The URL rebuilt without UTM parameters. The query string is
        re-encoded, so its escaping may be normalised.
    """
    parts = urlparse(url)
    # keep_blank_values=True so parameters such as "b=" are not silently lost.
    params = parse_qs(parts.query, keep_blank_values=True)
    kept = {
        name: values
        for name, values in params.items()
        if not name.startswith('utm_')
    }
    # doseq=True expands list values back into repeated "key=value" pairs.
    return urlunparse(parts._replace(query=urlencode(kept, doseq=True)))

class FeedReaderTask(CronJobBase):
RUN_EVERY_MINS = 30
schedule = Schedule(run_every_mins=RUN_EVERY_MINS)
Expand Down Expand Up @@ -64,7 +54,7 @@ def do(self):
published = parse(entry['published'])
else:
published = parse(entry['prism_coverdate'])
link = remove_utm(entry['link'])
link = greg.remove_utm(entry['link'])
###
# This is a bad solution but it will have to do for now
###
Expand Down Expand Up @@ -137,7 +127,7 @@ def do(self):
published = entry.get('published')
if published:
published = parse(entry['published'])
link = remove_utm(entry['link'])
link = greg.remove_utm(entry['link'])
eudract = None
euct = None
nct = None
Expand All @@ -149,7 +139,9 @@ def do(self):
if 'clinicaltrials.gov' in link:
nct = entry['guid']
identifiers = {"eudract": eudract, "euct": euct, "nct": nct}
clinical_trial = ClinicalTrial(title = entry['title'], summary = summary, link = link, published_date = published, identifiers = identifiers,)
clinical_trial.clean_summary()
try:
trial = Trials.objects.create( discovery_date=timezone.now(), title = entry['title'], summary = summary, link = link, published_date = published, identifiers=identifiers, source = i)
trial = Trials.objects.create( discovery_date=timezone.now(), title = clinical_trial.title, summary = clinical_trial.summary, link = clinical_trial.link, published_date = clinical_trial.published_date, identifiers=clinical_trial.identifiers, source = i)
except:
pass
pass
12 changes: 12 additions & 0 deletions django/gregory/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,18 @@
from joblib import load
from .models import Articles
from django_cron import CronJobBase, Schedule
from urllib.parse import urlencode, urlparse, urlunparse, parse_qs

def remove_utm(url):
    """Return ``url`` with every ``utm_*`` tracking parameter removed.

    Any query parameter whose name starts with ``utm_`` is dropped. This
    covers ``utm_source``, ``utm_medium``, ``utm_campaign`` and
    ``utm_content`` as well as ``utm_term``, ``utm_id`` and other variants.
    All other parameters are kept in their original order, including
    repeated keys and parameters with blank values.

    Args:
        url: The URL string to clean.

    Returns:
        The URL rebuilt without UTM parameters. The query string is
        re-encoded, so its escaping may be normalised.
    """
    parts = urlparse(url)
    # keep_blank_values=True so parameters such as "b=" are not silently lost.
    params = parse_qs(parts.query, keep_blank_values=True)
    kept = {
        name: values
        for name, values in params.items()
        if not name.startswith('utm_')
    }
    # doseq=True expands list values back into repeated "key=value" pairs.
    return urlunparse(parts._replace(query=urlencode(kept, doseq=True)))


def get_doi(title):
doi = None
Expand Down

0 comments on commit 1d964aa

Please sign in to comment.