-
Notifications
You must be signed in to change notification settings - Fork 6
Maybe One‐Off commands
James Kent edited this page Mar 21, 2024
·
13 revisions
# Merge pairs of BaseStudy rows that share the same DOI.
#
# Step 1: find every DOI used by more than one base study, together with the
# ids of the rows that use it.
same_dois = (
    BaseStudy.query
    .group_by(BaseStudy.doi)
    .having(func.count(BaseStudy.doi) > 1)
    .with_entities(BaseStudy.doi, func.array_agg(BaseStudy.id).label('id_list'))
    .all()
)

# Step 2: fold the second record of each pair into the first.
# NOTE: this snippet only collects the records; it does not itself delete
# anything or commit the session.
keep = []    # surviving record of each pair, carrying the merged fields
delete = []  # redundant record of each pair
for doi, id_list in same_dois:
    # Only exact pairs are handled; DOIs shared by three or more rows are
    # skipped.
    if len(id_list) != 2:
        continue
    first_id, second_id = id_list
    first = BaseStudy.query.filter_by(id=first_id).one()
    second = BaseStudy.query.filter_by(id=second_id).one()

    # Prefer the value on `first`; fall back to `second` when it is empty.
    pmid = first.pmid or second.pmid
    name = first.name or second.name
    studies = first.versions + second.versions

    first.name = name
    first.pmid = pmid
    first.versions = studies
    first.description = first.description or second.description
    first.year = first.year or second.year
    first.publication = first.publication or second.publication
    first.metadata_ = first.metadata_ or second.metadata_

    keep.append(first)
    delete.append(second)

    # Progress output so the merge can be eyeballed.
    print(f"doi: {doi}")
    print(f"name: {name}")
    print(f"pmid: {pmid}")
    print(f"studies: {studies}")
    print(f"first source:{first.versions[0].source}")
# Collect base studies whose versions disagree about the PMID.
dup_bs = []
all_base_studies = BaseStudy.query.options(joinedload(BaseStudy.versions))
for base_study in all_base_studies:
    versions = base_study.versions
    if not versions:
        continue
    # Use the first non-empty pmid seen so far as the reference value.
    reference_pmid = versions[0].pmid
    for version in versions[1:]:
        if not reference_pmid:
            reference_pmid = version.pmid
        disagrees = version.pmid and version.pmid != reference_pmid
        if disagrees and base_study not in dup_bs:
            dup_bs.append(base_study)
            print(base_study.id)
# Split the versions of each base study in `dup_bs` whose pmid differs from
# the base study's own pmid into new base studies, one per differing pmid.


def _first_truthy(items, attr):
    """Return the first truthy value of ``attr`` among ``items``, else None."""
    return next((getattr(x, attr) for x in items if getattr(x, attr)), None)


new_bs = []
for bs in dup_bs:
    orig_pmid = bs.pmid
    # Group the versions that do not match the base study's pmid by their pmid.
    # NOTE(review): versions with an empty pmid end up in their own group
    # (key None) when the base study has a pmid -- confirm that is intended.
    groups = {}
    for v in bs.versions:
        if v.pmid == orig_pmid:
            continue
        groups.setdefault(v.pmid, []).append(v)
    for pmid, studies in groups.items():
        new_bs.append(BaseStudy(
            name=_first_truthy(studies, "name"),
            pmid=pmid,
            doi=_first_truthy(studies, "doi"),
            authors=_first_truthy(studies, "authors"),
            year=_first_truthy(studies, "year"),
            description=_first_truthy(studies, "description"),
            publication=_first_truthy(studies, "publication"),
            # The column attribute is `metadata_`; plain `metadata` on a
            # declarative model is SQLAlchemy's MetaData registry.
            metadata_=_first_truthy(studies, "metadata_"),
            level="group",
            public=True,
            versions=studies,
        ))
db.session.add_all(new_bs)
db.session.commit()
from sqlalchemy.orm.attributes import flag_modified
# Rename legacy extraction statuses stored in each project's provenance:
# "COMPLETE" -> "completed" and "SAVEFORLATER" -> "savedforlater".
to_commit = []
for project in Project.query:
    extraction = project.provenance.get("extractionMetadata")
    if extraction:
        for entry in extraction['studyStatusList']:
            current = entry['status']
            if current == "COMPLETE":
                entry['status'] = 'completed'
            elif current == 'SAVEFORLATER':
                entry['status'] = 'savedforlater'
        # The nested dict was edited in place, so mark the attribute as
        # changed explicitly.
        flag_modified(project, "provenance")
        to_commit.append(project)
db.session.add_all(to_commit)
db.session.commit()
from sqlalchemy.orm import joinedload
def order_objects(objects):
    """Sort ``objects`` in place and number them through an ``order`` attribute.

    The sort key is ``table_id`` when every object has a non-None
    ``table_id``; otherwise it is ``name``. If every key is a decimal string
    the keys are compared as integers, otherwise as plain strings. After
    sorting, each object receives ``order`` equal to its index in the list.

    Args:
        objects: mutable list of objects exposing string ``table_id`` and/or
            ``name`` attributes. The list is reordered in place.

    Returns:
        None.
    """
    def _text(obj, attr):
        # A missing or None value sorts as an empty string instead of raising.
        value = getattr(obj, attr, None)
        return "" if value is None else value

    has_table_ids = all(
        getattr(obj, "table_id", None) is not None for obj in objects
    )
    attr = "table_id" if has_table_ids else "name"

    # isdecimal(), unlike isdigit(), only accepts characters that int() can
    # parse (isdigit() is also true for e.g. superscript digits).
    if all(_text(obj, attr).isdecimal() for obj in objects):
        objects.sort(key=lambda obj: int(_text(obj, attr)))
    else:
        objects.sort(key=lambda obj: _text(obj, attr))

    # Assign the final position to each object.
    for position, obj in enumerate(objects):
        obj.order = position
# Fetch all studies, loading each study's analyses via joinedload.
studies = (
    Study.query
    .options(joinedload(Study.analyses))
    .all()
)
# Backfill PMIDs for base studies that have a DOI but no PMID.
#
# Select base studies with a non-empty doi and a missing (NULL or empty) pmid.
# `!= None` / `== None` are intentional: SQLAlchemy overloads them to emit
# IS NOT NULL / IS NULL.
doi_no_pmid = (
    BaseStudy.query
    .filter(and_(BaseStudy.doi != None, BaseStudy.doi != ''))  # noqa: E711
    .filter(or_(BaseStudy.pmid == None, BaseStudy.pmid == ''))  # noqa: E711
    .all()
)
to_commit = []
to_delete = []
# Not used in the visible part of this snippet; kept in case later code needs it.
attribute_names = [column.name for column in BaseStudy.__table__.columns]
for bs in doi_no_pmid:
    pmids = doi_to_pmid(bs.doi)
    # Only act when the DOI resolves to exactly one PMID.
    if len(pmids) == 1:
        pmid = pmids[0]
        other_bs = BaseStudy.query.filter_by(doi=bs.doi, pmid=pmid).one_or_none()
        if other_bs:
            # A base study with this doi + pmid already exists: fill its empty
            # fields from `bs`, move the versions over, and mark `bs` for
            # deletion.
            for attr in ['name', 'description', 'metadata_', 'publication', 'authors', 'year', 'level']:
                if not getattr(other_bs, attr):
                    setattr(other_bs, attr, getattr(bs, attr))
            # Copy first: if the relationship keeps both sides in sync,
            # appending to `other_bs.versions` may shrink `bs.versions`
            # while it is being iterated.
            other_bs.versions.extend(list(bs.versions))
            to_delete.append(bs)
            bs = other_bs
        else:
            bs.pmid = pmid
        to_commit.append(bs)
        # Propagate the pmid to versions that do not have one yet.
        for v in bs.versions:
            v.pmid = v.pmid or pmid
            to_commit.append(v)
        print(bs.name)