Skip to content

Maybe One‐Off commands

James Kent edited this page Mar 21, 2024 · 13 revisions

Find base_studies with the same doi

same_dois = BaseStudy.query.group_by(BaseStudy.doi).having(func.count(BaseStudy.doi) > 1).with_entities(BaseStudy.doi, func.array_agg(BaseStudy.id).label('id_list')).all()
keep = []
delete = []
for doi in same_dois:
    if not len(doi[1]) == 2:
        continue
    first_id, second_id = doi[1]
    first = BaseStfor doi in same_dois:
    if not len(doi[1]) == 2:
        continue
    first_id, second_id = doi[1]
    first = BaseStudy.query.filter_by(id=first_id).one()
    second = BaseStudy.query.filter_by(id=second_id).one()
    pmid = first.pmid or second.pmid
    name = first.name or second.name
    studies = first.versions + second.versions
    first.name = name
    first.pmid = pmid
    first.versions = studies
    first.description = first.description or second.description
    first.year = first.year or second.year
    first.publication = first.publication or second.publication
    first.metadata_ = first.metadata_ or second.metadata_
    keep.append(first)
    delete.append(second)
    print(f"doi: {doi[0]}")
    print(f"name: {name}")
    print(f"pmid: {pmid}")
    print(f"studies: {studies}")
    print(f"first source:{first.versions[0].source}")udy.query.filter_by(id=first_id).one()
    second = BaseStudy.query.filter_by(id=second_id).one()
    pmid = first.pmid or second.pmid
    name = first.name or second.name
    studies = first.versions + second.versions
    first.name = name
    first.pmid = pmid
    first.versions = studies
    first.description = first.description or second.description
    first.year = first.year or second.year
    first.publication = first.publication or second.publication
    first.metadata_ = first.metadata_ or second.metadata_
    keep.append(first)
    delete.append(second)
    print(f"doi: {doi[0]}")
    print(f"name: {name}")
    print(f"pmid: {pmid}")
    print(f"studies: {studies}")
    print(f"first source:{first.versions[0].source}")

Divide combined studies

dup_bs = []
for bs in BaseStudy.query.options(joinedload(BaseStudy.versions)):
    if len(bs.versions) < 1:
        continue
    pmid = bs.versions[0].pmid
    for s in bs.versions[1:]:
        if not pmid:
            pmid = s.pmid
        if s.pmid and s.pmid != pmid:
            if bs not in dup_bs:
                dup_bs.append(bs)
            print(bs.id)
new_bs = []
for bs in dup_bs:
    orig_pmid = bs.pmid
    groups = {}
    for v in bs.versions:
        if v.pmid == orig_pmid:
            continue
        if v.pmid in groups:
            groups[v.pmid].append(v)
        else:
            groups[v.pmid] = [v]
    for pmid, studies in groups.items():
        new_bs.append(BaseStudy(
            name=next((x.name for x in studies if x.name), None),
            pmid=pmid,
            doi=next((x.doi for x in studies if x.doi), None),
            authors=next((x.authors for x in studies if x.authors), None),
            year=next((x.year for x in studies if x.year), None),
            description=next((x.description for x in studies if x.description), None),
            publication=next((x.publication for x in studies if x.publication), None),
            metadata_=next((x.metadata for x in studies if x.metadata_), None),
            level="group",
            public=True,
            versions=studies,
       ))

db.session.add_all(new_bs)
db.session.commit()

Update provenance

from sqlalchemy.orm.attributes import flag_modified


to_commit = []
for p in Project.query:
    if not p.provenance.get("extractionMetadata"):
        continue
    for ss in p.provenance['extractionMetadata']['studyStatusList']: 
        if ss['status'] == "COMPLETE":
            ss['status'] = 'completed'
        elif ss['status'] == 'SAVEFORLATER':
            ss['status'] = 'savedforlater'
   
    flag_modified(p, "provenance")
    to_commit.append(p)

db.session.add_all(to_commit)
db.session.commit()

Add order to existing analysis objects

from sqlalchemy.orm import joinedload

def order_objects(objects):
    # Check if all objects have table_id defined
    if all(hasattr(obj, 'table_id') and obj.table_id is not None for obj in objects):
        # Check if all table_ids can be represented as integers
        if all(obj.table_id.isdigit() for obj in objects):
            # Convert table_ids to integers and sort based on value
            objects.sort(key=lambda obj: int(obj.table_id))
        else:
            # Sort based on string comparison of table_ids
            objects.sort(key=lambda obj: obj.table_id)
    else:
        # Check if all names can be represented as integers
        if all(obj.name.isdigit() for obj in objects):
            # Convert names to integers and sort based on value
            objects.sort(key=lambda obj: int(obj.name))
        else:
            # Sort based on string comparison of names
            objects.sort(key=lambda obj: obj.name)
    
    # Assign order attribute to each object
    for i, obj in enumerate(objects):
        obj.order = i

studies = Study.query.options(joinedload(Study.analyses)).all()

handle studies with doi, but no pmid

doi_no_pmid = BaseStudy.query.filter(and_(BaseStudy.doi != None, BaseStudy.doi != '')).filter(or_(BaseStudy.pmid == None, BaseStudy.doi == '')).all()

to_commit = []
to_delete = []
attribute_names = [column.name for column in BaseStudy.__table__.columns]
for bs in doi_no_pmid:
    pmids = doi_to_pmid(bs.doi)
    if len(pmids) == 1:
        pmid = pmids[0]
        other_bs = BaseStudy.query.filter_by(doi=bs.doi, pmid=pmid).one_or_none()
        if other_bs:
            for attr in ['name', 'description', 'metadata', 'publication', 'authors', 'year', 'level']:
                setattr(other_bs, attr, getattr(other_bs, attr, getattr(bs, attr, None)))
            other_bs.versions.extend(bs.versions)
            to_delete.append(bs)
            bs = other_bs
        else:
            bs.pmid = pmid
        to_commit.append(bs)
        for v in bs.versions:
            v.pmid = v.pmid or pmid
            to_commit.append(v)
    print(bs.name)