Skip to content

Commit

Permalink
vidrl_upload: Add --human-ref-only flag
Browse files Browse the repository at this point in the history
Allows us to ingest only the human sera references while we backfill the
data, so that we avoid accidentally duplicating the ferret titer data.

This flag can be removed once we've ingested all of the human sera
references that have been previously skipped.
  • Loading branch information
joverlee521 committed Aug 27, 2024
1 parent b36f7cc commit b2403ef
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions tdb/vidrl_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from titer_block import find_titer_block, find_serum_rows, find_virus_columns

parser.add_argument('--assay_type', default='hi')
parser.add_argument('--human-ref-only', action="store_true",
help="Only ingest human sera references, used for backfilling data that was skipped in previous ingests.")

ELIFE_COLUMNS = ["virus_strain", "serum_strain","serum_id", "titer", "source", "virus_passage", "virus_passage_category", "serum_passage", "serum_passage_category", "assay_type"]
EXPECTED_SUBTYPES = {"h1n1pdm", "h3n2", "vic", "yam"}
Expand Down Expand Up @@ -117,20 +119,20 @@ def parse_human_serum_references(human_serum_data, subtype):
return human_serum_references


def read_vidrl(path, fstem, assay_type, subtype):
def read_vidrl(path, fstem, assay_type, subtype, human_ref_only):
'''
Read all csv tables in path, create data frame with reference viruses as columns
'''
exten = [ os.path.isfile(path + fstem + ext) for ext in ['.xls', '.xlsm', '.xlsx'] ]

if True in exten:
ind = exten.index(True)
convert_vidrl_xls_to_tsv(path, fstem, ind, assay_type, subtype)
convert_vidrl_xls_to_tsv(path, fstem, ind, assay_type, subtype, human_ref_only)
else:
print("Unable to recognize file {}/{}".format(path,fstem))
sys.exit()

def convert_vidrl_xls_to_tsv(path, fstem, ind, assay_type, subtype):
def convert_vidrl_xls_to_tsv(path, fstem, ind, assay_type, subtype, human_ref_only):
exts = ['.xls', '.xlsm', '.xlsx']
workbook = xlrd.open_workbook(path+fstem + exts[ind])

Expand Down Expand Up @@ -281,6 +283,9 @@ def convert_vidrl_xls_to_tsv(path, fstem, ind, assay_type, subtype):
serum_id = human_serum_references[j]['serum_id']
serum_passage = human_serum_references[j]['serum_passage']
serum_strain = human_serum_references[j]['serum_strain']
# Skip other titer measurements if we only want to ingest human serum references
elif human_ref_only:
continue
else:
serum_id = str(mat.cell_value(serum_id_row_index,j)).strip().replace(' ','')
serum_passage = str(mat.cell_value(serum_passage_row_index,j)).strip()
Expand Down Expand Up @@ -332,7 +337,7 @@ def read_flat_vidrl(path, fstem, assay_type):
if args.ftype == "flat":
read_flat_vidrl(args.path, args.fstem, args.assay_type)
else:
read_vidrl(args.path, args.fstem, args.assay_type, args.subtype)
read_vidrl(args.path, args.fstem, args.assay_type, args.subtype, args.human_ref_only)

if args.preview:
command = "python tdb/elife_upload.py -db " + args.database + " --subtype " + args.subtype + " --path data/tmp/ --fstem " + args.fstem + " --preview"
Expand Down

0 comments on commit b2403ef

Please sign in to comment.