generated from CDCgov/template
-
Notifications
You must be signed in to change notification settings - Fork 12
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* update pages (#87) * Update issue templates * v1.2.2 FLU GISAID update * V1.2.2 Version num Update * GitHub action test (#83) * Create DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update and rename DH_upload.yml to DH_GHCR_upload.yml * Update DH_GHCR_upload.yml * Delete .github/workflows/GHCR_docker.yml * Update DH_GHCR_upload.yml * V1.2.3 Update (#85) * GitHub action test (#83) (#84) * Create DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update DH_upload.yml * Update and rename DH_upload.yml to DH_GHCR_upload.yml * Update DH_GHCR_upload.yml * Delete .github/workflows/GHCR_docker.yml * Update DH_GHCR_upload.yml * Update BioSample/SRA handler to fix bug for one sample * Dev (#86) * Version num update * Version num update * Documentation updates and bug fixes * Documentation updates * Update README.md * Update app.py * Doc updates for docker * Version num update * GH pages bug fix * template updates * v1.2.7 add epiRSV to seqsender and metadata template
- Loading branch information
1 parent
c01da2e
commit fb87e9a
Showing
14 changed files
with
392 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,316 @@ | ||
from pandera import DataFrameSchema, Column, Check, Index, MultiIndex | ||
|
||
# Pandera schema for the GISAID EpiRSV metadata columns.
#
# Conventions used below:
#   * "sequence_name" is the only column without the "gs-" prefix; every other
#     column is a GISAID ("gs-") field.
#   * Mandatory columns are declared with required=True / nullable=False and
#     carry a check; the pattern r"^(?!\s*$).+" rejects empty and
#     whitespace-only strings.
#   * Optional columns are declared with required=False / nullable=True and
#     have no checks.
#   * strict="filter" (see the pandera DataFrameSchema docs) drops any column
#     of the validated frame that is not declared here instead of failing.
schema = DataFrameSchema(
    columns={
        "sequence_name": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=True,
            coerce=False,
            required=True,
            description="Sequence identifier used in fasta file. This is used to create the fasta file for Genbank or GISAID by updating the sequence name in your fasta file to reflect the sample name for the specified database.",
            title="sequence name",
        ),
        "gs-sample_name": Column(
            dtype="object",
            checks=[
                # Length bound mirrors the 50-character limit stated in the
                # description.
                Check.str_length(min_value=1, max_value=50),
            ],
            nullable=False,
            unique=True,
            coerce=False,
            required=True,
            description="Identifier name used for GISAID. Max length is 50 characters. This field is the same as \"rsv_sequence_name\" in GISAID's metadata template.",
            title="sample name",
        ),
        "gs-rsv_subtype": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="For RSV, there are two subtypes, \"RSV-A\" or \"RSV-B\".",
            title="virus subtype",
        ),
        "gs-rsv_passage": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="\"Original\" if the sample was sequenced directly from swabs, otherwise add the name of the cell line (e.g., \"Vero\") used to culture the specimen.",
            title="passage",
        ),
        "gs-rsv_location": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Format as \"Continent / Country / Region / Sub-region\".",
            title="location",
        ),
        "gs-rsv_add_location": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Additional location information (e.g. Cruise Ship, Convention, Live animal market).",
            title="additional location information",
        ),
        "gs-rsv_host": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Host species name. For Wastewater use \"Environment\".",
            title="host",
        ),
        "gs-rsv_add_host_info": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Additional information regarding patient (e.g. Patient infected while interacting with animal).",
            title="Additional host information",
        ),
        "gs-rsv_sampling_strategy": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Sampling strategy for sequence (e.g. Sentinel surveillance (ILI), Sentinel surveillance (ARI), Sentinel surveillance (SARI), Non-sentinel-surveillance (hospital), Non-sentinel-surveillance (GP network), Longitudinal sampling on same patient(s), S gene dropout).",
            title="sampling strategy",
        ),
        "gs-rsv_gender": Column(
            dtype="object",
            checks=[
                # Case-insensitive; besides the values named in the
                # description this pattern also accepts "m", "f" and
                # "missing".
                # NOTE(review): the pattern has no end-of-string anchor after
                # the trailing (\W|$) group, so a value such as "male (x)"
                # presumably passes as well - confirm this is intended.
                Check.str_matches(r"(?i)(\W|^)(male|m|female|f|unknown|missing)(\W|$)"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Synonym for \"Biological sex\". Should be \"Female\", \"Male\", or \"Unknown\".",
            title="gender",
        ),
        "gs-rsv_patient_age": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Age in years of the person from whom the specimen was collected. May take format other than numeric years, for example, \"0.5\" (i.e., 6 months), \"5 days\", \"7 months\". If units are not given, they are assumed in years. If missing, use \"Unknown\".",
            title="patient age",
        ),
        "gs-rsv_patient_status": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="E.g., \"Hospitalized\", \"Released\", \"Live\", \"Deceased\", \"Unknown\".",
            title="patient status",
        ),
        "gs-rsv_specimen": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Specimen source. For wastewater it must be \"Wastewater surveillance\".",
            title="specimen source",
        ),
        "gs-rsv_outbreak": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Outbreak information (Date, Location e.g. type of gathering, Family cluster, etc.).",
            title="outbreak information",
        ),
        "gs-rsv_last_vaccinated": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Provide details if applicable.",
            title="last vaccinated",
        ),
        "gs-rsv_treatment": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Provide details if applicable (e.g. Drug name, dosage).",
            title="treatment",
        ),
        "gs-rsv_seq_technology": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Add the sequencer brand and model (e.g. Illumina MiSeq, Sanger, Nanopore MinION).",
            title="sequencing technology",
        ),
        "gs-rsv_assembly_method": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Genome assembly algorithm (e.g. CLC Genomics Workbench 12, Geneious 10.2.4, SPAdes/MEGAHIT v1.2.9, UGENE v. 33).",
            title="assembly method",
        ),
        "gs-rsv_coverage": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="Average genome coverage (e.g. 50x, 100x, 1,000x).",
            title="average coverage",
        ),
        "gs-rsv_orig_lab": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Full name of laboratory from where sample originated.",
            title="originating lab",
        ),
        "gs-rsv_orig_lab_addr": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Complete building address of laboratory from where sample originated.",
            title="originating lab address",
        ),
        "gs-rsv_provider_sample_id": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="ID used by originating lab.",
            title="provider sample id",
        ),
        "gs-rsv_subm_lab": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Full name of laboratory submitting this record to GISAID.",
            title="submitting lab",
        ),
        "gs-rsv_subm_lab_addr": Column(
            dtype="object",
            checks=[
                Check.str_matches(r"^(?!\s*$).+"),
            ],
            nullable=False,
            unique=False,
            coerce=False,
            required=True,
            description="Complete building address of the submitting laboratory.",
            title="submitting lab address",
        ),
        "gs-rsv_subm_sample_id": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=False,
            description="ID used by submitting lab.",
            title="submitter sample id",
        ),
        # The two comment columns must be present (required=True) but may be
        # empty (nullable=True, no checks); their descriptions say to leave
        # them blank.
        "gs-rsv_comment": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=True,
            description="Leave blank.",
            title="comment",
        ),
        "gs-comment_type": Column(
            dtype="object",
            checks=None,
            nullable=True,
            unique=False,
            coerce=False,
            required=True,
            description="Leave blank.",
            title="comment type",
        ),
    },
    checks=None,
    index=None,
    coerce=False,
    strict="filter",
    # Schema-level labels previously read "cov" / "SARS-COV2" (copied from the
    # SARS-CoV-2 schema); every column here is an RSV field, so label the
    # schema as RSV to keep validation reports unambiguous.
    name="gisaid_rsv_schema",
    ordered=False,
    unique=None,
    report_duplicates="all",
    unique_column_names=True,
    add_missing_columns=False,
    title="seqsender GISAID RSV schema",
    description="Schema validation for GISAID RSV database.",
)
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.