generated from linkml/linkml-project-template
-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #199 from include-dcc/validation_updates
NA and other minor error handling
- Loading branch information
Showing
9 changed files
with
171 additions
and
155 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,37 @@ | ||
from src.include_linkml.include_pydantic import Study, Participant, Condition, Biospecimen | ||
from src.data_validation.validation_utils import handle_nan | ||
from pydantic import ValidationError | ||
|
||
|
||
def validate_biospecimen_entry(row): | ||
# Validate a single biospecimen record: build a Biospecimen model from the
# columns of `row` and report whether pydantic accepted it.
# Returns (True, None) on success, or (False, (label, error)) when
# ValidationError is raised; label joins the study code and the sample
# external id with "-".
# NOTE(review): this block appears to be a rendered diff -- the Title Case
# `row[...]` lines look like the removed version and the lower-case lines that
# follow look like their replacements. Confirm against the repository before
# treating both sets as live code.
try: | ||
instance = Biospecimen( | ||
studyCode=row['Study Code'], | ||
participantGlobalId=row['Participant Global ID'], | ||
participantExternalId=row['Participant External ID'], | ||
sampleGlobalId=row['Sample Global ID'], | ||
sampleExternalId=row['Sample External ID'], | ||
sampleType=row['Sample Type'], | ||
ageAtBiospecimenCollection=row['Age At Biospecimen Collection'], | ||
parentSampleGlobalId=row['Parent Sample Global ID'], | ||
parentSampleExternalId=row['Parent Sample External ID'], | ||
parentSampleType=row['Parent Sample Type'], | ||
collectionGlobalId=row['Collection Global ID'], | ||
collectionExternalId=row['Collection External ID'], | ||
collectionSampleType=row['Collection Sample Type'], | ||
containerGlobalId=row['Container Global ID'], | ||
containerExternalId=row['Container External ID'], | ||
volume=row['Volume'], | ||
volumeUnit=row['Volume Unit'], | ||
concentration=row['Concentration'], | ||
concentrationUnit=row['Concentration Unit'], | ||
laboratoryProcedure=row['Laboratory Procedure'], | ||
biospecimenStorage=row['Biospecimen Storage'], | ||
sampleAvailability=row['Sample Availability'], | ||
containerAvailability=row['Container Availability'], | ||
# Lower-case column keys from here on; most values go through handle_nan
# (imported from validation_utils, not shown here -- presumably it normalises
# NaN/empty cells; TODO confirm).
studyCode=handle_nan(row['study code']), | ||
participantGlobalId=handle_nan(row['participant global id']), | ||
participantExternalId=handle_nan(row['participant external id']), | ||
sampleGlobalId=handle_nan(row['sample global id']), | ||
sampleExternalId=handle_nan(row['sample external id']), | ||
sampleType=handle_nan(row['sample type']), | ||
ageAtBiospecimenCollection=handle_nan(row['age at biospecimen collection']), | ||
parentSampleGlobalId=handle_nan(row['parent sample global id']), | ||
parentSampleExternalId=handle_nan(row['parent sample external id']), | ||
parentSampleType=handle_nan(row['parent sample type']), | ||
collectionGlobalId=handle_nan(row['collection global id']), | ||
collectionExternalId=handle_nan(row['collection external id']), | ||
collectionSampleType=handle_nan(row['collection sample type']), | ||
containerGlobalId=handle_nan(row['container global id']), | ||
containerExternalId=handle_nan(row['container external id']), | ||
volume=handle_nan(row['volume']), | ||
volumeUnit=handle_nan(row['volume unit']), | ||
concentration=handle_nan(row['concentration']), | ||
concentrationUnit=handle_nan(row['concentration unit']), | ||
laboratoryProcedure=handle_nan(row['laboratory procedure']), | ||
biospecimenStorage=handle_nan(row['biospecimen storage']), | ||
# The two availability fields are passed straight from the row, without handle_nan.
sampleAvailability=row['sample availability'], | ||
containerAvailability=row['container availability'] | ||
) | ||
# Validation successful | ||
return True, None | ||
except ValidationError as e: | ||
# Validation failed | ||
error_details = (row['Study Code'] + "-" + row['Sample External ID'], e) | ||
# str() is applied to both cells so the label can be built from non-string values.
error_details = (str(row['study code']) + "-" + str(row['sample external id']), e) | ||
return False, error_details |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,34 @@ | ||
from src.include_linkml.include_pydantic import Study, Participant, Condition | ||
from src.data_validation.validation_utils import handle_nan | ||
from pydantic import ValidationError | ||
|
||
|
||
def validate_condition_entry(row): | ||
# Validate a single condition record: build a Condition model from the columns
# of `row` and report whether pydantic accepted it.
# Returns (True, None) on success, or (False, (label, error)) when
# ValidationError is raised; label joins the study code, participant external
# id and event id with "-".
# NOTE(review): this block appears to be a rendered diff -- the Title Case
# `row[...]` lines look like the removed version and the lower-case lines that
# follow look like their replacements. Confirm against the repository before
# treating both sets as live code.
try: | ||
instance = Condition( | ||
studyCode=row['Study Code'], | ||
participantGlobalId=row['Participant Global ID'], | ||
participantExternalId=row['Participant External ID'], | ||
eventId=row['Event ID'], | ||
eventType=row['Event Type'], | ||
conditionMeasureSourceText=row['Condition or Measure Source Text'], | ||
ageAtConditionMeasureObservation=row['Age At Condition or Measure Observation'], | ||
conditionInterpretation=row['Condition Interpretation'], | ||
conditionStatus=row['Condition Status'], | ||
conditionDataSource=row['Condition Data Source'], | ||
hpoLabel=row['HPO Label'], | ||
hpoCode=row['HPO Code'], | ||
mondoLabel=row['MONDO Label'], | ||
mondoCode=row['MONDO Code'], | ||
maxoLabel=row['MAXO Label'], | ||
maxoCode=row['MAXO Code'], | ||
otherLabel=row['Other Label'], | ||
otherCode=row['Other Code'], | ||
measureValue=row['Measure Value'], | ||
measureUnit=row['Measure Unit'] | ||
# Lower-case column keys from here on. studyCode, conditionInterpretation,
# conditionStatus and conditionDataSource are passed straight from the row;
# every other value goes through handle_nan (imported from validation_utils,
# not shown here -- presumably it normalises NaN/empty cells; TODO confirm).
studyCode=row['study code'], | ||
participantGlobalId=handle_nan(row['participant global id']), | ||
participantExternalId=handle_nan(row['participant external id']), | ||
eventId=handle_nan(row['event id']), | ||
eventType=handle_nan(row['event type']), | ||
conditionMeasureSourceText=handle_nan(row['condition or measure source text']), | ||
ageAtConditionMeasureObservation=handle_nan(row['age at condition or measure observation']), | ||
conditionInterpretation=row['condition interpretation'], | ||
conditionStatus=row['condition status'], | ||
conditionDataSource=row['condition data source'], | ||
hpoLabel=handle_nan(row['hpo label']), | ||
hpoCode=handle_nan(row['hpo code']), | ||
mondoLabel=handle_nan(row['mondo label']), | ||
mondoCode=handle_nan(row['mondo code']), | ||
maxoLabel=handle_nan(row['maxo label']), | ||
maxoCode=handle_nan(row['maxo code']), | ||
otherLabel=handle_nan(row['other label']), | ||
otherCode=handle_nan(row['other code']), | ||
measureValue=handle_nan(row['measure value']), | ||
measureUnit=handle_nan(row['measure unit']) | ||
) | ||
# Validation successful | ||
return True, None | ||
except ValidationError as e: | ||
# Validation failed | ||
error_details = (row['Study Code'] + "-" + str(row['Participant External ID']) + "-" + row['Event ID'], e) | ||
# All three cells are wrapped in str() so the label can be built from non-string values.
error_details = (str(row['study code']) + "-" + str(row['participant external id']) + "-" + str(row['event id']), e) | ||
return False, error_details |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,33 @@ | ||
from src.include_linkml.include_pydantic import Study, Participant, Condition, Biospecimen, DataFile | ||
from src.data_validation.validation_utils import handle_nan | ||
from pydantic import ValidationError | ||
|
||
|
||
def validate_datafile_entry(row): | ||
# Validate a single data-file record: build a DataFile model from the columns
# of `row` and report whether pydantic accepted it.
# Returns (True, None) on success, or (False, (label, error)) when
# ValidationError is raised; label joins the sample external id and the file
# global id with "-".
# NOTE(review): this block appears to be a rendered diff -- the Title Case
# `row[...]` lines look like the removed version and the lower-case lines that
# follow look like their replacements. Confirm against the repository before
# treating both sets as live code.
try: | ||
instance = DataFile( | ||
studyCode=row['Study Code'], | ||
participantGlobalId=row['Participant Global ID'], | ||
participantExternalId=row['Participant External ID'], | ||
sampleGlobalId=row['Sample Global ID'], | ||
sampleExternalId=row['Sample External ID'], | ||
fileName=row['File Name'], | ||
fileGlobalId=row['File Global ID'], | ||
fileS3Location=row['File S3 Location'], | ||
fileUploadLocation=row['File Upload Location'], | ||
drsUri=row['DRS URI'], | ||
fileHash=row['File Hash'], | ||
dataAccess=row['Data Access'], | ||
dataCategory=row['Data Category'], | ||
dataType=row['Data Type'], | ||
experimentalStrategy=str(row['Experimental Strategy']).split('|'), | ||
experimentalPlatform=str(row['Experimental Platform']).split('|'), | ||
fileFormat=row['File Format'], | ||
fileSize=int(row['File Size']), | ||
fileSizeUnit=row['File Size Unit'] | ||
# Lower-case column keys from here on. studyCode, dataAccess and dataCategory
# are passed straight from the row; the remaining values go through handle_nan
# (imported from validation_utils, not shown here -- presumably it normalises
# NaN/empty cells; TODO confirm).
studyCode=row['study code'], | ||
participantGlobalId=handle_nan(row['participant global id']), | ||
participantExternalId=handle_nan(row['participant external id']), | ||
sampleGlobalId=handle_nan(row['sample global id']), | ||
sampleExternalId=handle_nan(row['sample external id']), | ||
fileName=handle_nan(row['file name']), | ||
fileGlobalId=handle_nan(row['file global id']), | ||
fileS3Location=handle_nan(row['file s3 location']), | ||
fileUploadLocation=handle_nan(row['file upload location']), | ||
drsUri=handle_nan(row['drs uri']), | ||
fileHash=handle_nan(row['file hash']), | ||
dataAccess=row['data access'], | ||
dataCategory=row['data category'], | ||
dataType=handle_nan(row['data type']), | ||
# Pipe-delimited cells become lists: split on '|' when handle_nan returns a
# truthy value for the cell, otherwise use an empty list.
experimentalStrategy=row['experimental strategy'].split('|') if handle_nan(row['experimental strategy']) else [], | ||
experimentalPlatform=row['experimental platform'].split('|') if handle_nan(row['experimental platform']) else [], | ||
fileFormat=handle_nan(row['file format']), | ||
# Unlike the Title Case line above, no int() conversion is applied to the file size here.
fileSize=handle_nan(row['file size']), | ||
fileSizeUnit=handle_nan(row['file size unit']) | ||
) | ||
# Validation successful | ||
return True, None | ||
except ValidationError as e: | ||
# Validation failed | ||
error_details = (row['Sample External ID'] + "-" + row['File Global ID'], e) | ||
# str() is applied to both cells so the label can be built from non-string values.
error_details = (str(row['sample external id']) + "-" + str(row['file global id']), e) | ||
return False, error_details |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,34 @@ | ||
from src.include_linkml.include_pydantic import Study, Participant, Condition, Biospecimen, DataFile, Dataset | ||
from src.data_validation.validation_utils import handle_nan | ||
from pydantic import ValidationError | ||
|
||
|
||
def validate_dataset_entry(row): | ||
# Validate a single dataset record: build a Dataset model from the columns of
# `row` and report whether pydantic accepted it.
# Returns (True, None) on success, or (False, (label, error)) when
# ValidationError is raised; label joins the dataset name and the dataset
# external id with "-".
# NOTE(review): this block appears to be a rendered diff -- the Title Case
# `row[...]` lines look like the removed version and the lower-case lines that
# follow look like their replacements. Confirm against the repository before
# treating both sets as live code.
try: | ||
instance = Dataset( | ||
studyCode=row['Study Code'], | ||
datasetName=row['Dataset Name'], | ||
datasetDescription=row['Dataset Description'], | ||
datasetGlobalId=row['Dataset Global ID'], | ||
datasetExternalId=row['Dataset External ID'], | ||
expectedNumberOfParticipants=int(row['Expected Number of Participants']), | ||
expectedNumberOfFiles=int(row['Expected Number of Files']), | ||
dataCollectionStartYear=row['Data Collection Start Year'], | ||
dataCollectionEndYear=row['Data Collection End Year'], | ||
dataCategory=row['Data Category'], | ||
dataType=row['Data Type'], | ||
experimentalStrategy=str(row['Experimental Strategy']).split('|'), | ||
experimentalPlatform=str(row['Experimental Platform']).split('|'), | ||
publication=str(row['Publication']).split('|'), | ||
accessLimitations=row['Access Limitations'], | ||
accessRequirements=row['Access Requirements'], | ||
dbgap=str(row['DbGaP']).split('|'), | ||
otherRepository=row['Other Repository'], | ||
otherAccessAuthority=row['Other Access Authority'], | ||
isHarmonized=bool(row['Is Harmonized?']) | ||
# Lower-case column keys from here on. studyCode and dataCategory are passed
# straight from the row; most other values go through handle_nan (imported
# from validation_utils, not shown here -- presumably it normalises NaN/empty
# cells; TODO confirm). The two expected-count fields no longer use int().
studyCode=row['study code'], | ||
datasetName=handle_nan(row['dataset name']), | ||
datasetDescription=handle_nan(row['dataset description']), | ||
datasetGlobalId=handle_nan(row['dataset global id']), | ||
datasetExternalId=handle_nan(row['dataset external id']), | ||
expectedNumberOfParticipants=handle_nan(row['expected number of participants']), | ||
expectedNumberOfFiles=handle_nan(row['expected number of files']), | ||
dataCollectionStartYear=handle_nan(row['data collection start year']), | ||
dataCollectionEndYear=handle_nan(row['data collection end year']), | ||
dataCategory=row['data category'], | ||
dataType=handle_nan(row['data type']), | ||
# Pipe-delimited cells become lists: split on '|' when handle_nan returns a
# truthy value for the cell, otherwise use an empty list.
experimentalStrategy=row['experimental strategy'].split('|') if handle_nan(row['experimental strategy']) else [], | ||
experimentalPlatform=row['experimental platform'].split('|') if handle_nan(row['experimental platform']) else [], | ||
publication=row['publication'].split('|') if handle_nan(row['publication']) else [], | ||
accessLimitations=handle_nan(row['access limitations']), | ||
accessRequirements=handle_nan(row['access requirements']), | ||
dbgap=row['dbgap'].split('|') if handle_nan(row['dbgap']) else [], | ||
otherRepository=handle_nan(row['other repository']), | ||
otherAccessAuthority=handle_nan(row['other access authority']), | ||
# NOTE(review): bool() is True for any non-empty string (including "False")
# and for NaN -- confirm this column already holds real booleans.
isHarmonized=bool(row['is harmonized?']) | ||
) | ||
# Validation successful | ||
return True, None | ||
except ValidationError as e: | ||
# Validation failed | ||
error_details = (row['Dataset Name'] + "-" + str(row['Dataset External ID']), e) | ||
# NOTE(review): the dataset name is concatenated without str(); a non-string
# cell (e.g. a float NaN) would raise TypeError here, inside the except handler.
error_details = (row['dataset name'] + "-" + str(row['dataset external id']), e) | ||
return False, error_details |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,20 +1,20 @@ | ||
from src.include_linkml.include_pydantic import Study, Participant, Condition, Biospecimen, DataFile, Dataset, DatasetManifest | ||
from src.data_validation.validation_utils import handle_nan | ||
from pydantic import ValidationError | ||
|
||
|
||
def validate_datasetmanifest_entry(row): | ||
# Validate a single dataset-manifest record: build a DatasetManifest model from
# the columns of `row` and report whether pydantic accepted it.
# Returns (True, None) on success, or (False, (label, error)) when
# ValidationError is raised; label joins the dataset name and the dataset
# external id with "-".
# NOTE(review): this block appears to be a rendered diff -- the Title Case
# `row[...]` lines look like the removed version and the lower-case lines that
# follow look like their replacements. Confirm against the repository before
# treating both sets as live code.
try: | ||
instance = DatasetManifest( | ||
studyCode=row['Study Code'], | ||
datasetName=row['Dataset Name'], | ||
datasetGlobalId=row['Dataset Global ID'], | ||
datasetExternalId=row['Dataset External ID'], | ||
fileName=row['File Name'], | ||
fileGlobalId=row['File Global ID'] | ||
# Lower-case column keys from here on. studyCode is passed straight from the
# row; every other value goes through handle_nan (imported from
# validation_utils, not shown here -- presumably it normalises NaN/empty
# cells; TODO confirm).
studyCode=row['study code'], | ||
datasetName=handle_nan(row['dataset name']), | ||
datasetGlobalId=handle_nan(row['dataset global id']), | ||
datasetExternalId=handle_nan(row['dataset external id']), | ||
fileName=handle_nan(row['file name']), | ||
fileGlobalId=handle_nan(row['file global id']) | ||
) | ||
# Validation successful | ||
return True, None | ||
except ValidationError as e: | ||
# Validation failed | ||
error_details = (row['Dataset Name'] + "-" + str(row['Dataset External ID']), e) | ||
# str() is applied to both cells so the label can be built from non-string values.
error_details = (str(row['dataset name']) + "-" + str(row['dataset external id']), e) | ||
return False, error_details |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,32 +1,32 @@ | ||
from src.include_linkml.include_pydantic import Study, Participant | ||
from src.data_validation.validation_utils import handle_nan | ||
from pydantic import ValidationError | ||
|
||
|
||
def validate_participant_entry(row): | ||
# Validate a single participant record: build a Participant model from the
# columns of `row` and report whether pydantic accepted it.
# Returns (True, None) on success, or (False, (label, error)) when
# ValidationError is raised; label joins the study code and the participant
# external id with "-".
# NOTE(review): this block appears to be a rendered diff -- the Title Case
# `row[...]` lines look like the removed version and the lower-case lines that
# follow look like their replacements. Confirm against the repository before
# treating both sets as live code.
try: | ||
instance = Participant( | ||
studyCode=row['Study Code'], | ||
participantGlobalId=row['Participant Global ID'], | ||
participantExternalId=row['Participant External ID'], | ||
familyId=row['Family ID'], | ||
familyType=row['Family Type'], | ||
fatherId=row['Father ID'], | ||
motherId=row['Mother ID'], | ||
siblingId=row['Sibling ID'], | ||
otherFamilyMemberId=row['Other Family Member ID'], | ||
familyRelationship=row['Family Relationship'], | ||
sex=row['Sex'], | ||
race=row['Race'], | ||
ethnicity=row['Ethnicity'], | ||
downSyndromeStatus=row['Down Syndrome Status'], | ||
ageAtFirstPatientEngagement=row['Age at First Patient Engagement'], | ||
firstPatientEngagementEvent=row['First Patient Engagement Event'], | ||
outcomesVitalStatus=row['Outcomes Vital Status'], | ||
ageAtLastVitalStatus=row['Age at Last Vital Status'] | ||
# Lower-case column keys from here on. studyCode, familyType,
# familyRelationship, sex, race, ethnicity, downSyndromeStatus and
# outcomesVitalStatus are passed straight from the row; the remaining values
# go through handle_nan (imported from validation_utils, not shown here --
# presumably it normalises NaN/empty cells; TODO confirm).
studyCode=row['study code'], | ||
participantGlobalId=handle_nan(row['participant global id']), | ||
participantExternalId=handle_nan(row['participant external id']), | ||
familyId=handle_nan(row['family id']), | ||
familyType=row['family type'], | ||
fatherId=handle_nan(row['father id']), | ||
motherId=handle_nan(row['mother id']), | ||
siblingId=handle_nan(row['sibling id']), | ||
otherFamilyMemberId=handle_nan(row['other family member id']), | ||
familyRelationship=row['family relationship'], | ||
sex=row['sex'], | ||
race=row['race'], | ||
ethnicity=row['ethnicity'], | ||
downSyndromeStatus=row['down syndrome status'], | ||
ageAtFirstPatientEngagement=handle_nan(row['age at first patient engagement']), | ||
firstPatientEngagementEvent=handle_nan(row['first patient engagement event']), | ||
outcomesVitalStatus=row['outcomes vital status'], | ||
ageAtLastVitalStatus=handle_nan(row['age at last vital status']) | ||
) | ||
# Validation successful | ||
return True, None | ||
except ValidationError as e: | ||
# Validation failed | ||
error_details = (row['Study Code'] + "-" + row['Participant External ID'], e) | ||
# str() is applied to both cells so the label can be built from non-string values.
error_details = (str(row['study code']) + "-" + str(row['participant external id']), e) | ||
return False, error_details |
Oops, something went wrong.