From f7885b9e11472172f4076b69ffea5bfa36956203 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Wed, 24 Sep 2014 18:39:30 -0400 Subject: [PATCH 01/51] Inital work for task #9098 - `provide subject id in report` --- bin/redi.py | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 8eb55bc..197e685 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -1107,9 +1107,17 @@ def research_id_to_redcap_id_converter( This function converts the research_id to redcap_id 1. prepare a dictionary with [key, value] --> [study_id, redcap_id] 2. replace the element tree study_id with the new redcap_id's - for each bad id, log it as warn + for each bad id, log it as warn. + + Example of xml fragment produced: + + HEMOGLOBIN + 1534435 + 1234 +... + 1 + """ - # read each of the study_id's from the data etree study_id_recap_id_dict = {} @@ -1127,26 +1135,21 @@ def research_id_to_redcap_id_converter( mapping_xml) mapping_data = etree.parse(mapping_xml) - redcap_id_field_name = mapping_data.getroot().findtext( - 'redcap_id_field_name') - research_id_field_name = mapping_data.getroot().findtext( - 'research_id_field_name') + root = mapping_data.getroot() + redcap_id_field_name = root.findtext('redcap_id_field_name') + research_id_field_name = root.findtext('research_id_field_name') if research_id_field_name is None or research_id_field_name == '': logger.error( - 'research_id_field_name tag in file %s is not present', - mapping_xml) + 'research_id_field_name tag in file %s is not present', mapping_xml) raise Exception( - 'research_id_field_name tag in file %s is not present', - mapping_xml) + 'research_id_field_name tag in file %s is not present', mapping_xml) if redcap_id_field_name is None or redcap_id_field_name == '': logger.error( - 'redcap_id_field_name tag in file %s is not present', - mapping_xml) + 'redcap_id_field_name tag in file %s is not present', mapping_xml) raise Exception( - 
'redcap_id_field_name tag in file %s is not present', - mapping_xml) + 'redcap_id_field_name tag in file %s is not present', mapping_xml) try: # Communication with redcap @@ -1176,12 +1179,15 @@ def research_id_to_redcap_id_converter( for subject in data.iter('subject'): study_id = subject.findtext('STUDY_ID') - # tag = subject.find('STUDY_ID') + # if the study id is not null populate the dictionary if study_id is not None and study_id != '' and study_id in redcap_dict: - # if the study_id in redcap_dict of redcap id's update the study_id - # with redcap id - subject.find('STUDY_ID').text = redcap_dict[study_id] + # if the study_id is in the dictionary then replace it by the redcap_id + lab_id_ele = subject.find('STUDY_ID') + + # save the original subject id from the lab data as an attribute + subject.set('lab_id', lab_id_ele.text) + lab_id_ele.text = redcap_dict[study_id] elif study_id is not None and study_id != '' and study_id not in redcap_dict: # add the bad research id to list of bad ids bad_ids[study_id] += 1 From ca0e443aab3e8860cf9933b834a493397732016c Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 25 Sep 2014 09:38:26 -0400 Subject: [PATCH 02/51] Pass unittest with attribute `lab_id` --- test/TestResearchIdToRedcapId.py | 93 ++++++++++++-------------------- 1 file changed, 33 insertions(+), 60 deletions(-) diff --git a/test/TestResearchIdToRedcapId.py b/test/TestResearchIdToRedcapId.py index 5363a9c..a5bbba0 100644 --- a/test/TestResearchIdToRedcapId.py +++ b/test/TestResearchIdToRedcapId.py @@ -19,7 +19,7 @@ class TestResearchIdToRedcapId(unittest.TestCase): def setUp(self): self.sortedData = """ - + HEMOGLOBIN 1534435 @@ -28,8 +28,9 @@ def setUp(self): 16.0 g/dL - 999-0059 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + 999-0001 + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + WBC 999 @@ -38,39 +39,19 @@ def setUp(self): - 999-0059 - 
cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat - - PLATELET COUNT - 1009 - 92 - - - - - 999-0059 - cbccbc_lbdtccbc_completecbc_nximportplat_lborresplat_lborresuplat_lbstat - - HEMOGLOBIN - 1534435 - 9.5 - 12.0 - 16.0 - g/dL - - 999-0059 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - """ + 999-0002 + cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat + +""" self.data = etree.ElementTree(etree.fromstring(self.sortedData)) - self.serverResponse = """ - + self.serverResponse = """ + -""" - +""" self.output = """ - + HEMOGLOBIN 1534435 10.5 @@ -79,8 +60,9 @@ def setUp(self): g/dL 1 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + + WBC 999 5.4 @@ -88,38 +70,20 @@ def setUp(self): - 1 - cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat - - PLATELET COUNT - 1009 - 92 - - - - - 1 - cbccbc_lbdtccbc_completecbc_nximportplat_lborresplat_lborresuplat_lbstat - - HEMOGLOBIN - 1534435 - 9.5 - 12.0 - 16.0 - g/dL - - 1 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - """ + 2 + cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat + +""" self.expect = etree.tostring(etree.fromstring(self.output)) self.configuration_directory = tempfile.mkdtemp('/') self.research_id_to_redcap_id = "research_id_to_redcap_id_map.xml" try: f = open(os.path.join(self.configuration_directory, self.research_id_to_redcap_id), "w+") - f.write(""" - dm_subjid - dm_usubjid + f.write(""" + + dm_subjid + dm_usubjid """) f.close() except: @@ -131,7 +95,16 @@ def dummy_redcapClient_initializer(self, redcap_uri, token, verify_ssl): def dummy_get_data_from_redcap(self,records_to_fecth=[],events_to_fetch=[], fields_to_fetch=[], forms_to_fetch=[], return_format='xml'): dummy_output = """ - + + + + + + + + + + """ return dummy_output From 33b20895db452ce007636855b4c9883a29528b2b Mon 
Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 25 Sep 2014 10:20:02 -0400 Subject: [PATCH 03/51] Improve error message `There is no subjects in the raw data` + Cleanup extra imports + No functionality changes --- bin/redi.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 197e685..e06329e 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -1117,14 +1117,15 @@ def research_id_to_redcap_id_converter( ... 1 + + Note: The next function which reads the "data" tree + is #create_empty_event_tree_for_study() """ # read each of the study_id's from the data etree study_id_recap_id_dict = {} - ''' Configuration data from the mapping xml - - ''' - mapping_xml = os.path.join(configuration_directory,\ + # Configuration data from the mapping xml + mapping_xml = os.path.join(configuration_directory, research_id_to_redcap_id) # read the field names from the research_id_to_redcap_id_map.xml @@ -1363,7 +1364,7 @@ def create_empty_events_for_one_subject( form_events_tree, translation_table_tree): #logger.debug('Creating all form events template for one subject') - from lxml import etree + root = etree.Element("all_form_events") form_event_root = form_events_tree.getroot() translation_table_root = translation_table_tree.getroot() @@ -1436,13 +1437,15 @@ def create_empty_events_for_one_subject( def create_empty_event_tree_for_study(raw_data_tree, all_form_events_tree): """ - This function uses raw_data_tree and all_form_events_tree and creates a person_form_event_tree for study + This function uses raw_data_tree and all_form_events_tree and creates + a person_form_event_tree for study + :param raw_data_tree: This parameter holds raw data tree :param all_form_events_tree: This parameter holds all form events tree """ logger.info('Creating all form events template for all subjects') - from lxml import etree - root = etree.Element("person_form_event") + + pfe_element = 
etree.Element("person_form_event") raw_data_root = raw_data_tree.getroot() all_form_events_root = all_form_events_tree.getroot() if raw_data_root is None: @@ -1456,25 +1459,23 @@ def create_empty_event_tree_for_study(raw_data_tree, all_form_events_tree): subjects_list.add(subject.find('STUDY_ID').text) if not subjects_list: - raise Exception('There is no subjects in the raw data') + raise Exception("There are no subjects in the raw data. " \ + "This can be caused by an incorrect input file or "\ + "by lack of enrollment data in the REDCap database." ) for subject_id in subjects_list: person = etree.Element("person") study_id = etree.SubElement(person, "study_id") study_id.text = subject_id - person.insert( - person.index( - person.find('study_id')) + 1, - etree.XML( - etree.tostring( - all_form_events_root, - method='html', - pretty_print=True))) - root.append(person) + person_index = person.index(person.find('study_id')) + 1 - tree = etree.ElementTree(root) - return tree + # insert the pretty-fied form events + pretty_form_events = etree.XML( + etree.tostring(all_form_events_root, method='html', pretty_print=True)) + person.insert(person_index, pretty_form_events) + pfe_element.append(person) + return etree.ElementTree(pfe_element) def setStat( event, From f9dcb4a1f008422bbd1c7be5095e42b1bcf62aa5 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 25 Sep 2014 10:46:59 -0400 Subject: [PATCH 04/51] Reformat xml for easy reading in `test/TestCreateEmptyEventTreeForStudy.py` --- test/TestCreateEmptyEventTreeForStudy.py | 203 ++++++++++++----------- 1 file changed, 105 insertions(+), 98 deletions(-) diff --git a/test/TestCreateEmptyEventTreeForStudy.py b/test/TestCreateEmptyEventTreeForStudy.py index 48b7f33..99ec82b 100644 --- a/test/TestCreateEmptyEventTreeForStudy.py +++ b/test/TestCreateEmptyEventTreeForStudy.py @@ -12,39 +12,36 @@ class TestCreateEmptyEventTreeForStudy(unittest.TestCase): def setUp(self): - self.all_form_events = """
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - -
-
- chemistry + self.all_form_events = """ + + + cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + + + +
+ chemistry + + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres + + +
+
+ inr - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - -
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
+ 1_arm_1 + inr_lbdtcinr_completeinr_nximport + + +
""" - self.data_all_form_events= etree.ElementTree(etree.fromstring(self.all_form_events)) - return() - def test_create_empty_event_tree_for_study_for_zero_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.zero_subjects = """ @@ -71,38 +68,42 @@ def test_create_empty_event_tree_for_study_for_one_subjects(self): """ self.data_one_subject= etree.ElementTree(etree.fromstring(self.one_subject)) - self.output_one_subject = """123
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - -
+ self.output_one_subject = """ + + + 123 + +
+ cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres +
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
+
+ inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport +
+
+
+
""" self.expect_one_subject = etree.tostring(etree.fromstring(self.output_one_subject)) - - self.result = etree.tostring(redi.create_empty_event_tree_for_study(self.data_one_subject,self.data_all_form_events)) - self.assertEqual(self.expect_one_subject, self.result) - + self.result = etree.tostring( + redi.create_empty_event_tree_for_study(self.data_one_subject,self.data_all_form_events)) + clean_expected = ''.join(self.expect_one_subject.split()) + clean_result = ''.join(self.result.split()) + self.assertEqual(clean_expected, clean_result) + def test_create_empty_event_tree_for_study_for_two_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.two_subjects = """ @@ -131,62 +132,68 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): """ self.data_two_subjects= etree.ElementTree(etree.fromstring(self.two_subjects)) - self.output_two_subjects = """1234
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - + self.output_two_subjects = """ + + + 1234 + + + cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres +
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
123
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - + + inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+
+
+ + 123 + +
+ cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres +
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
+
+ inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+
+
+
""" self.expect_two_subjects = etree.tostring(etree.fromstring(self.output_two_subjects)) - - self.result = etree.tostring(redi.create_empty_event_tree_for_study(self.data_two_subjects,self.data_all_form_events)) - self.assertEqual(self.expect_two_subjects, self.result) + self.result = etree.tostring( + redi.create_empty_event_tree_for_study(self.data_two_subjects,self.data_all_form_events)) + clean_expected = ''.join(self.expect_two_subjects.split()) + clean_result = ''.join(self.result.split()) + self.assertEqual(clean_expected, clean_result) def tearDown(self): return() From 305a53395209108e7b3eac1b3b78d632fdebbcf6 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 25 Sep 2014 11:46:49 -0400 Subject: [PATCH 05/51] Modify `create_empty_event_tree_for_study()` to copy `lab_id` as a `person` element attribute --- bin/redi.py | 12 ++++++++---- test/TestCreateEmptyEventTreeForStudy.py | 12 ++++++------ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index e06329e..b47d694 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -1453,18 +1453,22 @@ def create_empty_event_tree_for_study(raw_data_tree, all_form_events_tree): if all_form_events_root is None: raise Exception('All form Events tree is empty') - subjects_list = set() + subjects_dict = {} + # Collect the `study_id => lab_id` mappings for subject in raw_data_root.iter('subject'): - subjects_list.add(subject.find('STUDY_ID').text) + study_id = subject.findtext('STUDY_ID') + subjects_dict[study_id] = subject.attrib['lab_id'] - if not subjects_list: + if not subjects_dict: raise Exception("There are no subjects in the raw data. " \ "This can be caused by an incorrect input file or "\ "by lack of enrollment data in the REDCap database." 
) - for subject_id in subjects_list: + for subject_id in subjects_dict.iterkeys(): person = etree.Element("person") + # Copy `lab_id` attribute from `subject` to `person` element + person.set('lab_id', subjects_dict.get(subject_id)) study_id = etree.SubElement(person, "study_id") study_id.text = subject_id person_index = person.index(person.find('study_id')) + 1 diff --git a/test/TestCreateEmptyEventTreeForStudy.py b/test/TestCreateEmptyEventTreeForStudy.py index 99ec82b..c73f2e8 100644 --- a/test/TestCreateEmptyEventTreeForStudy.py +++ b/test/TestCreateEmptyEventTreeForStudy.py @@ -54,7 +54,7 @@ def test_create_empty_event_tree_for_study_for_one_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.one_subject = """ - + TestSubject 123456 123 @@ -70,7 +70,7 @@ def test_create_empty_event_tree_for_study_for_one_subjects(self): self.output_one_subject = """ - + 123
@@ -108,7 +108,7 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.two_subjects = """ - + TestSubject_1 123456 123 @@ -118,7 +118,7 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): 123 cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - + TestSubject_2 123456 123 @@ -134,7 +134,7 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): self.output_two_subjects = """ - + 1234 @@ -160,7 +160,7 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): - + 123
From b1669b71eb7cc952bbcd31d0d145d2d9aa1e38ed Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 25 Sep 2014 15:35:44 -0400 Subject: [PATCH 06/51] Remove unused file: `bin/utils/report.xsd` --- bin/utils/report.xsd | 111 ------------------------------------------- 1 file changed, 111 deletions(-) delete mode 100644 bin/utils/report.xsd diff --git a/bin/utils/report.xsd b/bin/utils/report.xsd deleted file mode 100644 index 29d5dfb..0000000 --- a/bin/utils/report.xsd +++ /dev/null @@ -1,111 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file From e86207f37edc38efc6a0a88c6a80c74a905d7670 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 25 Sep 2014 15:39:57 -0400 Subject: [PATCH 07/51] Add the original subject id - `lab_id` to the summary report TODO: - sort by lab_id - fix test/TestGenerateOutput.py --- bin/redi.py | 17 ++++++++++--- bin/redi_lib.py | 1 + bin/utils/report.xsl | 44 ++++++++++++++++++++++++--------- test/TestCreateSummaryReport.py | 39 +++++++++++++++++------------ 4 files changed, 69 insertions(+), 32 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index b47d694..b0cc830 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -1306,15 +1306,20 @@ def updateReportAlerts(root, alert_summary): msg = etree.SubElement(values_alert, 'message') msg.text = value - def updateSubjectDetails(root, subject_details): + """ + Helper method for #create_summary_report() + Adds subject information to the xml tree which is later formated + by `bin/utils/report.xsl` into the html `table#subject_details"` + """ subjectsDetails = root[3] for key in sorted(subject_details.keys()): - subject = etree.SubElement(subjectsDetails, "Subject") + subject = etree.SubElement(subjectsDetails, "subject") details = subject_details.get(key) - subjectId = etree.SubElement(subject, 
"ID") - subjectId.text = key + redcap_id_ele = gen_ele("redcap_id", key) + subject.append(redcap_id_ele) forms = etree.SubElement(subject, "forms") + for k in sorted(details.keys()): if(k.endswith("_Forms")): form = etree.SubElement(forms, "form") @@ -2050,6 +2055,10 @@ def store(self, pfe_tree): method="xml", pretty_print=True) +def gen_ele(ele_name, ele_text): + """ Create an xml element with given name and content """ + return etree.XML("<{}>{}".format(ele_name, ele_text, ele_name)) + if __name__ == "__main__": main() diff --git a/bin/redi_lib.py b/bin/redi_lib.py index b5a2f1b..d543d20 100644 --- a/bin/redi_lib.py +++ b/bin/redi_lib.py @@ -164,6 +164,7 @@ def generate_output(person_tree, redcap_settings, email_settings, data_repositor # init dictionary for a new person in (study_id) if study_id_key not in subject_details: subject_details[study_id_key] = {} + subject_details[study_id_key]['lab_id'] = person.get('lab_id') if not form_key in subject_details[study_id_key]: subject_details[study_id_key][form_key] = 0 diff --git a/bin/utils/report.xsl b/bin/utils/report.xsl index cf60ad2..78ce204 100644 --- a/bin/utils/report.xsl +++ b/bin/utils/report.xsl @@ -7,11 +7,27 @@ - + Summary Report @@ -44,7 +60,7 @@

Summary

- +
@@ -114,10 +130,11 @@

Subject Details

-
Total Subjects
+
- + + - + +
SubjectLab IDREDCap ID @@ -126,10 +143,13 @@
- + + + @@ -142,7 +162,7 @@

Errors

- +
@@ -154,4 +174,4 @@ - \ No newline at end of file + diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 239ce7b..3bc9431 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -31,10 +31,10 @@ def setUp(self): 'Total_cbc_Forms': 53 }, 'subject_details': { - '60': {'cbc_Forms': 1, 'chemistry_Forms': 1}, - '61': {'cbc_Forms': 2, 'chemistry_Forms': 1}, - '63': {'cbc_Forms': 11, 'chemistry_Forms': 4}, - '59': {'cbc_Forms': 39, 'chemistry_Forms': 16} + '60': {'cbc_Forms': 1, 'chemistry_Forms': 1, 'lab_id': '999-0060'}, + '61': {'cbc_Forms': 2, 'chemistry_Forms': 1, 'lab_id': '999-0061'}, + '63': {'cbc_Forms': 11, 'chemistry_Forms': 4, 'lab_id': '999-0063'}, + '59': {'cbc_Forms': 39, 'chemistry_Forms': 16, 'lab_id': '999-0059'} }, 'errors' : [], } @@ -92,7 +92,8 @@ def setUp(self): - 59 + + 59 cbc_Forms @@ -103,9 +104,10 @@ def setUp(self): 16 - - - 60 + 999-0059 + + + 60
cbc_Forms @@ -115,8 +117,10 @@ def setUp(self): 1
-
- 61 + 999-0060 + + + 61
cbc_Forms @@ -127,9 +131,10 @@ def setUp(self): 1
-
- - 63 + 999-0061 + + + 63
cbc_Forms @@ -140,7 +145,8 @@ def setUp(self): 4
-
+ 999-0063 +
@@ -222,10 +228,10 @@ def setUp(self): - + - + @@ -240,6 +246,7 @@ def setUp(self): + From 865f0f53799ea22e4a4f12f90922a57bc342d0e8 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Fri, 26 Sep 2014 12:25:25 -0400 Subject: [PATCH 08/51] Add configuration option to allow sorting by lab_id or redcap_id --- bin/redi.py | 12 +++++- bin/utils/SimpleConfigParser.py | 1 + bin/utils/report.xsl | 70 ++++++++++++++++++++++++--------- config-example/settings.ini | 4 ++ test/TestCreateSummaryReport.py | 6 ++- test/TestGenerateOutput.py | 12 +++--- 6 files changed, 78 insertions(+), 27 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index b0cc830..925b503 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -245,7 +245,9 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, report_parameters = { 'report_file_path': report_file_path, 'project': settings.project, - 'redcap_uri': settings.redcap_uri} + 'redcap_uri': settings.redcap_uri, + 'is_sort_by_lab_id': settings.is_sort_by_lab_id, + } report_xsl = proj_root + "bin/utils/report.xsl" send_email = settings.send_email @@ -1263,6 +1265,10 @@ def create_summary_report(report_parameters, report_data, alert_summary, \ updateReportAlerts(root, alert_summary) updateReportErrors(root, report_data['errors']) updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict) + + sort_by_value = 'lab_id' if report_parameters['is_sort_by_lab_id'] else 'redcap_id' + root.append(gen_ele("sort_details_by", sort_by_value)) + tree = etree.ElementTree(root) write_element_tree_to_file(tree,report_parameters.get('report_file_path')) return tree @@ -2059,6 +2065,10 @@ def gen_ele(ele_name, ele_text): """ Create an xml element with given name and content """ return etree.XML("<{}>{}".format(ele_name, ele_text, ele_name)) +def gen_subele(parent, subele_name, subele_text): + subele = etree.SubElement(parent, subele_name) + subele.text = subele_text + return subele if __name__ == "__main__": main() diff --git 
a/bin/utils/SimpleConfigParser.py b/bin/utils/SimpleConfigParser.py index d2f57f9..1e8463c 100755 --- a/bin/utils/SimpleConfigParser.py +++ b/bin/utils/SimpleConfigParser.py @@ -117,6 +117,7 @@ "replace_fields_in_raw_data_xml": None, "include_rule_errors_in_report": False, "redcap_support_sender_email": 'please-do-not-reply@example.com', + "is_sort_by_lab_id": True, } class ConfigurationError(Exception): diff --git a/bin/utils/report.xsl b/bin/utils/report.xsl index 78ce204..b28316a 100644 --- a/bin/utils/report.xsl +++ b/bin/utils/report.xsl @@ -1,13 +1,11 @@ - - - - - - - - - Summary Report - - + + + + + + + + Summary Report + +

Data Import Report

@@ -67,9 +77,9 @@ table#errors thead { + - + @@ -79,9 +89,9 @@ table#errors thead { + - +
-
-

@@ -130,7 +140,7 @@ table#errors thead {

Subject Details

- +
@@ -138,12 +148,34 @@ table#errors thead { + + + + + + diff --git a/config-example/settings.ini b/config-example/settings.ini index a0d4076..28e54a6 100644 --- a/config-example/settings.ini +++ b/config-example/settings.ini @@ -61,6 +61,10 @@ report_file_path = report.xml # name of the report file in html format, which will be stored at this location. # Optional parameter report_file_path2 = report.html + +# Optional parameter used to choose sorting of the data +# in the summary email by `lab_id` or by `redcap_id` +is_sort_by_lab_id = Y # --- /section_redi_emails # ------------------------------------------------------------------------------- diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 3bc9431..cb6f1bc 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -22,7 +22,9 @@ def setUp(self): self.test_report_params = { 'project': 'hcvtarget-uf', 'report_file_path': proj_root + 'config/report.xml', - 'redcap_uri': 'https://hostname.org'} + 'redcap_uri': 'https://hostname.org', + 'is_sort_by_lab_id': True, + } self.test_report_data = { 'total_subjects': 5, @@ -154,6 +156,7 @@ def setUp(self): 320.0 + lab_id''' self.schema_str = StringIO('''\ @@ -264,6 +267,7 @@ def setUp(self): + diff --git a/test/TestGenerateOutput.py b/test/TestGenerateOutput.py index 2044b33..499bb28 100755 --- a/test/TestGenerateOutput.py +++ b/test/TestGenerateOutput.py @@ -53,7 +53,7 @@ def test_person_form_event(self): + "#test_person_form_event() using xml: " ) string_1_xml = """ - + 100
@@ -79,7 +79,7 @@ def test_person_form_event(self):
- + 99 @@ -128,7 +128,7 @@ def test_person_form_event(self): - + 98
@@ -163,9 +163,9 @@ def test_person_form_event(self): form_details = {'Total_cbc_Forms': 2, 'Total_inr_Forms': 3} subject_details = { - '98' : {'Total_cbc_Forms' : 0, 'Total_inr_Forms' : 1 }, - '99' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1 }, - '100' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1 } + '98' : {'Total_cbc_Forms' : 0, 'Total_inr_Forms' : 1, 'lab_id': "999-0098" }, + '99' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1, "lab_id": "999-0099" }, + '100' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1, "lab_id": "999-0100" } } report_data = { From 34d7d3f794229a1f415719e2a7c22b846212a110 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Mon, 29 Sep 2014 12:06:39 -0400 Subject: [PATCH 09/51] Cleanup unnecessary code in `test/TestCreateSummaryReport.py` --- test/TestCreateSummaryReport.py | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index cb6f1bc..065786b 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -9,10 +9,6 @@ import time import redi -file_dir = os.path.dirname(os.path.realpath(__file__)) -goal_dir = os.path.join(file_dir, "../") -proj_root = os.path.abspath(goal_dir)+'/' - DEFAULT_DATA_DIRECTORY = os.getcwd() class TestCreateSummaryReport(unittest.TestCase): @@ -21,7 +17,7 @@ def setUp(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.test_report_params = { 'project': 'hcvtarget-uf', - 'report_file_path': proj_root + 'config/report.xml', + 'report_file_path': os.path.join(DEFAULT_DATA_DIRECTORY, 'unittest_report.xml'), 'redcap_uri': 'https://hostname.org', 'is_sort_by_lab_id': True, } @@ -275,13 +271,11 @@ def setUp(self): return def test_create_summary_report(self): - + """ + Validates the summary xml structure using xsd + Validate the summary xml content + """ sys.path.append('config') - self.newpath = proj_root+'config' - self.configFolderCreatedNow = False - if not 
os.path.exists(self.newpath): - self.configFolderCreatedNow = True - os.makedirs(self.newpath) result = redi.create_summary_report(\ self.test_report_params, \ @@ -294,20 +288,16 @@ def test_create_summary_report(self): xml_schema = etree.XMLSchema(xmlschema_doc) # validate the xml against the xsd schema self.assertEqual(xml_schema.validate(result), True) + # validate the actual data in xml but strip the white space first parser = etree.XMLParser(remove_blank_text=True) clean_tree = etree.XML(self.expected_xml, parser=parser) self.expected_xml = etree.tostring(clean_tree) - self.assertEqual(self.expected_xml, result_string) def tearDown(self): # delete the created xml file - with open(proj_root + 'config/report.xml'): - os.remove(proj_root + 'config/report.xml') - - if self.configFolderCreatedNow: - os.rmdir(self.newpath) + os.remove(self.test_report_params['report_file_path']) return if __name__ == '__main__': From 54f2ae23779e423dc97b1e618d3a270aba93727f Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Mon, 29 Sep 2014 12:10:23 -0400 Subject: [PATCH 10/51] Improve comment in `bin/redi.py` about `create_summary_report()` Note: no functionality changes --- bin/redi.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 925b503..519da36 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -1252,6 +1252,10 @@ def configure_logging(data_folder, verbose=False): def create_summary_report(report_parameters, report_data, alert_summary, \ collection_date_summary_dict): + """ + Generates the xml to be transformed by `bin/utils/report.xsl` + into an html report with details about data import completed. 
+ """ root = etree.Element("report") root.append(etree.Element("header")) root.append(etree.Element("summary")) @@ -1266,6 +1270,7 @@ def create_summary_report(report_parameters, report_data, alert_summary, \ updateReportErrors(root, report_data['errors']) updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict) + # TODO: remove dependency on the order of the xml elements in the report sort_by_value = 'lab_id' if report_parameters['is_sort_by_lab_id'] else 'redcap_id' root.append(gen_ele("sort_details_by", sort_by_value)) @@ -1273,7 +1278,6 @@ def create_summary_report(report_parameters, report_data, alert_summary, \ write_element_tree_to_file(tree,report_parameters.get('report_file_path')) return tree - def updateReportHeader(root, report_parameters): """ Update the passed `root` element tree with date, project name and url""" header = root[0] @@ -1322,8 +1326,7 @@ def updateSubjectDetails(root, subject_details): for key in sorted(subject_details.keys()): subject = etree.SubElement(subjectsDetails, "subject") details = subject_details.get(key) - redcap_id_ele = gen_ele("redcap_id", key) - subject.append(redcap_id_ele) + subject.append(gen_ele("redcap_id", key)) forms = etree.SubElement(subject, "forms") for k in sorted(details.keys()): From a4d8eeadbd73532cf0c1f8882c280b1b73c4f688 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Mon, 29 Sep 2014 12:12:08 -0400 Subject: [PATCH 11/51] Fix `bin/utils/report.xsl` to implement sorting by: `lab_id/redcap_id` --- bin/utils/report.xsl | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/bin/utils/report.xsl b/bin/utils/report.xsl index b28316a..81f62ee 100644 --- a/bin/utils/report.xsl +++ b/bin/utils/report.xsl @@ -2,6 +2,8 @@ + + @@ -153,29 +155,16 @@ $(document).ready(function() {
- + - + -
Lab ID -
@@ -154,7 +186,7 @@ table#errors thead { -
+ +
From f2635b9b98603d8039e72c4765847cd953c06564 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Mon, 29 Sep 2014 12:40:46 -0400 Subject: [PATCH 12/51] Move caption outside the `thead` in `bin/utils/report.xsl` --- bin/utils/report.xsl | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/bin/utils/report.xsl b/bin/utils/report.xsl index 81f62ee..52063cd 100644 --- a/bin/utils/report.xsl +++ b/bin/utils/report.xsl @@ -29,11 +29,10 @@ table#errors thead { } - - - - - + + + + - - - - - Summary Report - - + + + + + + + + + Summary Report + +

Data Import Report

@@ -71,16 +44,16 @@ $(document).ready(function() {

Summary

- +
+ - + @@ -90,9 +63,9 @@ $(document).ready(function() { + - +
Total Subjects -
-

@@ -141,40 +114,27 @@ $(document).ready(function() {

Subject Details

- - +
- -
- - + + - - - + - + @@ -182,7 +142,7 @@ $(document).ready(function() {
Subject IDSubject NumberSubject -
- - - + -

Errors

- +
@@ -194,4 +154,4 @@ $(document).ready(function() { - + \ No newline at end of file diff --git a/config-example/settings.ini b/config-example/settings.ini index a0cb0ec..a8c5e1d 100644 --- a/config-example/settings.ini +++ b/config-example/settings.ini @@ -70,10 +70,6 @@ report_file_path = report.xml # Use this parameter to specify the file name for the report email stored in html format. # Optional parameter report_file_path2 = report.html - -# Optional parameter used to choose sorting of the data -# in the summary email by `lab_id` or by `redcap_id` -is_sort_by_lab_id = Y # --- /section_redi_emails # ------------------------------------------------------------------------------- diff --git a/test/TestCreateEmptyEventTreeForStudy.py b/test/TestCreateEmptyEventTreeForStudy.py index c73f2e8..48b7f33 100644 --- a/test/TestCreateEmptyEventTreeForStudy.py +++ b/test/TestCreateEmptyEventTreeForStudy.py @@ -12,36 +12,39 @@ class TestCreateEmptyEventTreeForStudy(unittest.TestCase): def setUp(self): - self.all_form_events = """ - - - cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - - -
- chemistry - - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - -
-
- inr + self.all_form_events = """ + cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + + + +
+ chemistry - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
-
+ 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres + + + + +
+ inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+ +
""" + self.data_all_form_events= etree.ElementTree(etree.fromstring(self.all_form_events)) + return() + def test_create_empty_event_tree_for_study_for_zero_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.zero_subjects = """ @@ -54,7 +57,7 @@ def test_create_empty_event_tree_for_study_for_one_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.one_subject = """ - + TestSubject 123456 123 @@ -68,47 +71,43 @@ def test_create_empty_event_tree_for_study_for_one_subjects(self): """ self.data_one_subject= etree.ElementTree(etree.fromstring(self.one_subject)) - self.output_one_subject = """ - - - 123 - -
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - -
+ self.output_one_subject = """123
+ cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + + +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres + +
-
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport -
-
-
-
+ +
+ inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+ +
""" self.expect_one_subject = etree.tostring(etree.fromstring(self.output_one_subject)) - self.result = etree.tostring( - redi.create_empty_event_tree_for_study(self.data_one_subject,self.data_all_form_events)) - clean_expected = ''.join(self.expect_one_subject.split()) - clean_result = ''.join(self.result.split()) - self.assertEqual(clean_expected, clean_result) - + + self.result = etree.tostring(redi.create_empty_event_tree_for_study(self.data_one_subject,self.data_all_form_events)) + self.assertEqual(self.expect_one_subject, self.result) + def test_create_empty_event_tree_for_study_for_two_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.two_subjects = """ - + TestSubject_1 123456 123 @@ -118,7 +117,7 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): 123 cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - + TestSubject_2 123456 123 @@ -132,68 +131,62 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): """ self.data_two_subjects= etree.ElementTree(etree.fromstring(self.two_subjects)) - self.output_two_subjects = """ - - - 1234 - -
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - + self.output_two_subjects = """1234 + cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - -
-
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres + +
-
-
- - 123 - -
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - + + + inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+ +
123
+ cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - -
-
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres + +
-
-
-
+ +
+ inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+ +
""" self.expect_two_subjects = etree.tostring(etree.fromstring(self.output_two_subjects)) - self.result = etree.tostring( - redi.create_empty_event_tree_for_study(self.data_two_subjects,self.data_all_form_events)) - clean_expected = ''.join(self.expect_two_subjects.split()) - clean_result = ''.join(self.result.split()) - self.assertEqual(clean_expected, clean_result) + + self.result = etree.tostring(redi.create_empty_event_tree_for_study(self.data_two_subjects,self.data_all_form_events)) + self.assertEqual(self.expect_two_subjects, self.result) def tearDown(self): return() diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 065786b..239ce7b 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -9,6 +9,10 @@ import time import redi +file_dir = os.path.dirname(os.path.realpath(__file__)) +goal_dir = os.path.join(file_dir, "../") +proj_root = os.path.abspath(goal_dir)+'/' + DEFAULT_DATA_DIRECTORY = os.getcwd() class TestCreateSummaryReport(unittest.TestCase): @@ -17,10 +21,8 @@ def setUp(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.test_report_params = { 'project': 'hcvtarget-uf', - 'report_file_path': os.path.join(DEFAULT_DATA_DIRECTORY, 'unittest_report.xml'), - 'redcap_uri': 'https://hostname.org', - 'is_sort_by_lab_id': True, - } + 'report_file_path': proj_root + 'config/report.xml', + 'redcap_uri': 'https://hostname.org'} self.test_report_data = { 'total_subjects': 5, @@ -29,10 +31,10 @@ def setUp(self): 'Total_cbc_Forms': 53 }, 'subject_details': { - '60': {'cbc_Forms': 1, 'chemistry_Forms': 1, 'lab_id': '999-0060'}, - '61': {'cbc_Forms': 2, 'chemistry_Forms': 1, 'lab_id': '999-0061'}, - '63': {'cbc_Forms': 11, 'chemistry_Forms': 4, 'lab_id': '999-0063'}, - '59': {'cbc_Forms': 39, 'chemistry_Forms': 16, 'lab_id': '999-0059'} + '60': {'cbc_Forms': 1, 'chemistry_Forms': 1}, + '61': {'cbc_Forms': 2, 'chemistry_Forms': 1}, + '63': {'cbc_Forms': 11, 'chemistry_Forms': 4}, + '59': 
{'cbc_Forms': 39, 'chemistry_Forms': 16} }, 'errors' : [], } @@ -90,8 +92,7 @@ def setUp(self): - - 59 + 59
cbc_Forms @@ -102,10 +103,9 @@ def setUp(self): 16
- 999-0059 -
- - 60 + + + 60
cbc_Forms @@ -115,10 +115,8 @@ def setUp(self): 1
- 999-0060 -
- - 61 + + 61
cbc_Forms @@ -129,10 +127,9 @@ def setUp(self): 1
- 999-0061 -
- - 63 + + + 63
cbc_Forms @@ -143,8 +140,7 @@ def setUp(self): 4
- 999-0063 -
+
@@ -152,7 +148,6 @@ def setUp(self): 3 20.0 - lab_id ''' self.schema_str = StringIO('''\ @@ -227,10 +222,10 @@ def setUp(self): - + - + @@ -245,7 +240,6 @@ def setUp(self): - @@ -263,7 +257,6 @@ def setUp(self): - @@ -271,11 +264,13 @@ def setUp(self): return def test_create_summary_report(self): - """ - Validates the summary xml structure using xsd - Validate the summary xml content - """ + sys.path.append('config') + self.newpath = proj_root+'config' + self.configFolderCreatedNow = False + if not os.path.exists(self.newpath): + self.configFolderCreatedNow = True + os.makedirs(self.newpath) result = redi.create_summary_report(\ self.test_report_params, \ @@ -288,16 +283,20 @@ def test_create_summary_report(self): xml_schema = etree.XMLSchema(xmlschema_doc) # validate the xml against the xsd schema self.assertEqual(xml_schema.validate(result), True) - # validate the actual data in xml but strip the white space first parser = etree.XMLParser(remove_blank_text=True) clean_tree = etree.XML(self.expected_xml, parser=parser) self.expected_xml = etree.tostring(clean_tree) + self.assertEqual(self.expected_xml, result_string) def tearDown(self): # delete the created xml file - os.remove(self.test_report_params['report_file_path']) + with open(proj_root + 'config/report.xml'): + os.remove(proj_root + 'config/report.xml') + + if self.configFolderCreatedNow: + os.rmdir(self.newpath) return if __name__ == '__main__': diff --git a/test/TestGenerateOutput.py b/test/TestGenerateOutput.py index 782bce9..c5e9bb0 100755 --- a/test/TestGenerateOutput.py +++ b/test/TestGenerateOutput.py @@ -37,7 +37,7 @@ def test_person_form_event(self): + "#test_person_form_event() using xml: " ) string_1_xml = """ - + 100
@@ -63,7 +63,7 @@ def test_person_form_event(self): - + 99 @@ -112,7 +112,7 @@ def test_person_form_event(self): - + 98
@@ -144,9 +144,9 @@ def test_person_form_event(self): form_details = {'Total_cbc_Forms': 2, 'Total_inr_Forms': 3} subject_details = { - '98' : {'Total_cbc_Forms' : 0, 'Total_inr_Forms' : 1, 'lab_id': "999-0098" }, - '99' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1, "lab_id": "999-0099" }, - '100' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1, "lab_id": "999-0100" } + '98' : {'Total_cbc_Forms' : 0, 'Total_inr_Forms' : 1 }, + '99' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1 }, + '100' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1 } } report_data = { diff --git a/test/TestResearchIdToRedcapId.py b/test/TestResearchIdToRedcapId.py index 318b88f..be0cd93 100644 --- a/test/TestResearchIdToRedcapId.py +++ b/test/TestResearchIdToRedcapId.py @@ -19,7 +19,7 @@ class TestResearchIdToRedcapId(unittest.TestCase): def setUp(self): self.sortedData = """ - + HEMOGLOBIN 1534435 @@ -28,9 +28,8 @@ def setUp(self): 16.0 g/dL - 999-0001 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - + 999-0059 + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat WBC 999 @@ -39,19 +38,39 @@ def setUp(self): - 999-0002 - cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat - -""" + 999-0059 + cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat + + PLATELET COUNT + 1009 + 92 + + + + + 999-0059 + cbccbc_lbdtccbc_completecbc_nximportplat_lborresplat_lborresuplat_lbstat + + HEMOGLOBIN + 1534435 + 9.5 + 12.0 + 16.0 + g/dL + + 999-0059 + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + """ self.data = etree.ElementTree(etree.fromstring(self.sortedData)) - self.serverResponse = """ - + self.serverResponse = """ + -""" +""" + self.output = """ - + HEMOGLOBIN 1534435 10.5 @@ -60,9 +79,8 @@ def setUp(self): g/dL 1 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - - + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + WBC 999 5.4 @@ -70,20 
+88,38 @@ def setUp(self): - 2 - cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat - -""" + 1 + cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat + + PLATELET COUNT + 1009 + 92 + + + + + 1 + cbccbc_lbdtccbc_completecbc_nximportplat_lborresplat_lborresuplat_lbstat + + HEMOGLOBIN + 1534435 + 9.5 + 12.0 + 16.0 + g/dL + + 1 + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + """ self.expect = etree.tostring(etree.fromstring(self.output)) self.configuration_directory = tempfile.mkdtemp('/') self.research_id_to_redcap_id = "research_id_to_redcap_id_map.xml" try: f = open(os.path.join(self.configuration_directory, self.research_id_to_redcap_id), "w+") - f.write(""" - - dm_subjid - dm_usubjid + f.write(""" + dm_subjid + dm_usubjid """) f.close() except: @@ -95,16 +131,7 @@ def dummy_redcapClient_initializer(self, redcap_uri, token, verify_ssl): def dummy_get_data_from_redcap(self,records_to_fecth=[],events_to_fetch=[], fields_to_fetch=[], forms_to_fetch=[], return_format='xml'): dummy_output = """ - - - - - - - - - - + """ return dummy_output diff --git a/vagrant/aliases b/vagrant/aliases index 768bc1c..d7a53d6 100644 --- a/vagrant/aliases +++ b/vagrant/aliases @@ -1,5 +1,4 @@ alias db='mysql --prompt="(\u@\h) [\d]> " --pager="less -niSFX" -uroot -ppassword redcap' alias check_redcap="curl -s http://localhost/redcap/ | grep -i 'Welcome\|Critical Error' " alias restart_httpd='sudo /etc/init.d/apache2 reload ' -alias restart_mysql='sudo service mysql restart' alias lsa='ls -al --color=auto' From 111d4f2e8ca6b2f79421cb14daaa69c4e3207d26 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Wed, 8 Oct 2014 14:28:04 -0400 Subject: [PATCH 18/51] This reverts commit 6947fe2edd806342efc59d602f4a3325525ed2cf. Because we want to keep the code. 
--- bin/redi.py | 129 ++++++++------ bin/redi_lib.py | 1 + bin/utils/SimpleConfigParser.py | 1 + bin/utils/redi_email.py | 68 +++++--- bin/utils/report.xsd | 111 ------------ bin/utils/report.xsl | 98 +++++++---- config-example/settings.ini | 4 + test/TestCreateEmptyEventTreeForStudy.py | 209 ++++++++++++----------- test/TestCreateSummaryReport.py | 69 ++++---- test/TestGenerateOutput.py | 12 +- test/TestResearchIdToRedcapId.py | 93 ++++------ vagrant/aliases | 1 + 12 files changed, 386 insertions(+), 410 deletions(-) delete mode 100644 bin/utils/report.xsd diff --git a/bin/redi.py b/bin/redi.py index 9a7b33a..eddb0af 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -288,7 +288,9 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, report_parameters = { 'report_file_path': report_file_path, 'project': settings.project, - 'redcap_uri': settings.redcap_uri} + 'redcap_uri': settings.redcap_uri, + 'is_sort_by_lab_id': settings.is_sort_by_lab_id, + } report_xsl = proj_root + "bin/utils/report.xsl" send_email = settings.send_email @@ -1087,16 +1089,25 @@ def research_id_to_redcap_id_converter( This function converts the research_id to redcap_id 1. prepare a dictionary with [key, value] --> [study_id, redcap_id] 2. replace the element tree study_id with the new redcap_id's - for each bad id, log it as warn + for each bad id, log it as warn. + + Example of xml fragment produced: + + HEMOGLOBIN + 1534435 + 1234 +... 
+ 1 + + + Note: The next function which reads the "data" tree + is #create_empty_event_tree_for_study() """ - # read each of the study_id's from the data etree study_id_recap_id_dict = {} - ''' Configuration data from the mapping xml - - ''' - mapping_xml = os.path.join(configuration_directory,\ + # Configuration data from the mapping xml + mapping_xml = os.path.join(configuration_directory, research_id_to_redcap_id) # read the field names from the research_id_to_redcap_id_map.xml @@ -1107,26 +1118,21 @@ def research_id_to_redcap_id_converter( mapping_xml) mapping_data = etree.parse(mapping_xml) - redcap_id_field_name = mapping_data.getroot().findtext( - 'redcap_id_field_name') - research_id_field_name = mapping_data.getroot().findtext( - 'research_id_field_name') + root = mapping_data.getroot() + redcap_id_field_name = root.findtext('redcap_id_field_name') + research_id_field_name = root.findtext('research_id_field_name') if research_id_field_name is None or research_id_field_name == '': logger.error( - 'research_id_field_name tag in file %s is not present', - mapping_xml) + 'research_id_field_name tag in file %s is not present', mapping_xml) raise Exception( - 'research_id_field_name tag in file %s is not present', - mapping_xml) + 'research_id_field_name tag in file %s is not present', mapping_xml) if redcap_id_field_name is None or redcap_id_field_name == '': logger.error( - 'redcap_id_field_name tag in file %s is not present', - mapping_xml) + 'redcap_id_field_name tag in file %s is not present', mapping_xml) raise Exception( - 'redcap_id_field_name tag in file %s is not present', - mapping_xml) + 'redcap_id_field_name tag in file %s is not present', mapping_xml) # query the redcap for the response with redcap id's response = redcap_client.get_data_from_redcap( @@ -1146,12 +1152,15 @@ def research_id_to_redcap_id_converter( for subject in data.iter('subject'): study_id = subject.findtext('STUDY_ID') - # tag = subject.find('STUDY_ID') + # if the study id is not 
null populate the dictionary if study_id is not None and study_id != '' and study_id in redcap_dict: - # if the study_id in redcap_dict of redcap id's update the study_id - # with redcap id - subject.find('STUDY_ID').text = redcap_dict[study_id] + # if the study_id is in the dictionary then replace it by the redcap_id + lab_id_ele = subject.find('STUDY_ID') + + # save the original subject id from the lab data as an attribute + subject.set('lab_id', lab_id_ele.text) + lab_id_ele.text = redcap_dict[study_id] elif study_id is not None and study_id != '' and study_id not in redcap_dict: # add the bad research id to list of bad ids bad_ids[study_id] += 1 @@ -1213,6 +1222,10 @@ def configure_logging(data_folder, verbose=False): def create_summary_report(report_parameters, report_data, alert_summary, \ collection_date_summary_dict): + """ + Generates the xml to be transformed by `bin/utils/report.xsl` + into an html report with details about data import completed. + """ root = etree.Element("report") root.append(etree.Element("header")) root.append(etree.Element("summary")) @@ -1226,11 +1239,15 @@ def create_summary_report(report_parameters, report_data, alert_summary, \ updateReportAlerts(root, alert_summary) updateReportErrors(root, report_data['errors']) updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict) + + # TODO: remove dependency on the order of the xml elements in the report + sort_by_value = 'lab_id' if report_parameters['is_sort_by_lab_id'] else 'redcap_id' + root.append(gen_ele("sort_details_by", sort_by_value)) + tree = etree.ElementTree(root) write_element_tree_to_file(tree,report_parameters.get('report_file_path')) return tree - def updateReportHeader(root, report_parameters): """ Update the passed `root` element tree with date, project name and url""" header = root[0] @@ -1269,15 +1286,19 @@ def updateReportAlerts(root, alert_summary): msg = etree.SubElement(values_alert, 'message') msg.text = value - def updateSubjectDetails(root, 
subject_details): + """ + Helper method for #create_summary_report() + Adds subject information to the xml tree which is later formated + by `bin/utils/report.xsl` into the html `table#subject_details"` + """ subjectsDetails = root[3] for key in sorted(subject_details.keys()): - subject = etree.SubElement(subjectsDetails, "Subject") + subject = etree.SubElement(subjectsDetails, "subject") details = subject_details.get(key) - subjectId = etree.SubElement(subject, "ID") - subjectId.text = key + subject.append(gen_ele("redcap_id", key)) forms = etree.SubElement(subject, "forms") + for k in sorted(details.keys()): if(k.endswith("_Forms")): form = etree.SubElement(forms, "form") @@ -1327,7 +1348,7 @@ def create_empty_events_for_one_subject( form_events_tree, translation_table_tree): #logger.debug('Creating all form events template for one subject') - from lxml import etree + root = etree.Element("all_form_events") form_event_root = form_events_tree.getroot() translation_table_root = translation_table_tree.getroot() @@ -1400,13 +1421,15 @@ def create_empty_events_for_one_subject( def create_empty_event_tree_for_study(raw_data_tree, all_form_events_tree): """ - This function uses raw_data_tree and all_form_events_tree and creates a person_form_event_tree for study + This function uses raw_data_tree and all_form_events_tree and creates + a person_form_event_tree for study + :param raw_data_tree: This parameter holds raw data tree :param all_form_events_tree: This parameter holds all form events tree """ logger.info('Creating all form events template for all subjects') - from lxml import etree - root = etree.Element("person_form_event") + + pfe_element = etree.Element("person_form_event") raw_data_root = raw_data_tree.getroot() all_form_events_root = all_form_events_tree.getroot() if raw_data_root is None: @@ -1414,31 +1437,33 @@ def create_empty_event_tree_for_study(raw_data_tree, all_form_events_tree): if all_form_events_root is None: raise Exception('All form Events tree 
is empty') - subjects_list = set() + subjects_dict = {} + # Collect the `study_id => lab_id` mappings for subject in raw_data_root.iter('subject'): - subjects_list.add(subject.find('STUDY_ID').text) + study_id = subject.findtext('STUDY_ID') + subjects_dict[study_id] = subject.attrib['lab_id'] - if not subjects_list: - raise Exception('There is no subjects in the raw data') + if not subjects_dict: + raise Exception("There are no subjects in the raw data. " \ + "This can be caused by an incorrect input file or "\ + "by lack of enrollment data in the REDCap database." ) - for subject_id in subjects_list: + for subject_id in subjects_dict.iterkeys(): person = etree.Element("person") + # Copy `lab_id` attribute from `subject` to `person` element + person.set('lab_id', subjects_dict.get(subject_id)) study_id = etree.SubElement(person, "study_id") study_id.text = subject_id - person.insert( - person.index( - person.find('study_id')) + 1, - etree.XML( - etree.tostring( - all_form_events_root, - method='html', - pretty_print=True))) - root.append(person) + person_index = person.index(person.find('study_id')) + 1 - tree = etree.ElementTree(root) - return tree + # insert the pretty-fied form events + pretty_form_events = etree.XML( + etree.tostring(all_form_events_root, method='html', pretty_print=True)) + person.insert(person_index, pretty_form_events) + pfe_element.append(person) + return etree.ElementTree(pfe_element) def setStat( event, @@ -2009,6 +2034,14 @@ def store(self, pfe_tree): method="xml", pretty_print=True) +def gen_ele(ele_name, ele_text): + """ Create an xml element with given name and content """ + return etree.XML("<{}>{}".format(ele_name, ele_text, ele_name)) + +def gen_subele(parent, subele_name, subele_text): + subele = etree.SubElement(parent, subele_name) + subele.text = subele_text + return subele if __name__ == "__main__": main() diff --git a/bin/redi_lib.py b/bin/redi_lib.py index 816d19a..a89bb51 100644 --- a/bin/redi_lib.py +++ b/bin/redi_lib.py 
@@ -155,6 +155,7 @@ def generate_output(person_tree, redcap_client, rate_limit, data_repository, ski # init dictionary for a new person in (study_id) if study_id_key not in subject_details: subject_details[study_id_key] = {} + subject_details[study_id_key]['lab_id'] = person.get('lab_id') if not form_key in subject_details[study_id_key]: subject_details[study_id_key][form_key] = 0 diff --git a/bin/utils/SimpleConfigParser.py b/bin/utils/SimpleConfigParser.py index 2258b50..b516c3d 100755 --- a/bin/utils/SimpleConfigParser.py +++ b/bin/utils/SimpleConfigParser.py @@ -120,6 +120,7 @@ "emr_sftp_server_password": None, "emr_sftp_server_private_key": None, "emr_sftp_server_private_key_pass": None, + "is_sort_by_lab_id": True, } class ConfigurationError(Exception): diff --git a/bin/utils/redi_email.py b/bin/utils/redi_email.py index b825e58..2d02f5f 100644 --- a/bin/utils/redi_email.py +++ b/bin/utils/redi_email.py @@ -2,6 +2,9 @@ from smtplib import SMTPException from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart +from email.MIMEBase import MIMEBase +from email import Encoders +from datetime import date import logging logger = logging.getLogger(__name__) @@ -58,14 +61,39 @@ def send_email_input_data_unchanged(email_settings, subject='', msg=''): Please investigate.""".format(email_settings['batch_warning_days']) return send_email(host, str(port), sender, to_addr_list, None, subject, msg) + +def add_attachment(msg, body): + """ + Add the html report as attachment + + Parameters + ---------- + msg : MIMEMultipart + The object to which we attach the body content + body : string + The html content to be attached + """ + part = MIMEBase('application', "octet-stream") + part.set_payload(body) + Encoders.encode_base64(part) + file_name = "redi_report_{}.html".format(date.today()) + part.add_header('Content-Disposition', \ + 'attachment; filename="{}"'.format(file_name)) + msg.attach(part) + + def send_email_data_import_completed(email_settings, 
body=''): """ Email the html report after redi completed the data transfer - :email_settings the dictionary produced by redi.get_email_settings() - :body: the html string produced by transforming the xsl - generated by redi.create_summary_report() + Returns a dictionary, with one entry for each recipient that was refused - :return a dictionary, with one entry for each recipient that was refused + Parameters + ---------- + email_settings : dict + Email params produced by redi.get_email_settings() + body : string + The html content produced by transforming the xsl + generated by redi.create_summary_report() """ sender = email_settings['batch_report_sender_email'] to_addr_list = email_settings['batch_report_receiving_list'] @@ -77,17 +105,13 @@ def send_email_data_import_completed(email_settings, body=''): msg['To'] = ",".join(to_addr_list) msg['Subject'] = subject msg.attach(MIMEText(body, 'html')) + add_attachment(msg, body) refused_list = {} - try: - smtpObj = smtplib.SMTP(host, port) - refused_list = smtpObj.sendmail(sender, to_addr_list, msg.as_string()) - logger.info("Successfully sent email to: " + str(to_addr_list)) - except Exception: - logger.error("Unable to send email with subject [{}] to {}" \ - .format(subject, str(to_addr_list))) - raise - smtpObj.quit() + smtp_obj = smtplib.SMTP(host, port) + refused_list = smtp_obj.sendmail(sender, to_addr_list, msg.as_string()) + logger.info("Successfully sent email to: " + str(to_addr_list)) + smtp_obj.quit() return refused_list def send_email( @@ -99,10 +123,12 @@ def send_email( subject, msg_body): """ - The email deliverer - :to_addr_list: must be a list not a string + The email deliverer. 
Return True if the email was sent - :return True if the email was sent + Parameters + ---------- + to_addr_list : list + The recipients of the email """ #print ('host %s, port: %s' % (host, port)) success = False @@ -122,12 +148,12 @@ def send_email( subject + '] was sent to:' + str(to_addr_list)) - except SMTPException: - logger.error("Unable to send email with subject [{}] to {}" \ - .format(subject, str(to_addr_list))) + except SMTPException as smtpe: + logger.error("Unable to send email with subject [{0}] to {1} due: {2}" \ + .format(subject, str(to_addr_lista), str(smtpe))) logger.info("Please check if the recipient email is valid") except Exception as e: - logger.error("Unable to send email with subject [{}] to {}\n{}" \ - .format(subject, str(to_addr_list), msg_body)) + logger.error("Unable to send email with subject [{0}] to {1}\n due: {2}" \ + .format(subject, str(to_addr_list), str(e))) logger.info("Please check if the smtp server is configured properly") return success diff --git a/bin/utils/report.xsd b/bin/utils/report.xsd deleted file mode 100644 index 29d5dfb..0000000 --- a/bin/utils/report.xsd +++ /dev/null @@ -1,111 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/bin/utils/report.xsl b/bin/utils/report.xsl index cf60ad2..154d33b 100644 --- a/bin/utils/report.xsl +++ b/bin/utils/report.xsl @@ -1,20 +1,47 @@ - - - - - - - - - Summary Report - - + + + + + + + + + + + + + + + + Summary Report + +

Data Import Report

@@ -44,16 +71,16 @@

Summary

- +
+ - + @@ -63,9 +90,9 @@ + - +
Total Subjects -
-

@@ -114,27 +141,40 @@

Subject Details

- +
+ - + + + - + + + + + @@ -142,7 +182,7 @@
+ +
SubjectSubject IDSubject Number -
- + + + -

Errors

- +
@@ -154,4 +194,4 @@ - \ No newline at end of file + diff --git a/config-example/settings.ini b/config-example/settings.ini index a8c5e1d..a0cb0ec 100644 --- a/config-example/settings.ini +++ b/config-example/settings.ini @@ -70,6 +70,10 @@ report_file_path = report.xml # Use this parameter to specify the file name for the report email stored in html format. # Optional parameter report_file_path2 = report.html + +# Optional parameter used to choose sorting of the data +# in the summary email by `lab_id` or by `redcap_id` +is_sort_by_lab_id = Y # --- /section_redi_emails # ------------------------------------------------------------------------------- diff --git a/test/TestCreateEmptyEventTreeForStudy.py b/test/TestCreateEmptyEventTreeForStudy.py index 48b7f33..c73f2e8 100644 --- a/test/TestCreateEmptyEventTreeForStudy.py +++ b/test/TestCreateEmptyEventTreeForStudy.py @@ -12,39 +12,36 @@ class TestCreateEmptyEventTreeForStudy(unittest.TestCase): def setUp(self): - self.all_form_events = """ - cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - - -
- chemistry + self.all_form_events = """ + + + cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + + + +
+ chemistry + + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres + + +
+
+ inr - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - -
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
+ 1_arm_1 + inr_lbdtcinr_completeinr_nximport + + +
""" - self.data_all_form_events= etree.ElementTree(etree.fromstring(self.all_form_events)) - return() - def test_create_empty_event_tree_for_study_for_zero_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.zero_subjects = """ @@ -57,7 +54,7 @@ def test_create_empty_event_tree_for_study_for_one_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.one_subject = """ - + TestSubject 123456 123 @@ -71,43 +68,47 @@ def test_create_empty_event_tree_for_study_for_one_subjects(self): """ self.data_one_subject= etree.ElementTree(etree.fromstring(self.one_subject)) - self.output_one_subject = """123
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - -
+ self.output_one_subject = """ + + + 123 + +
+ cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres + +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres +
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
+
+ inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport +
+
+
+
""" self.expect_one_subject = etree.tostring(etree.fromstring(self.output_one_subject)) - - self.result = etree.tostring(redi.create_empty_event_tree_for_study(self.data_one_subject,self.data_all_form_events)) - self.assertEqual(self.expect_one_subject, self.result) - + self.result = etree.tostring( + redi.create_empty_event_tree_for_study(self.data_one_subject,self.data_all_form_events)) + clean_expected = ''.join(self.expect_one_subject.split()) + clean_result = ''.join(self.result.split()) + self.assertEqual(clean_expected, clean_result) + def test_create_empty_event_tree_for_study_for_two_subjects(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.two_subjects = """ - + TestSubject_1 123456 123 @@ -117,7 +118,7 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): 123 cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - + TestSubject_2 123456 123 @@ -131,62 +132,68 @@ def test_create_empty_event_tree_for_study_for_two_subjects(self): """ self.data_two_subjects= etree.ElementTree(etree.fromstring(self.two_subjects)) - self.output_two_subjects = """1234
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - + self.output_two_subjects = """ + + + 1234 + + + cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres +
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
123
- cbc - - 1_arm_1 - lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres - - + + inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+
+
+ + 123 + +
+ cbc + + 1_arm_1 + lymce_lborreslymce_lborresuhemo_lborresucbc_lbdtccbc_nximportlymce_lbstatcbc_completehemo_lbstathemo_lborres +
chemistry - 1_arm_1 - k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres - - + 1_arm_1 + k_lborreschem_lbdtcsodium_lborresuk_lbstatsodium_lbstatchem_nximportchemistry_completek_lborresusodium_lborres +
- -
- inr - - 1_arm_1 - inr_lbdtcinr_completeinr_nximport - -
- -
+
+ inr + + 1_arm_1 + inr_lbdtcinr_completeinr_nximport + +
+
+
+
""" self.expect_two_subjects = etree.tostring(etree.fromstring(self.output_two_subjects)) - - self.result = etree.tostring(redi.create_empty_event_tree_for_study(self.data_two_subjects,self.data_all_form_events)) - self.assertEqual(self.expect_two_subjects, self.result) + self.result = etree.tostring( + redi.create_empty_event_tree_for_study(self.data_two_subjects,self.data_all_form_events)) + clean_expected = ''.join(self.expect_two_subjects.split()) + clean_result = ''.join(self.result.split()) + self.assertEqual(clean_expected, clean_result) def tearDown(self): return() diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 239ce7b..065786b 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -9,10 +9,6 @@ import time import redi -file_dir = os.path.dirname(os.path.realpath(__file__)) -goal_dir = os.path.join(file_dir, "../") -proj_root = os.path.abspath(goal_dir)+'/' - DEFAULT_DATA_DIRECTORY = os.getcwd() class TestCreateSummaryReport(unittest.TestCase): @@ -21,8 +17,10 @@ def setUp(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.test_report_params = { 'project': 'hcvtarget-uf', - 'report_file_path': proj_root + 'config/report.xml', - 'redcap_uri': 'https://hostname.org'} + 'report_file_path': os.path.join(DEFAULT_DATA_DIRECTORY, 'unittest_report.xml'), + 'redcap_uri': 'https://hostname.org', + 'is_sort_by_lab_id': True, + } self.test_report_data = { 'total_subjects': 5, @@ -31,10 +29,10 @@ def setUp(self): 'Total_cbc_Forms': 53 }, 'subject_details': { - '60': {'cbc_Forms': 1, 'chemistry_Forms': 1}, - '61': {'cbc_Forms': 2, 'chemistry_Forms': 1}, - '63': {'cbc_Forms': 11, 'chemistry_Forms': 4}, - '59': {'cbc_Forms': 39, 'chemistry_Forms': 16} + '60': {'cbc_Forms': 1, 'chemistry_Forms': 1, 'lab_id': '999-0060'}, + '61': {'cbc_Forms': 2, 'chemistry_Forms': 1, 'lab_id': '999-0061'}, + '63': {'cbc_Forms': 11, 'chemistry_Forms': 4, 'lab_id': '999-0063'}, + '59': {'cbc_Forms': 39, 
'chemistry_Forms': 16, 'lab_id': '999-0059'} }, 'errors' : [], } @@ -92,7 +90,8 @@ def setUp(self): - 59 + + 59
cbc_Forms @@ -103,9 +102,10 @@ def setUp(self): 16
-
- - 60 + 999-0059 + + + 60
cbc_Forms @@ -115,8 +115,10 @@ def setUp(self): 1
-
- 61 + 999-0060 + + + 61
cbc_Forms @@ -127,9 +129,10 @@ def setUp(self): 1
-
- - 63 + 999-0061 + + + 63
cbc_Forms @@ -140,7 +143,8 @@ def setUp(self): 4
-
+ 999-0063 +
@@ -148,6 +152,7 @@ def setUp(self): 3 20.0 + lab_id ''' self.schema_str = StringIO('''\ @@ -222,10 +227,10 @@ def setUp(self): - + - + @@ -240,6 +245,7 @@ def setUp(self): + @@ -257,6 +263,7 @@ def setUp(self): + @@ -264,13 +271,11 @@ def setUp(self): return def test_create_summary_report(self): - + """ + Validates the summary xml structure using xsd + Validate the summary xml content + """ sys.path.append('config') - self.newpath = proj_root+'config' - self.configFolderCreatedNow = False - if not os.path.exists(self.newpath): - self.configFolderCreatedNow = True - os.makedirs(self.newpath) result = redi.create_summary_report(\ self.test_report_params, \ @@ -283,20 +288,16 @@ def test_create_summary_report(self): xml_schema = etree.XMLSchema(xmlschema_doc) # validate the xml against the xsd schema self.assertEqual(xml_schema.validate(result), True) + # validate the actual data in xml but strip the white space first parser = etree.XMLParser(remove_blank_text=True) clean_tree = etree.XML(self.expected_xml, parser=parser) self.expected_xml = etree.tostring(clean_tree) - self.assertEqual(self.expected_xml, result_string) def tearDown(self): # delete the created xml file - with open(proj_root + 'config/report.xml'): - os.remove(proj_root + 'config/report.xml') - - if self.configFolderCreatedNow: - os.rmdir(self.newpath) + os.remove(self.test_report_params['report_file_path']) return if __name__ == '__main__': diff --git a/test/TestGenerateOutput.py b/test/TestGenerateOutput.py index c5e9bb0..782bce9 100755 --- a/test/TestGenerateOutput.py +++ b/test/TestGenerateOutput.py @@ -37,7 +37,7 @@ def test_person_form_event(self): + "#test_person_form_event() using xml: " ) string_1_xml = """ - + 100
@@ -63,7 +63,7 @@ def test_person_form_event(self): - + 99 @@ -112,7 +112,7 @@ def test_person_form_event(self): - + 98
@@ -144,9 +144,9 @@ def test_person_form_event(self): form_details = {'Total_cbc_Forms': 2, 'Total_inr_Forms': 3} subject_details = { - '98' : {'Total_cbc_Forms' : 0, 'Total_inr_Forms' : 1 }, - '99' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1 }, - '100' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1 } + '98' : {'Total_cbc_Forms' : 0, 'Total_inr_Forms' : 1, 'lab_id': "999-0098" }, + '99' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1, "lab_id": "999-0099" }, + '100' : {'Total_cbc_Forms' : 1, 'Total_inr_Forms' : 1, "lab_id": "999-0100" } } report_data = { diff --git a/test/TestResearchIdToRedcapId.py b/test/TestResearchIdToRedcapId.py index be0cd93..318b88f 100644 --- a/test/TestResearchIdToRedcapId.py +++ b/test/TestResearchIdToRedcapId.py @@ -19,7 +19,7 @@ class TestResearchIdToRedcapId(unittest.TestCase): def setUp(self): self.sortedData = """ - + HEMOGLOBIN 1534435 @@ -28,8 +28,9 @@ def setUp(self): 16.0 g/dL - 999-0059 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + 999-0001 + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + WBC 999 @@ -38,39 +39,19 @@ def setUp(self): - 999-0059 - cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat - - PLATELET COUNT - 1009 - 92 - - - - - 999-0059 - cbccbc_lbdtccbc_completecbc_nximportplat_lborresplat_lborresuplat_lbstat - - HEMOGLOBIN - 1534435 - 9.5 - 12.0 - 16.0 - g/dL - - 999-0059 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - """ + 999-0002 + cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat + +""" self.data = etree.ElementTree(etree.fromstring(self.sortedData)) - self.serverResponse = """ - + self.serverResponse = """ + -""" - +""" self.output = """ - + HEMOGLOBIN 1534435 10.5 @@ -79,8 +60,9 @@ def setUp(self): g/dL 1 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - + cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat + + WBC 999 5.4 @@ -88,38 
+70,20 @@ def setUp(self): - 1 - cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat - - PLATELET COUNT - 1009 - 92 - - - - - 1 - cbccbc_lbdtccbc_completecbc_nximportplat_lborresplat_lborresuplat_lbstat - - HEMOGLOBIN - 1534435 - 9.5 - 12.0 - 16.0 - g/dL - - 1 - cbccbc_lbdtccbc_completecbc_nximporthemo_lborreshemo_lborresuhemo_lbstat - """ + 2 + cbccbc_lbdtccbc_completecbc_nximportwbc_lborreswbc_lborresuwbc_lbstat + +""" self.expect = etree.tostring(etree.fromstring(self.output)) self.configuration_directory = tempfile.mkdtemp('/') self.research_id_to_redcap_id = "research_id_to_redcap_id_map.xml" try: f = open(os.path.join(self.configuration_directory, self.research_id_to_redcap_id), "w+") - f.write(""" - dm_subjid - dm_usubjid + f.write(""" + + dm_subjid + dm_usubjid """) f.close() except: @@ -131,7 +95,16 @@ def dummy_redcapClient_initializer(self, redcap_uri, token, verify_ssl): def dummy_get_data_from_redcap(self,records_to_fecth=[],events_to_fetch=[], fields_to_fetch=[], forms_to_fetch=[], return_format='xml'): dummy_output = """ - + + + + + + + + + + """ return dummy_output diff --git a/vagrant/aliases b/vagrant/aliases index d7a53d6..768bc1c 100644 --- a/vagrant/aliases +++ b/vagrant/aliases @@ -1,4 +1,5 @@ alias db='mysql --prompt="(\u@\h) [\d]> " --pager="less -niSFX" -uroot -ppassword redcap' alias check_redcap="curl -s http://localhost/redcap/ | grep -i 'Welcome\|Critical Error' " alias restart_httpd='sudo /etc/init.d/apache2 reload ' +alias restart_mysql='sudo service mysql restart' alias lsa='ls -al --color=auto' From cc854bb3952116ecc00c3301b87d27c2c05d2bc4 Mon Sep 17 00:00:00 2001 From: Nicholas Rejack Date: Fri, 10 Oct 2014 10:57:21 -0400 Subject: [PATCH 19/51] Updated README.md to point DOI link at a resolvable URL. 
Fixes #96 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 03bfc0e..750c019 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ RED-I Project ============= -![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.10014.png ".") +[![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.10014.png ".")](http://dx.doi.org/10.5281/zenodo.10014) Introduction ------------ From 57d5a9f0367aa3db6a6f492c30e81b8c6e8aafc0 Mon Sep 17 00:00:00 2001 From: Nicholas Rejack Date: Fri, 10 Oct 2014 11:58:17 -0400 Subject: [PATCH 20/51] Updated vagrant/README.md to fix typo and clarify directories used in configuring VM --- vagrant/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vagrant/README.md b/vagrant/README.md index 54fec6b..f30f531 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -41,13 +41,14 @@ virtual machine path "**/var/www/redcap**". Now execute the following commands to complete the configuration:
+cd ./vagrant	# must be in the redi/vagrant/ directory
 make copy_config_example
 make copy_redcap_code
 make copy_project_data
 make show_config
 
-Please verify that the output from "show_config" matheches your expectations. +Please verify that the output from "show_config" matches your expectations. ### 3. Start the VM @@ -55,7 +56,7 @@ To use the vagrant VM you will need to install Vagrant and Virtual Box. With these packages installed, follow this procedure to use a VM template: - cd ./vagrant + # must be in the redi/vagrant/ directory vagrant up Vagrant will instantiate and provision the new VM. The REDCap web application should be accessible in the browser at From 8655fb83734fb8fc6c4168c5dfa262abb3aaa2c0 Mon Sep 17 00:00:00 2001 From: Nicholas Rejack Date: Fri, 10 Oct 2014 14:12:19 -0400 Subject: [PATCH 21/51] Removed vagrant/README.md and updated docs/test_sample_project_using_vagrant.rst to fix typo and improve clarity --- docs/test_sample_project_using_vagrant.rst | 12 ++- vagrant/README.md | 106 --------------------- 2 files changed, 8 insertions(+), 110 deletions(-) delete mode 100644 vagrant/README.md diff --git a/docs/test_sample_project_using_vagrant.rst b/docs/test_sample_project_using_vagrant.rst index b63dad6..297d1e0 100644 --- a/docs/test_sample_project_using_vagrant.rst +++ b/docs/test_sample_project_using_vagrant.rst @@ -56,13 +56,14 @@ Now execute the following commands to complete the configuration: .. raw:: html
+   cd ./vagrant    # must be in the redi/vagrant/ directory
    make copy_config_example
    make copy_redcap_code
    make copy_project_data
    make show_config
    
-Please verify that the output from "show\_config" matheches your +Please verify that the output from "show\_config" matches your expectations. 3. Start the VM @@ -73,10 +74,13 @@ To use the vagrant VM you will need to install Vagrant and Virtual Box. With these packages installed, follow this procedure to use a VM template: -:: +.. raw:: html - cd ./vagrant - vagrant up +
+   # must be in the redi/vagrant/ directory
+   cd ./vagrant
+   vagrant up
+   
Vagrant will instantiate and provision the new VM. The REDCap web application should be accessible in the browser at diff --git a/vagrant/README.md b/vagrant/README.md deleted file mode 100644 index f30f531..0000000 --- a/vagrant/README.md +++ /dev/null @@ -1,106 +0,0 @@ -# Testing RED-I with a sample REDCap Project - -## Purpose - -The "vagrant" folder was created with the goal of making testing [RED-I software](https://github.com/ctsit/redi) as easy as possible. -It contains the [Vagrantfile](../vagrant/Vagrantfile) which allows to start a virtual machine capable of running the -[REDCap software](http://http://www.project-redcap.org) -- which means that during virtual machine creation the Apache and MySQL -software is installed without any user intervention. - -There are a few important things to note before proceeding with running RED-I to import data into a sample REDCap project: - -- You have to install the **vagrant** and **virtual box** software -- You have to obtain the closed-source REDCap software from http://project-redcap.org/ -- You have to obtain a **Makefile.ini** file in order to be able to execute tasks from the **Makefile** - -## Steps - -### 1. Install vagrant and virtual box - -On a linux machine run: - -* sudo apt-get install vagrant -* sudo apt-get install virtualbox - - -On a mac machine: - -* Download and install vagrant from https://www.vagrantup.com/downloads.html -* Download and install the latest virtual box from http://download.virtualbox.org/virtualbox/ - -For more details about Vagrant software you can go to [why-vagrant](https://docs.vagrantup.com/v2/why-vagrant/) page. - - -### 2. Configure the VM - -As mentioned above you have to obtain a copy of the REDCap software from http://project-redcap.org/ -and save it as "**redcap.zip**" file in the "**config-example/vagrant-data**" folder. 
-This ensures that in the later steps the [bootstrap.sh](../vagrant/bootstrap.sh) script can extract the files to the -virtual machine path "**/var/www/redcap**". - -Now execute the following commands to complete the configuration: - -
-cd ./vagrant	# must be in the redi/vagrant/ directory
-make copy_config_example
-make copy_redcap_code
-make copy_project_data
-make show_config
-
- -Please verify that the output from "show_config" matches your expectations. - -### 3. Start the VM - -To use the vagrant VM you will need to install Vagrant and Virtual Box. - -With these packages installed, follow this procedure to use a VM template: - - # must be in the redi/vagrant/ directory - vagrant up - -Vagrant will instantiate and provision the new VM. The REDCap web application should be accessible in the browser at - - http://localhost:8998/redcap/ - -If port 8998 is already in use vagrant will choose a different port automatically. -Read the log of "vagrant up" and note the port to be used. - -### 4. Verify the VM is running - -Verify that the virtual machine is working properly by accessing it using: - -
-vagrant ssh
-
- -### 5. Import Enrollment Data using RED-I - -Import the [sample subject list](../config-example/vagrant-data/enrollment_test_data.csv) into REDCap by executing: - -
-make rc_enrollment
-
- -Note: This step is necessary because in order to associate data with subjects the list of subjects needs to exist in the REDCap database. - - -### 6. Import Electronic Health Records using RED-I - -Import the [sample electronic health records](../config-example/vagrant-data/redi_sample_project_v5.7.4.sql) into REDCap by executing: - -
-make rc_post
-
- -Verify that the output of this command ends with: -
-You can review the summary report by opening: report.html in your browser
-
- -If this step succeded you have verified that RED-I can be used to save time by automating EHR data imports into REDCap. - - -Congratulations! You can now [add your own REDCap project](../doc/add_new_redcap_project.md) -and start using RED-I to move data. - From db3105cf8e8eacc99d8a083ad3fb75ccdf8613e1 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Tue, 14 Oct 2014 10:00:55 -0400 Subject: [PATCH 22/51] Remove unused file `config-example/report.xsl` --- config-example/report.xsl | 157 -------------------------------------- 1 file changed, 157 deletions(-) delete mode 100644 config-example/report.xsl diff --git a/config-example/report.xsl b/config-example/report.xsl deleted file mode 100644 index 8d816ff..0000000 --- a/config-example/report.xsl +++ /dev/null @@ -1,157 +0,0 @@ - - - - - - - - - - Summary Report - - -

Data Import Report

- - - - - - - - - - - - - -
- Project - - -
- Date - - -
- RedCapServerAddress - - -
-
-

Summary

- - - - - - - - - - - - - - - - - -
Total Subjects - -
- - - -
-
- - - - - - - - -
-

Subject Details

- - - - - - - - - - - - - - - - - - - -
Subject - -
- - - -
-
-

Errors

- - - - - - -
- -
- - -
-
\ No newline at end of file From 930fddb583935126fcaabe331b6dbf5b5a8e6598 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Wed, 15 Oct 2014 11:28:09 -0400 Subject: [PATCH 23/51] Save links for downloading REDCap --- vagrant/downloading_redcap_code.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 vagrant/downloading_redcap_code.rst diff --git a/vagrant/downloading_redcap_code.rst b/vagrant/downloading_redcap_code.rst new file mode 100644 index 0000000..2ba2414 --- /dev/null +++ b/vagrant/downloading_redcap_code.rst @@ -0,0 +1,17 @@ +Downloading REDCap Source Code +============================== + +REDCap code is available only through Vanderbilt University. + +Possession and use of REDCap code and workflow methodology is strictly +limited to institutions and organizations who have finalized an End-User +License Agreement with Vanderbilt University. + +https://redcap.vanderbilt.edu/consortium/ + +Long Term Support URLs +---------------------- + +* https://iwg.devguard.com/trac/redcap/browser/zips_redcap/6.0.5/redcap6.0.5.zip +* https://iwg.devguard.com/trac/redcap/browser/zips_redcap/6.0.5/redcap6.0.5_upgrade.zip + From b805a0d9a0afeb8f33e75d87f51b81b620aeb425 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Wed, 15 Oct 2014 11:29:08 -0400 Subject: [PATCH 24/51] Add more useful aliases for vagrant box --- vagrant/aliases | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) diff --git a/vagrant/aliases b/vagrant/aliases index 768bc1c..d6777fa 100644 --- a/vagrant/aliases +++ b/vagrant/aliases @@ -2,4 +2,42 @@ alias db='mysql --prompt="(\u@\h) [\d]> " --pager="less -niSFX" -uroot -ppasswor alias check_redcap="curl -s http://localhost/redcap/ | grep -i 'Welcome\|Critical Error' " alias restart_httpd='sudo /etc/init.d/apache2 reload ' alias restart_mysql='sudo service mysql restart' -alias lsa='ls -al --color=auto' +alias ls='ls --color=auto' +alias lsa='ls -al' +alias lss='ls -ltr' +alias dua='du 
-hcs' +alias cdd='cd ..' + +alias gst='git status' +alias glog='git log' +alias gdiff='git diff' +alias gdif='git diff --cached' +alias gb='git branch' +alias gp='git remote -v' +alias gf='git show --name-status' + +alias gan='git blame' +alias gin='git fetch && git log ..origin/master' +alias gout='git fetch && git log origin/master..' + +alias grepp="nice ack-grep --php --python -i" + +function venv() { + CMD="X$1" + + if [ "$CMD" = "Xup" ]; then + . venv/bin/activate + elif [ "$CMD" = "Xdown" ]; then + deactivate + elif [ "$CMD" = "Xrestart" ]; then + rm -rf venv + virtualenv venv + . venv/bin/activate + which redi + elif [ "$CMD" = "Xdestroy" ]; then + deactivate + rm -rf venv + else + echo "Supported commands: venv [up | down | restart | destroy]" + fi +} From 4494cad05850ae477fc957562f3c195bd88f4a74 Mon Sep 17 00:00:00 2001 From: Buck at UF Date: Wed, 15 Oct 2014 15:56:54 -0400 Subject: [PATCH 25/51] Update test_sample_project_using_vagrant.rst --- docs/test_sample_project_using_vagrant.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/test_sample_project_using_vagrant.rst b/docs/test_sample_project_using_vagrant.rst index 297d1e0..3aa4529 100644 --- a/docs/test_sample_project_using_vagrant.rst +++ b/docs/test_sample_project_using_vagrant.rst @@ -6,7 +6,7 @@ Purpose The "vagrant" folder was created with the goal of making testing `RED-I software `__ as easy as possible. 
It -contains the `Vagrantfile <../vagrant/Vagrantfile>`__ which allows to +contains the `Vagrantfile <../vagrant/Vagrantfile>`__ which allows you to start a virtual machine capable of running the `REDCap software `__ -- which means that during virtual machine creation the Apache and MySQL software is From 9dbdef4e14cfbfd3df868614559ceca31a0d161f Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Wed, 8 Oct 2014 15:00:10 -0400 Subject: [PATCH 26/51] Extract report creation and sending from _run() I moved the report creation and sending functions into their own `report` module. --- bin/redi.py | 192 +++------------------------- bin/report.py | 220 ++++++++++++++++++++++++++++++++ test/TestCreateSummaryReport.py | 11 +- test/TestResume.py | 8 +- 4 files changed, 254 insertions(+), 177 deletions(-) create mode 100644 bin/report.py diff --git a/bin/redi.py b/bin/redi.py index eddb0af..8351ab5 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -54,6 +54,7 @@ from lxml import etree from docopt import docopt +import report from utils import redi_email from utils.redcapClient import RedcapClient import utils.SimpleConfigParser as SimpleConfigParser @@ -173,9 +174,14 @@ def main(): redcap_client = connect_to_redcap(get_email_settings(settings), get_redcap_settings(settings), dry_run) + if settings.send_email: + report_courier = report.ReportEmailSender(get_email_settings(settings), logger) + else: + report_courier = report.ReportFileWriter(settings.report_file_path2, logger) + _run(config_file, configuration_directory, do_keep_gen_files, dry_run, get_emr_data, settings, output_files, db_path, redcap_client, - args['--resume'], args['--skip-blanks']) + report_courier, args['--resume'], args['--skip-blanks']) def _makedirs(data_folder): @@ -246,7 +252,7 @@ def connect_to_redcap(email_settings, redcap_settings, dry_run=False): def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, get_emr_data, settings, data_folder, database_path, redcap_client, - resume=False, 
skip_blanks=False): + report_courier, resume=False, skip_blanks=False): global translational_table_tree assert _person_form_events_service is not None @@ -292,9 +298,6 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, 'is_sort_by_lab_id': settings.is_sort_by_lab_id, } - report_xsl = proj_root + "bin/utils/report.xsl" - send_email = settings.send_email - if not resume: _delete_last_runs_data(data_folder) @@ -336,20 +339,19 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, report_data['errors'].extend(rule_errors) # create summary report - xml_report_tree = create_summary_report(report_parameters, - report_data, alert_summary, - collection_date_summary_dict) - # print etree.tostring(xml_report_tree) - report_xsl = proj_root + "bin/utils/report.xsl" - xslt = etree.parse(report_xsl) - transform = etree.XSLT(xslt) - html_report = transform(xml_report_tree) - html_str = etree.tostring(html_report, method='html', pretty_print=True) - - if settings.send_email: - deliver_report_as_email(email_settings, html_str) - else: - deliver_report_as_file(settings.report_file_path2, html_str) + # TODO: remove the need for the ReportWriter object (backwards-compat) + class ReportWriter(object): + def write(self, element_tree, file_name): + write_element_tree_to_file(element_tree, file_name) + + creator = report.ReportCreator(report_parameters, + report_data, alert_summary, + collection_date_summary_dict, + ReportWriter()) + xml_report_tree = creator.create_report() + html_str = creator.to_html(xml_report_tree) + + report_courier.deliver(html_str) if batch: # Update the batch row @@ -365,49 +367,6 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, redi_lib.delete_temporary_folder(data_folder) -def deliver_report_as_file(html_report_path, html): - """ - Deliver the summary report by writing it to a file - or logging it to the console if writing the file fails - - :html_report_path the path where the report 
will be stored - :html the actual report content - """ - problem_found = False - try: - report_file = open(html_report_path, 'w') - except (IOError, OSError) as e: - logger.exception('Could not open file: %s' % html_report_path) - problem_found = True - else: - try: - report_file.write(html) - logger.info("==> You can review the summary report by opening: {}"\ - " in your browser".format(html_report_path)) - except IOError: - logger.exception('Could not write file: %s' % html_report_path) - problem_found = True - finally: - report_file.close() - if problem_found: - logger.info("== Summary report ==" + html) - - -def deliver_report_as_email(email_settings, html): - """ - Deliver summary report as an email - - :email_settings dictinary with email parameters - :html the actual report content - """ - try: - redi_email.send_email_data_import_completed(email_settings, html) - logger.info("Summary report was emailed: parameter 'send_email = Y'") - except Exception as e: - logger.error("Unable to deliver the summary report due error: %s" % e) - deliver_report_as_file("report.html", html) - - def _create_person_form_event_tree_with_data( config_file, configuration_directory, redcap_client, form_events_file, raw_xml_file, rules, settings, data_folder, translation_table_file): @@ -1220,115 +1179,6 @@ def configure_logging(data_folder, verbose=False): return logger -def create_summary_report(report_parameters, report_data, alert_summary, \ - collection_date_summary_dict): - """ - Generates the xml to be transformed by `bin/utils/report.xsl` - into an html report with details about data import completed. 
- """ - root = etree.Element("report") - root.append(etree.Element("header")) - root.append(etree.Element("summary")) - root.append(etree.Element("alerts")) - root.append(etree.Element("subjectsDetails")) - root.append(etree.Element("errors")) - root.append(etree.Element("summaryOfSpecimenTakenTimes")) - updateReportHeader(root, report_parameters) - updateReportSummary(root, report_data) - updateSubjectDetails(root, report_data['subject_details']) - updateReportAlerts(root, alert_summary) - updateReportErrors(root, report_data['errors']) - updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict) - - # TODO: remove dependency on the order of the xml elements in the report - sort_by_value = 'lab_id' if report_parameters['is_sort_by_lab_id'] else 'redcap_id' - root.append(gen_ele("sort_details_by", sort_by_value)) - - tree = etree.ElementTree(root) - write_element_tree_to_file(tree,report_parameters.get('report_file_path')) - return tree - -def updateReportHeader(root, report_parameters): - """ Update the passed `root` element tree with date, project name and url""" - header = root[0] - project = etree.SubElement(header, "project") - project.text = report_parameters.get('project') - date = etree.SubElement(header, "date") - date.text = time.strftime("%m/%d/%Y") - redcapServerAddress = etree.SubElement(header, "redcapServerAddress") - redcapServerAddress.text = report_parameters.get('redcap_uri') - - -def updateReportSummary(root, report_data): - summary = root[1] - subjectCount = etree.SubElement(summary, "subjectCount") - subjectCount.text = str(report_data.get('total_subjects')) - forms = etree.SubElement(summary, "forms") - form_data = report_data['form_details'] - for k in sorted(form_data.keys()): - form = etree.SubElement(forms, "form") - name_element = etree.SubElement(form, "form_name") - name_element.text = k - count_element = etree.SubElement(form, "form_count") - count_element.text = str(form_data.get(k)) - - -def updateReportAlerts(root, 
alert_summary): - alerts = root[2] - too_many_forms = etree.SubElement(alerts, 'tooManyForms') - too_many_values = etree.SubElement(alerts, 'tooManyValues') - for event in alert_summary['max_event_alert']: - event_alert = etree.SubElement(too_many_forms, 'eventAlert') - msg = etree.SubElement(event_alert, 'message') - msg.text = event - for value in alert_summary['multiple_values_alert']: - values_alert = etree.SubElement(too_many_values, 'valuesAlert') - msg = etree.SubElement(values_alert, 'message') - msg.text = value - -def updateSubjectDetails(root, subject_details): - """ - Helper method for #create_summary_report() - Adds subject information to the xml tree which is later formated - by `bin/utils/report.xsl` into the html `table#subject_details"` - """ - subjectsDetails = root[3] - for key in sorted(subject_details.keys()): - subject = etree.SubElement(subjectsDetails, "subject") - details = subject_details.get(key) - subject.append(gen_ele("redcap_id", key)) - forms = etree.SubElement(subject, "forms") - - for k in sorted(details.keys()): - if(k.endswith("_Forms")): - form = etree.SubElement(forms, "form") - name_element = etree.SubElement(form, "form_name") - name_element.text = k - count_element = etree.SubElement(form, "form_count") - count_element.text = str(details.get(k)) - else: - element = etree.SubElement(subject, k) - element.text = str(details.get(k)) - - -def updateReportErrors(root, errors): - errorsRoot = root[4] - for error in errors: - errorElement = etree.SubElement(errorsRoot, "error") - errorElement.text = str(error) - - -def updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict): - timeSummaryRoot = root[5] - totalElement = etree.SubElement(timeSummaryRoot, "total") - totalElement.text = str(collection_date_summary_dict['total']) - blankElement = etree.SubElement(timeSummaryRoot, "blank") - blankElement.text = str(collection_date_summary_dict['blank']) - percentElement = etree.SubElement(timeSummaryRoot, "percent") - 
percentElement.text = str((float(collection_date_summary_dict['blank'])/\ - collection_date_summary_dict['total'])*100) - - def create_empty_events_for_one_subject_helper( form_events_file, translation_table_file): diff --git a/bin/report.py b/bin/report.py new file mode 100644 index 0000000..699d12c --- /dev/null +++ b/bin/report.py @@ -0,0 +1,220 @@ +import abc +import time + +import pkg_resources +from lxml import etree + +from utils import redi_email + + +class ReportCourier(object): + @abc.abstractmethod + def deliver(self, report): + raise NotImplementedError() + + +class ReportFileWriter(ReportCourier): + def __init__(self, output_file, logger): + self._output_file = output_file + self._logger = logger + + def deliver(self, report): + """ + Deliver the summary report by writing it to a file + or logging it to the console if writing the file fails + + :html_report_path the path where the report will be stored + :html the actual report content + """ + logger = self._logger + html_report_path = self._output_file + html = report + + problem_found = False + try: + report_file = open(html_report_path, 'w') + except (IOError, OSError) as e: + logger.exception('Could not open file: %s' % html_report_path) + problem_found = True + else: + try: + report_file.write(html) + logger.info("==> You can review the summary report by opening: {}"\ + " in your browser".format(html_report_path)) + except IOError: + logger.exception('Could not write file: %s' % html_report_path) + problem_found = True + finally: + report_file.close() + if problem_found: + logger.info("== Summary report ==" + html) + + +class ReportEmailSender(ReportCourier): + def __init__(self, settings, logger): + self._settings = settings + self._logger = logger + + def deliver(self, report): + """ + Deliver summary report as an email + + :email_settings dictinary with email parameters + :html the actual report content + """ + logger = self._logger + email_settings = self._settings + html = report + # TODO: 
Replace this with a "backup_courier" constructor injection + deliver_report_as_file = self.__backup_courier_wrapper() + + try: + redi_email.send_email_data_import_completed(email_settings, html) + logger.info("Summary report was emailed: parameter 'send_email = Y'") + except Exception as e: + logger.error("Unable to deliver the summary report due error: %s" % e) + deliver_report_as_file("report.html", html) + + def __backup_courier_wrapper(self): + # Needed for backwards-compatibility with the old + # deliver_report_by_file() call from deliver_report_by_email() + logger = self._logger + + def wrapper(filename, report): + return ReportFileWriter(filename, logger).deliver(report) + + return wrapper + + +class ReportCreator(object): + def __init__(self, report_parameters, report_data, alert_summary, + collection_date_summary_dict, writer): + self._report_parameters = report_parameters + self._report_data = report_data + self._alert_summary = alert_summary + self._collection_date_summary_dict = collection_date_summary_dict + self._writer = writer + + def create_report(self): + report_parameters = self._report_parameters + report_data = self._report_data + alert_summary = self._alert_summary + collection_date_summary_dict = self._collection_date_summary_dict + write_element_tree_to_file = self._writer.write + + root = etree.Element("report") + root.append(etree.Element("header")) + root.append(etree.Element("summary")) + root.append(etree.Element("alerts")) + root.append(etree.Element("subjectsDetails")) + root.append(etree.Element("errors")) + root.append(etree.Element("summaryOfSpecimenTakenTimes")) + updateReportHeader(root, report_parameters) + updateReportSummary(root, report_data) + updateSubjectDetails(root, report_data['subject_details']) + updateReportAlerts(root, alert_summary) + updateReportErrors(root, report_data['errors']) + updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict) + + # TODO: remove dependency on the order of the xml elements 
in the report + sort_by_value = 'lab_id' if report_parameters['is_sort_by_lab_id'] else 'redcap_id' + root.append(gen_ele("sort_details_by", sort_by_value)) + + tree = etree.ElementTree(root) + write_element_tree_to_file(tree,report_parameters.get('report_file_path')) + return tree + + def to_html(self, xml_report_tree): + report_xsl = pkg_resources.resource_filename('bin', 'utils/report.xsl') + + # print etree.tostring(xml_report_tree) + # report_xsl = proj_root + "bin/utils/report.xsl" + xslt = etree.parse(report_xsl) + transform = etree.XSLT(xslt) + html_report = transform(xml_report_tree) + html_str = etree.tostring(html_report, method='html', pretty_print=True) + + return html_str + + + +def updateReportHeader(root, report_parameters): + """ Update the passed `root` element tree with date, project name and url""" + header = root[0] + project = etree.SubElement(header, "project") + project.text = report_parameters.get('project') + date = etree.SubElement(header, "date") + date.text = time.strftime("%m/%d/%Y") + redcapServerAddress = etree.SubElement(header, "redcapServerAddress") + redcapServerAddress.text = report_parameters.get('redcap_uri') + + +def updateReportSummary(root, report_data): + summary = root[1] + subjectCount = etree.SubElement(summary, "subjectCount") + subjectCount.text = str(report_data.get('total_subjects')) + forms = etree.SubElement(summary, "forms") + form_data = report_data['form_details'] + for k in sorted(form_data.keys()): + form = etree.SubElement(forms, "form") + name_element = etree.SubElement(form, "form_name") + name_element.text = k + count_element = etree.SubElement(form, "form_count") + count_element.text = str(form_data.get(k)) + + +def updateReportAlerts(root, alert_summary): + alerts = root[2] + too_many_forms = etree.SubElement(alerts, 'tooManyForms') + too_many_values = etree.SubElement(alerts, 'tooManyValues') + for event in alert_summary['max_event_alert']: + event_alert = etree.SubElement(too_many_forms, 'eventAlert') 
+ msg = etree.SubElement(event_alert, 'message') + msg.text = event + for value in alert_summary['multiple_values_alert']: + values_alert = etree.SubElement(too_many_values, 'valuesAlert') + msg = etree.SubElement(values_alert, 'message') + msg.text = value + + +def updateSubjectDetails(root, subject_details): + """ + Helper method for #create_summary_report() + Adds subject information to the xml tree which is later formated + by `bin/utils/report.xsl` into the html `table#subject_details"` + """ + subjectsDetails = root[3] + for key in sorted(subject_details.keys()): + subject = etree.SubElement(subjectsDetails, "subject") + details = subject_details.get(key) + subject.append(gen_ele("redcap_id", key)) + forms = etree.SubElement(subject, "forms") + + for k in sorted(details.keys()): + if(k.endswith("_Forms")): + form = etree.SubElement(forms, "form") + name_element = etree.SubElement(form, "form_name") + name_element.text = k + count_element = etree.SubElement(form, "form_count") + count_element.text = str(details.get(k)) + else: + element = etree.SubElement(subject, k) + element.text = str(details.get(k)) + + +def updateReportErrors(root, errors): + errorsRoot = root[4] + for error in errors: + errorElement = etree.SubElement(errorsRoot, "error") + errorElement.text = str(error) + + +def updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict): + timeSummaryRoot = root[5] + totalElement = etree.SubElement(timeSummaryRoot, "total") + totalElement.text = str(collection_date_summary_dict['total']) + blankElement = etree.SubElement(timeSummaryRoot, "blank") + blankElement.text = str(collection_date_summary_dict['blank']) + percentElement = etree.SubElement(timeSummaryRoot, "percent") + percentElement.text = str((float(collection_date_summary_dict['blank'])/\ + collection_date_summary_dict['total'])*100) diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 065786b..9acdb12 100644 --- a/test/TestCreateSummaryReport.py +++ 
b/test/TestCreateSummaryReport.py @@ -8,6 +8,7 @@ from StringIO import StringIO import time import redi +import report DEFAULT_DATA_DIRECTORY = os.getcwd() @@ -277,11 +278,16 @@ def test_create_summary_report(self): """ sys.path.append('config') - result = redi.create_summary_report(\ + class MockWriter(object): + def write(self, *args): + pass + + result = report.ReportCreator(\ self.test_report_params, \ self.test_report_data, \ self.test_alert_summary, \ - self.specimen_taken_time_summary) + self.specimen_taken_time_summary, + MockWriter()).create_report() result_string = etree.tostring(result) #print result_string xmlschema_doc = etree.parse(self.schema_str) @@ -298,7 +304,6 @@ def test_create_summary_report(self): def tearDown(self): # delete the created xml file os.remove(self.test_report_params['report_file_path']) - return if __name__ == '__main__': unittest.main() diff --git a/test/TestResume.py b/test/TestResume.py index 491008c..6e3d3bc 100644 --- a/test/TestResume.py +++ b/test/TestResume.py @@ -24,7 +24,7 @@ class FileDeleted(): redi._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, - database_path=None, redcap_client=None) + database_path=None, redcap_client=None, report_courier=None) def test_no_resume_stores(self): class MockPersonFormEvents(object): @@ -52,7 +52,8 @@ class FileStored(): redi._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, - database_path=None, redcap_client=None) + database_path=None, redcap_client=None, + report_courier=None) def test_resume_fetches_data_from_last_run(self): class MockPersonFormEvents(object): @@ -72,7 +73,8 @@ class DataFetched(): redi._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, - database_path=None, 
resume=True, redcap_client=None) + database_path=None, resume=True, redcap_client=None, + report_courier=None) class MockSettings(object): From 4ba620e0f8d5720e8ea9446d1a548738a9362983 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Tue, 14 Oct 2014 09:01:36 -0400 Subject: [PATCH 27/51] Move ReportCreator creation up to main() This required replacing a __FILE__-based call with pkg_resource, which fixes ctsit/redi#99. --- bin/redi.py | 53 +++++++++++++++++++++------------------------------ bin/report.py | 34 +++++++++++++++++---------------- 2 files changed, 40 insertions(+), 47 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 8351ab5..4c18f9f 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -174,6 +174,23 @@ def main(): redcap_client = connect_to_redcap(get_email_settings(settings), get_redcap_settings(settings), dry_run) + report_file_path = os.path.join(configuration_directory, + settings.report_file_path) + + report_parameters = { + 'report_file_path': report_file_path, + 'project': settings.project, + 'redcap_uri': settings.redcap_uri, + 'is_sort_by_lab_id': settings.is_sort_by_lab_id, + } + + # TODO: remove the need for the ReportWriter object (backwards-compat) + class ReportWriter(object): + def write(self, element_tree, file_name): + write_element_tree_to_file(element_tree, file_name) + + report_creator = report.ReportCreator(report_parameters, ReportWriter()) + if settings.send_email: report_courier = report.ReportEmailSender(get_email_settings(settings), logger) else: @@ -181,7 +198,8 @@ def main(): _run(config_file, configuration_directory, do_keep_gen_files, dry_run, get_emr_data, settings, output_files, db_path, redcap_client, - report_courier, args['--resume'], args['--skip-blanks']) + report_courier, report_creator, args['--resume'], + args['--skip-blanks']) def _makedirs(data_folder): @@ -252,7 +270,7 @@ def connect_to_redcap(email_settings, redcap_settings, dry_run=False): def _run(config_file, configuration_directory, do_keep_gen_files, 
dry_run, get_emr_data, settings, data_folder, database_path, redcap_client, - report_courier, resume=False, skip_blanks=False): + report_courier, report_creator, resume=False, skip_blanks=False): global translational_table_tree assert _person_form_events_service is not None @@ -288,16 +306,6 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, translation_table_file = os.path.join(configuration_directory, \ settings.translation_table_file) - report_file_path = os.path.join(configuration_directory,\ - settings.report_file_path) - - report_parameters = { - 'report_file_path': report_file_path, - 'project': settings.project, - 'redcap_uri': settings.redcap_uri, - 'is_sort_by_lab_id': settings.is_sort_by_lab_id, - } - if not resume: _delete_last_runs_data(data_folder) @@ -339,17 +347,8 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, report_data['errors'].extend(rule_errors) # create summary report - # TODO: remove the need for the ReportWriter object (backwards-compat) - class ReportWriter(object): - def write(self, element_tree, file_name): - write_element_tree_to_file(element_tree, file_name) - - creator = report.ReportCreator(report_parameters, - report_data, alert_summary, - collection_date_summary_dict, - ReportWriter()) - xml_report_tree = creator.create_report() - html_str = creator.to_html(xml_report_tree) + html_str = report_creator.create_report( + report_data, alert_summary, collection_date_summary_dict) report_courier.deliver(html_str) @@ -1884,14 +1883,6 @@ def store(self, pfe_tree): method="xml", pretty_print=True) -def gen_ele(ele_name, ele_text): - """ Create an xml element with given name and content """ - return etree.XML("<{}>{}".format(ele_name, ele_text, ele_name)) - -def gen_subele(parent, subele_name, subele_text): - subele = etree.SubElement(parent, subele_name) - subele.text = subele_text - return subele if __name__ == "__main__": main() diff --git a/bin/report.py b/bin/report.py index 
699d12c..360ee96 100644 --- a/bin/report.py +++ b/bin/report.py @@ -6,6 +6,8 @@ from utils import redi_email +REDI_PACKAGE_NAME = 'bin' + class ReportCourier(object): @abc.abstractmethod @@ -87,19 +89,12 @@ def wrapper(filename, report): class ReportCreator(object): - def __init__(self, report_parameters, report_data, alert_summary, - collection_date_summary_dict, writer): + def __init__(self, report_parameters, writer): self._report_parameters = report_parameters - self._report_data = report_data - self._alert_summary = alert_summary - self._collection_date_summary_dict = collection_date_summary_dict self._writer = writer - def create_report(self): + def create_report(self, report_data, alert_summary, collection_date_summary_dict): report_parameters = self._report_parameters - report_data = self._report_data - alert_summary = self._alert_summary - collection_date_summary_dict = self._collection_date_summary_dict write_element_tree_to_file = self._writer.write root = etree.Element("report") @@ -122,16 +117,12 @@ def create_report(self): tree = etree.ElementTree(root) write_element_tree_to_file(tree,report_parameters.get('report_file_path')) - return tree - - def to_html(self, xml_report_tree): - report_xsl = pkg_resources.resource_filename('bin', 'utils/report.xsl') - # print etree.tostring(xml_report_tree) - # report_xsl = proj_root + "bin/utils/report.xsl" + report_xsl = pkg_resources.resource_filename(REDI_PACKAGE_NAME, + 'utils/report.xsl') xslt = etree.parse(report_xsl) transform = etree.XSLT(xslt) - html_report = transform(xml_report_tree) + html_report = transform(tree) html_str = etree.tostring(html_report, method='html', pretty_print=True) return html_str @@ -218,3 +209,14 @@ def updateSummaryOfSpecimenTakenTimes(root, collection_date_summary_dict): percentElement = etree.SubElement(timeSummaryRoot, "percent") percentElement.text = str((float(collection_date_summary_dict['blank'])/\ collection_date_summary_dict['total'])*100) + + +def gen_ele(ele_name, 
ele_text): + """ Create an xml element with given name and content """ + return etree.XML("<{}>{}".format(ele_name, ele_text, ele_name)) + + +def gen_subele(parent, subele_name, subele_text): + subele = etree.SubElement(parent, subele_name) + subele.text = subele_text + return subele From 9d9273e990836b4d67b9c395b5498e0930b1c058 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Tue, 21 Oct 2014 11:25:59 -0400 Subject: [PATCH 28/51] Make ReportCreator constructor more explicit --- bin/redi.py | 17 ++++------------- bin/report.py | 13 +++++++++---- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 4c18f9f..6be0f8e 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -177,19 +177,10 @@ def main(): report_file_path = os.path.join(configuration_directory, settings.report_file_path) - report_parameters = { - 'report_file_path': report_file_path, - 'project': settings.project, - 'redcap_uri': settings.redcap_uri, - 'is_sort_by_lab_id': settings.is_sort_by_lab_id, - } - - # TODO: remove the need for the ReportWriter object (backwards-compat) - class ReportWriter(object): - def write(self, element_tree, file_name): - write_element_tree_to_file(element_tree, file_name) - - report_creator = report.ReportCreator(report_parameters, ReportWriter()) + report_creator = report.ReportCreator(report_file_path, settings.project, + settings.redcap_uri, + settings.is_sort_by_lab_id, + write_element_tree_to_file) if settings.send_email: report_courier = report.ReportEmailSender(get_email_settings(settings), logger) diff --git a/bin/report.py b/bin/report.py index 360ee96..16ed2ab 100644 --- a/bin/report.py +++ b/bin/report.py @@ -89,13 +89,19 @@ def wrapper(filename, report): class ReportCreator(object): - def __init__(self, report_parameters, writer): - self._report_parameters = report_parameters + def __init__(self, report_file_path, project_name, redcap_uri, + sort_by_lab_id, writer): + self._report_parameters = { + 'report_file_path': 
report_file_path, + 'project': project_name, + 'redcap_uri': redcap_uri, + 'is_sort_by_lab_id': sort_by_lab_id + } self._writer = writer def create_report(self, report_data, alert_summary, collection_date_summary_dict): report_parameters = self._report_parameters - write_element_tree_to_file = self._writer.write + write_element_tree_to_file = self._writer root = etree.Element("report") root.append(etree.Element("header")) @@ -128,7 +134,6 @@ def create_report(self, report_data, alert_summary, collection_date_summary_dict return html_str - def updateReportHeader(root, report_parameters): """ Update the passed `root` element tree with date, project name and url""" header = root[0] From f5bd9cb07ccf2004420f4ec71e7bea68ea33de5f Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Tue, 21 Oct 2014 11:29:49 -0400 Subject: [PATCH 29/51] Update unit tests --- test/TestCreateSummaryReport.py | 29 ++++++++++++++++++++--------- test/TestResume.py | 7 ++++--- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 9acdb12..3481028 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -279,15 +279,23 @@ def test_create_summary_report(self): sys.path.append('config') class MockWriter(object): - def write(self, *args): - pass + def __call__(self, *args, **kwargs): + #expected call: write(tree, report_file_path) + self.result = args[0] + writer = MockWriter() + + creator = report.ReportCreator( + self.test_report_params['report_file_path'], + self.test_report_params['project'], + self.test_report_params['redcap_uri'], + self.test_report_params['is_sort_by_lab_id'], + writer) + + creator.create_report(self.test_report_data, self.test_alert_summary, + self.specimen_taken_time_summary) + + result = writer.result - result = report.ReportCreator(\ - self.test_report_params, \ - self.test_report_data, \ - self.test_alert_summary, \ - self.specimen_taken_time_summary, - 
MockWriter()).create_report() result_string = etree.tostring(result) #print result_string xmlschema_doc = etree.parse(self.schema_str) @@ -303,7 +311,10 @@ def write(self, *args): def tearDown(self): # delete the created xml file - os.remove(self.test_report_params['report_file_path']) + try: + os.remove(self.test_report_params['report_file_path']) + except: + pass if __name__ == '__main__': unittest.main() diff --git a/test/TestResume.py b/test/TestResume.py index 6e3d3bc..d17b0c3 100644 --- a/test/TestResume.py +++ b/test/TestResume.py @@ -24,7 +24,8 @@ class FileDeleted(): redi._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, - database_path=None, redcap_client=None, report_courier=None) + database_path=None, redcap_client=None, + report_courier=None, report_creator=None) def test_no_resume_stores(self): class MockPersonFormEvents(object): @@ -53,7 +54,7 @@ class FileStored(): do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, database_path=None, redcap_client=None, - report_courier=None) + report_courier=None, report_creator=None) def test_resume_fetches_data_from_last_run(self): class MockPersonFormEvents(object): @@ -74,7 +75,7 @@ class DataFetched(): do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, database_path=None, resume=True, redcap_client=None, - report_courier=None) + report_courier=None, report_creator=None) class MockSettings(object): From d26d52b5a17fc3e0834a49402cc3e36914731920 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Tue, 21 Oct 2014 11:59:24 -0400 Subject: [PATCH 30/51] Cleanup import statements Replaced get_proj_root() with pkg_resource equivalent. Removed unused argparse import. 
--- bin/redi.py | 33 ++++++++++++--------------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 6be0f8e..208df7e 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -47,13 +47,14 @@ import xml.etree.ElementTree as ET import sys import imp -import argparse import os +import pkg_resources from requests import RequestException from lxml import etree from docopt import docopt +import redi_lib import report from utils import redi_email from utils.redcapClient import RedcapClient @@ -62,24 +63,6 @@ from utils.GetEmrData import EmrFileAccessDetails -def get_proj_root(): - file_dir = os.path.dirname(os.path.realpath(__file__)) - proj_root = os.path.abspath(os.path.join(file_dir, "../")) + '/' - return proj_root - - -def get_db_path(batch_info_database, database_path): - if not os.path.exists(database_path): - os.makedirs(database_path) - - db_path = os.path.join(database_path, batch_info_database) - return db_path - - -proj_root = get_proj_root() -import redi_lib - - # Command line default argument values _person_form_events_service = None @@ -193,6 +176,14 @@ def main(): args['--skip-blanks']) +def get_db_path(batch_info_database, database_path): + if not os.path.exists(database_path): + os.makedirs(database_path) + + db_path = os.path.join(database_path, batch_info_database) + return db_path + + def _makedirs(data_folder): # Like os.makedirs() but suppresses error if path already exists. 
try: @@ -388,8 +379,8 @@ def _create_person_form_event_tree_with_data( # Convert COMPONENT_ID to loinc_code in the raw data component_to_loinc_code_xml = os.path.join(configuration_directory, \ settings.component_to_loinc_code_xml) - component_to_loinc_code_xsd = proj_root + \ - "bin/utils/component_id_to_loinc_code.xsd" + component_to_loinc_code_xsd = pkg_resources.resource_filename( + 'bin', 'utils/component_id_to_loinc_code.xsd') component_to_loinc_code_xml_tree = validate_xml_file_and_extract_data \ (component_to_loinc_code_xml, component_to_loinc_code_xsd) convert_component_id_to_loinc_code(data, component_to_loinc_code_xml_tree) From ba5df478853726dbcb477465e020110454b8f2e2 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Wed, 22 Oct 2014 17:28:02 -0400 Subject: [PATCH 31/51] Reformat `synthetic_data/README.md` to 80 chars + add note about generating integers --- scripts/synthetic_data/README.md | 53 +++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/scripts/synthetic_data/README.md b/scripts/synthetic_data/README.md index 6e21ac0..bf1fe36 100644 --- a/scripts/synthetic_data/README.md +++ b/scripts/synthetic_data/README.md @@ -1,23 +1,37 @@ # Synthetic Data Tools -makefakedata.R is a tool to create synthetic clinical lab data from simple template files. These files can be used to create sample input data to be processed by RED-I and loaded into a REDCap system. - -Using a file that defines the components of a test panel, normal ranges for their values and typical units, makefakedata can create a file of lab results for that panel with multiple instances of that panel for multiple study subjects, over a range of dates. The number of panels, research subjects, date ranges, input and output file names can all be controlled with parameters of makefakedata. - -makefakedata is designed to create sample datasets that are free of identifiers and any clinical history. 
With no claim to ownership, no research value, and no history of private data, these files can be published as test datasets with any software project. - -Test data sets can be tailored to the needs of the individual software project via panel templates and input parameters. +makefakedata.R is a tool to create synthetic clinical lab data from simple +template files. +These files can be used to create sample input data to be processed by RED-I +and loaded into a REDCap system. + +Using a file that defines the components of a test panel, normal ranges for +their values and typical units, makefakedata can create a file of lab results +for that panel with multiple instances of that panel for multiple study +subjects, over a range of dates. The number of panels, research subjects, date +ranges, input and output file names can all be controlled with parameters of +makefakedata. + +makefakedata is designed to create sample datasets that are free of identifiers +and any clinical history. With no claim to ownership, no research value, and +no history of private data, these files can be published as test datasets with +any software project. + +Test data sets can be tailored to the needs of the individual software project +via panel templates and input parameters. # Example -In this example, 3 subjects are created in both the CBC and Chemistry output files. For each subject, 7-20 panels of each test will be created. +In this example, 3 subjects are created in both the CBC and Chemistry output +files. For each subject, 7-20 panels of each test will be created. chem <- makefakedata("chemistry_input.csv", "output-chem.csv", min_panel=7, max_panel=20, subject_count=3) cbc <- makefakedata("cbc_input.csv", "output-cbc.csv", min_panel=7, max_panel=20, subject_count=3) # Panel Templates -A template file is a CSV file containing a header row of column labels and one lab component per row. 
Typical columns for the panel template are +A template file is a CSV file containing a header row of column labels and one +lab component per row. Typical columns for the panel template are * loinc_component - a name that describe a lab component * loinc_code - the code for that lab component @@ -27,7 +41,10 @@ A template file is a CSV file containing a header row of column labels and one l * panel - a lab panel on which these tests are likely to appear * loinc_long_common_name - a more descriptive name from LOINC -The columns _low_ and _high_ define a range from which the result value will random chosen. All other columns are strictly optional, but recommended. The input values _loinc_component, loinc_code, low, high, and units_ in the input will appear in the output file without alteration. +The columns _low_ and _high_ define a range from which the result value will +random chosen. All other columns are strictly optional, but recommended. +The input values _loinc_component, loinc_code, low, high, and units_ in the +input will appear in the output file without alteration. A typical panel template looks like this: @@ -38,7 +55,8 @@ A typical panel template looks like this: Platelets,26515-7,0.172,0.45,10*3/uL,cbc,Platelets [#/volume] in Blood Hemoglobin,718-7,12,16,g/dl,cbc,Hemoglobin [Mass/volume] in Blood -Here two CBC panels for one subject generated by makefakedata using the above panel template as an input. +Here two CBC panels for one subject generated by makefakedata using the above +panel template as an input. "result","loinc_component","loinc_code","low","high","units","date_time_stamp","study_id" 3.813,"Leukocytes","26464-8",3.8,10.8,"10*3/uL",2112-10-27,1 @@ -54,6 +72,9 @@ Here two CBC panels for one subject generated by makefakedata using the above pa # Usage +Accepted function arguments + +
     data <- makefakedata(
         input,
         output,
@@ -63,6 +84,16 @@ Here two CBC panels for one subject generated by makefakedata using the above pa
         start_date=<"Earliest date for a lab panel in YYYY-MM-DD format">,
         end_date=<"Latest date for a lab panel in YYYY-MM-DD format">,
         subject_count=<"Number of study subjects to generate">)
+
+Command line example: +
+$ R
+> source('makefakedata.R')
+> makefakedata('example_input.csv', 'out.csv', subject_count=10)
+
+Notes: + - If min_panel = max_panel then 5 sets of data are generated + - Currently the code does not support integer numbers generation From 0c1f98291d834fd586d8c457df6f430de8f4e5ae Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 23 Oct 2014 18:11:37 -0400 Subject: [PATCH 32/51] Added helper class `bin/utils/rawxml.py` to store information about the input file (can add more info if needed) --- bin/utils/rawxml.py | 52 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 bin/utils/rawxml.py diff --git a/bin/utils/rawxml.py b/bin/utils/rawxml.py new file mode 100644 index 0000000..02c326b --- /dev/null +++ b/bin/utils/rawxml.py @@ -0,0 +1,52 @@ +import os.path +import time +import datetime + +class RawXml(object): + """ + This class is used to store details about the input file + @see redi_lib/check_input_file() + """ + + def __init__(self, project, path): + """ + Parameters + ---------- + project : string + The project name - the owner of the xml file + path : string + The xml file path + """ + self._project = project + self._path = path + + + def get_project(self): + return self._project + + def get_creation_time(self): + """ Get the OS creation time """ + #tst = time.ctime(os.path.getctime(self._path)) + tst = os.path.getctime(self._path) + return datetime.datetime.fromtimestamp(tst) + + + def get_last_modified_time(self): + """ Get the OS modification time """ + tst = os.path.getmtime(self._path) + return datetime.datetime.fromtimestamp(tst) + + + def get_info(self): + """ + Return a string containing all details available about the xml file + """ + info = """ +Project name: {0} +File path: {1} +File created: {2} +File last modified: {3} """.format(self._project, \ + self._path, \ + self.get_creation_time(), \ + self.get_last_modified_time()) + return info From a2aebbe11a05776d52d70a2b68543d3deb0bf02d Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 23 Oct 2014 18:13:15 -0400 Subject: [PATCH 33/51] Fix 
unittest for sending emails due signature change `redi_email.send_email_input_data_unchanged()` --- test/TestRediEmail.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/TestRediEmail.py b/test/TestRediEmail.py index b106a8d..4ac3b5e 100644 --- a/test/TestRediEmail.py +++ b/test/TestRediEmail.py @@ -2,6 +2,7 @@ import smtplib from mock import patch, call import redi +from utils.rawxml import RawXml from utils import redi_email @@ -26,6 +27,8 @@ def setUp(self): } self.settings = type("", (), settings)() self.email_settings = redi.get_email_settings(self.settings) + self.raw_xml = RawXml('', __file__) + def test_get_email_settings(self): """Check if we picked proper values from the global settings""" @@ -57,14 +60,14 @@ def test_success(self): """ Verify return true when email is sent""" ese = self.email_settings self.assertTrue(redi_email.send_email_redcap_connection_error(ese)) - self.assertTrue(redi_email.send_email_input_data_unchanged(ese)) + self.assertTrue(redi_email.send_email_input_data_unchanged(ese, self.raw_xml)) @patch.multiple(redi_email, send_email=dummy_send_failed) def test_failed(self): """ Verify return false when email is not sent""" ese = self.email_settings self.assertFalse(redi_email.send_email_redcap_connection_error(ese)) - self.assertFalse(redi_email.send_email_input_data_unchanged(ese)) + self.assertFalse(redi_email.send_email_input_data_unchanged(ese, self.raw_xml)) @patch("smtplib.SMTP") def test_mime_email(self, mock_smtp): @@ -82,7 +85,8 @@ def test_mime_email_exception(self, mock_smtp): ese = self.email_settings instance = mock_smtp.return_value instance.sendmail.side_effect = smtplib.SMTPRecipientsRefused({}) - self.assertRaises(smtplib.SMTPRecipientsRefused, redi_email.send_email_data_import_completed, ese) + self.assertRaises(smtplib.SMTPRecipientsRefused,\ + redi_email.send_email_data_import_completed, ese) self.assertEqual(instance.sendmail.call_count, 1) def tearDown(self): From 
3e9e703edd645823541a0ca2dd49450797125da3 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 23 Oct 2014 18:15:47 -0400 Subject: [PATCH 34/51] Pass helper object RawXml to send_email_input_data_unchanged() to make the email about unchanged input file more specific --- bin/redi.py | 2 +- bin/redi_lib.py | 13 ++++++++----- bin/utils/redi_email.py | 20 ++++++++++++++------ 3 files changed, 23 insertions(+), 12 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 208df7e..5c2deeb 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -499,7 +499,7 @@ def _create_person_form_event_tree_with_data( def _check_input_file(db_path, email_settings, raw_xml_file, settings): - return redi_lib.check_input_file(settings.batch_warning_days, db_path, email_settings, raw_xml_file) + return redi_lib.check_input_file(settings.batch_warning_days, db_path, email_settings, raw_xml_file, settings.project) def read_config(config_file, configuration_directory, file_list): diff --git a/bin/redi_lib.py b/bin/redi_lib.py index a89bb51..5ebee0a 100644 --- a/bin/redi_lib.py +++ b/bin/redi_lib.py @@ -19,7 +19,7 @@ from datetime import date import hashlib import utils.redi_email as redi_email -from utils.redcapClient import RedcapClient +from utils.rawxml import RawXml from requests import RequestException from lxml import etree import logging @@ -423,7 +423,7 @@ def dict_factory(cursor, row): """ -def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file): +def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, project): batch = None if not os.path.exists(db_path) : @@ -462,18 +462,20 @@ def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file): # preserve data types if (days_since_today > int(batch_warning_days)): + raw_xml = RawXml(project, raw_xml_file) + msg_file_details = "\nXML file details: " + raw_xml.get_info() logger.info('Last import was started on: %s which is more than the limit of %s' % (old_batch['rbStartTime'], 
batch_warning_days)) if (-1 == int(batch_warning_days)): msg_continue = """ The configuration `batch_warning_days = -1` indicates that we want to continue execution even if the input file did not change - """ + """ + msg_file_details logger.info(msg_continue) else: msg_quit = "The input file did not change in the past: %s days. Stop data import." % batch_warning_days - logger.critical(msg_quit) - redi_email.send_email_input_data_unchanged(email_settings) + logger.critical(msg_quit + msg_file_details) + redi_email.send_email_input_data_unchanged(email_settings, raw_xml) sys.exit() else: logger.info('Reusing md5 entry: ' + str(old_batch['rbID'])) @@ -481,6 +483,7 @@ def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file): return old_batch + """ Retrieve the row corresponding to the last REDI batch completed """ diff --git a/bin/utils/redi_email.py b/bin/utils/redi_email.py index 2d02f5f..b041905 100644 --- a/bin/utils/redi_email.py +++ b/bin/utils/redi_email.py @@ -43,22 +43,30 @@ def send_email_redcap_connection_error(email_settings, subject='', msg=''): logger.error('Exception: Unable to communicate with REDCap instance at: ' + email_settings['redcap_uri']) return send_email(host, str(port), sender, to_addr_list, None, subject, msg) -def send_email_input_data_unchanged(email_settings, subject='', msg=''): +def send_email_input_data_unchanged(email_settings, raw_xml): """ Send a warning email to the `redcap_support_receiver_email` if the input file did not change for more than `batch_warning_days` + Return True if the email was sent - :return True if the email was sent + Parameters + ---------- + email_settings : dictionary + The email delivery parameters + raw_xml : RawXml instance + The object storing details about the input file """ sender = email_settings['redcap_support_sender_email'] to_addr_list = email_settings['redcap_support_receiving_list'] host = email_settings['smtp_host_for_outbound_mail'] port = 
email_settings['smtp_port_for_outbound_mail'] - subject = 'Input data is static.' + subject = "The data for '{0}' project did not change in more than {1} days.".format(raw_xml.get_project(), email_settings['batch_warning_days']) msg = """ - Administrators, - For the past {} days the input data for the REDI application did not change. - Please investigate.""".format(email_settings['batch_warning_days']) +Administrators, + """ + subject + """ + +Please check if the input xml file is in the proper location. + """ + raw_xml.get_info() return send_email(host, str(port), sender, to_addr_list, None, subject, msg) From bd43e4da8f28fe838cb34db66f7e82254765e85f Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Thu, 23 Oct 2014 18:17:13 -0400 Subject: [PATCH 35/51] Save two minor tweaks to make vagrant testing easier --- vagrant/Makefile | 3 +++ vagrant/aliases | 2 ++ 2 files changed, 5 insertions(+) diff --git a/vagrant/Makefile b/vagrant/Makefile index d892098..fa17c6f 100644 --- a/vagrant/Makefile +++ b/vagrant/Makefile @@ -125,6 +125,9 @@ rc_get_json: check_config rc_get_enrollment: check_config $(REDCAP_RECORDS_CMD) -f "$(REDCAP_PROJECT_ENROLLMENT_FORM)" -t csv +rc_get_enrollment_meta: + @curl -X POST http://localhost:8998/redcap/api/ -d token=$(REDCAP_VM_TOKEN) -d content=metadata -d format=csv -d forms[]=enrollment + rc_fresh: make copy_project_data make rc_clean diff --git a/vagrant/aliases b/vagrant/aliases index d6777fa..1339306 100644 --- a/vagrant/aliases +++ b/vagrant/aliases @@ -2,6 +2,8 @@ alias db='mysql --prompt="(\u@\h) [\d]> " --pager="less -niSFX" -uroot -ppasswor alias check_redcap="curl -s http://localhost/redcap/ | grep -i 'Welcome\|Critical Error' " alias restart_httpd='sudo /etc/init.d/apache2 reload ' alias restart_mysql='sudo service mysql restart' + +alias cdapi='cd /var/www/redcap/redcap_v5.7.4/API && pwd && ls -al' alias ls='ls --color=auto' alias lsa='ls -al' alias lss='ls -ltr' From 477bc49d5a3a7358d3a5a98dfa7d51e314b21c0a Mon Sep 17 00:00:00 
2001 From: Ruchi Vivek Desai Date: Thu, 30 Oct 2014 12:09:36 -0400 Subject: [PATCH 36/51] Write report.xml to data directory instead of configuration directory --- bin/redi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/redi.py b/bin/redi.py index 5c2deeb..a27e94b 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -157,7 +157,7 @@ def main(): redcap_client = connect_to_redcap(get_email_settings(settings), get_redcap_settings(settings), dry_run) - report_file_path = os.path.join(configuration_directory, + report_file_path = os.path.join(data_directory, settings.report_file_path) report_creator = report.ReportCreator(report_file_path, settings.project, From b9f0f8fb488080746aee6a2bd43a8045d9ff28de Mon Sep 17 00:00:00 2001 From: Ruchi Vivek Desai Date: Thu, 30 Oct 2014 13:07:04 -0400 Subject: [PATCH 37/51] Write report.xml to temporary out directory instead of data directory --- bin/redi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/redi.py b/bin/redi.py index a27e94b..c91f1e1 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -157,7 +157,7 @@ def main(): redcap_client = connect_to_redcap(get_email_settings(settings), get_redcap_settings(settings), dry_run) - report_file_path = os.path.join(data_directory, + report_file_path = os.path.join(output_files, settings.report_file_path) report_creator = report.ReportCreator(report_file_path, settings.project, From 0b4e806d16d1011fda9daad6eea9929bc967203e Mon Sep 17 00:00:00 2001 From: Ruchi Vivek Desai Date: Thu, 30 Oct 2014 13:40:59 -0400 Subject: [PATCH 38/51] Write report.html to data directory with timestamp #92 --- bin/redi.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/redi.py b/bin/redi.py index c91f1e1..6cf1c6c 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -168,7 +168,11 @@ def main(): if settings.send_email: report_courier = report.ReportEmailSender(get_email_settings(settings), logger) else: - report_courier = 
report.ReportFileWriter(settings.report_file_path2, logger) + index_of_dot_html = settings.report_file_path2.find('.html') + report_file_path2 = settings.report_file_path2[:index_of_dot_html] + \ + '_' + datetime.now().strftime('%Y_%m_%d-%H_%M_%S') + '.html' + report_courier = report.ReportFileWriter(os.path.join(data_directory, + report_file_path2), logger) _run(config_file, configuration_directory, do_keep_gen_files, dry_run, get_emr_data, settings, output_files, db_path, redcap_client, From 5381b72c9a3719b424dac0d82180d63cf1117b50 Mon Sep 17 00:00:00 2001 From: Ruchi Vivek Desai Date: Wed, 5 Nov 2014 14:25:06 -0500 Subject: [PATCH 39/51] Remove timestamp from filename of report and write it to the same folder as report.xml --- bin/redi.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/bin/redi.py b/bin/redi.py index 6cf1c6c..287bec6 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -168,11 +168,8 @@ def main(): if settings.send_email: report_courier = report.ReportEmailSender(get_email_settings(settings), logger) else: - index_of_dot_html = settings.report_file_path2.find('.html') - report_file_path2 = settings.report_file_path2[:index_of_dot_html] + \ - '_' + datetime.now().strftime('%Y_%m_%d-%H_%M_%S') + '.html' - report_courier = report.ReportFileWriter(os.path.join(data_directory, - report_file_path2), logger) + report_courier = report.ReportFileWriter(os.path.join(output_files, + settings.report_file_path2), logger) _run(config_file, configuration_directory, do_keep_gen_files, dry_run, get_emr_data, settings, output_files, db_path, redcap_client, From 70227351a2dc7aaba2a741762685ca8365c84d04 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Wed, 5 Nov 2014 15:00:26 -0500 Subject: [PATCH 40/51] Rename "bin" to "redi" I used PyCharm to rename the bin/ directory to redi/, then updated some additional references to bin, such as in Makefile and docs/conf.py. 
This is a backwards-incompatiable change, so we should increment the major version of the software. --- Makefile | 9 ++-- docs/conf.py | 4 +- {bin => redi}/__init__.py | 0 {bin => redi}/form.py | 0 {bin => redi}/redi.py | 2 +- {bin => redi}/redi_lib.py | 2 +- {bin => redi}/report.py | 4 +- {bin => redi}/utils/GetEmrData.py | 0 {bin => redi}/utils/SimpleConfigParser.py | 0 {bin => redi}/utils/__init__.py | 0 .../utils/component_id_to_loinc_code.xsd | 0 {bin => redi}/utils/csv2xml.py | 0 .../date.month-abbreviation.template.xsl | 0 {bin => redi}/utils/date.year.template.xsl | 0 {bin => redi}/utils/rawxml.py | 0 {bin => redi}/utils/redcapClient.py | 0 {bin => redi}/utils/redcap_records.py | 0 {bin => redi}/utils/redi_email.py | 0 {bin => redi}/utils/report.xsl | 0 setup.cfg | 2 +- setup.py | 4 +- test/TestAddElementsToTree.py | 2 +- test/TestConvertComponentIdToLoincCode.py | 2 +- test/TestCopyDataToPersonFormEventTree.py | 2 +- test/TestCreateEmptyEventTreeForStudy.py | 2 +- test/TestCreateEmptyEventsForOneSubject.py | 2 +- test/TestCreateImportDataJson.py | 4 +- test/TestCreateSummaryReport.py | 4 +- test/TestDaysSinceToday.py | 2 +- test/TestGenerateOutput.py | 6 +-- test/TestGetEMRData.py | 4 +- test/TestHandleREDCapResponse.py | 4 +- test/TestLog.py | 2 +- test/TestParseAll.py | 2 +- test/TestParseRawXml.py | 2 +- test/TestParseTranslationTable.py | 2 +- test/TestPersonFormEventsRepository.py | 2 +- test/TestRateLimiter.py | 54 +++++++++++++++++++ test/TestReadConfig.py | 4 +- test/TestRediEmail.py | 8 +-- test/TestResearchIdToRedcapId.py | 8 +-- test/TestResume.py | 12 ++--- test/TestSendDatatoRedcap.py | 2 +- test/TestSortElementTree.py | 2 +- test/TestUpdateDataFromLookup.py | 2 +- test/TestUpdateEventName.py | 2 +- test/TestUpdateFormCompletedFieldName.py | 2 +- test/TestUpdateFormDateField.py | 2 +- test/TestUpdateFormImported.py | 2 +- .../TestUpdateRedcapFieldNameValueAndUnits.py | 2 +- test/TestUpdateRedcapForm.py | 2 +- test/TestUpdateStatusField.py | 2 
+- test/TestUpdateTimestamp.py | 2 +- test/TestValidateXmlFleAndExtractData.py | 2 +- test/TestVerifyAndCorrectCollectionDate.py | 4 +- test/TestWriteToFile.py | 2 +- 56 files changed, 119 insertions(+), 68 deletions(-) rename {bin => redi}/__init__.py (100%) rename {bin => redi}/form.py (100%) rename {bin => redi}/redi.py (99%) rename {bin => redi}/redi_lib.py (99%) rename {bin => redi}/report.py (98%) rename {bin => redi}/utils/GetEmrData.py (100%) rename {bin => redi}/utils/SimpleConfigParser.py (100%) rename {bin => redi}/utils/__init__.py (100%) rename {bin => redi}/utils/component_id_to_loinc_code.xsd (100%) rename {bin => redi}/utils/csv2xml.py (100%) rename {bin => redi}/utils/date.month-abbreviation.template.xsl (100%) rename {bin => redi}/utils/date.year.template.xsl (100%) rename {bin => redi}/utils/rawxml.py (100%) rename {bin => redi}/utils/redcapClient.py (100%) rename {bin => redi}/utils/redcap_records.py (100%) rename {bin => redi}/utils/redi_email.py (100%) rename {bin => redi}/utils/report.xsl (100%) create mode 100644 test/TestRateLimiter.py diff --git a/Makefile b/Makefile index 6b8509f..23fce46 100644 --- a/Makefile +++ b/Makefile @@ -23,17 +23,14 @@ help: test: tests tests: coverage - [ ! -d config/rules ] || PYTHONPATH=bin \ - python -munittest discover config/rules + [ ! -d config/rules ] || python -munittest discover config/rules coverage: - ARCHFLAGS=$(ARCHFLAGS) PYTHONPATH=bin \ - python setup.py nosetests + ARCHFLAGS=$(ARCHFLAGS) python setup.py nosetests lint: which pylint || sudo easy_install pylint - ARCHFLAGS=$(ARCHFLAGS) PYTHONPATH=bin \ - pylint -f parseable bin | tee pylint.out + ARCHFLAGS=$(ARCHFLAGS) pylint -f parseable redi | tee pylint.out clean: find . 
-type f -name "*.pyc" -print | xargs rm -f diff --git a/docs/conf.py b/docs/conf.py index 7f9a358..f42570a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -21,8 +21,8 @@ #sys.path.insert(0, os.path.abspath('.')) sys.path.insert(0, os.path.abspath('..')) -from bin import redi -from bin import redi_lib +from redi import redi +from redi import redi_lib # -- General configuration ------------------------------------------------ diff --git a/bin/__init__.py b/redi/__init__.py similarity index 100% rename from bin/__init__.py rename to redi/__init__.py diff --git a/bin/form.py b/redi/form.py similarity index 100% rename from bin/form.py rename to redi/form.py diff --git a/bin/redi.py b/redi/redi.py similarity index 99% rename from bin/redi.py rename to redi/redi.py index 5c2deeb..30a880e 100755 --- a/bin/redi.py +++ b/redi/redi.py @@ -380,7 +380,7 @@ def _create_person_form_event_tree_with_data( component_to_loinc_code_xml = os.path.join(configuration_directory, \ settings.component_to_loinc_code_xml) component_to_loinc_code_xsd = pkg_resources.resource_filename( - 'bin', 'utils/component_id_to_loinc_code.xsd') + 'redi', 'utils/component_id_to_loinc_code.xsd') component_to_loinc_code_xml_tree = validate_xml_file_and_extract_data \ (component_to_loinc_code_xml, component_to_loinc_code_xsd) convert_component_id_to_loinc_code(data, component_to_loinc_code_xml_tree) diff --git a/bin/redi_lib.py b/redi/redi_lib.py similarity index 99% rename from bin/redi_lib.py rename to redi/redi_lib.py index 5ebee0a..89fca0d 100644 --- a/bin/redi_lib.py +++ b/redi/redi_lib.py @@ -409,7 +409,7 @@ def dict_factory(cursor, row): return d """ -@see bin/redi.py#main() +@see redi/redi.py#main() @return a dictionary representation of the batch row for the current run Check the md5sum of the input file diff --git a/bin/report.py b/redi/report.py similarity index 98% rename from bin/report.py rename to redi/report.py index 16ed2ab..e5b9f9c 100644 --- a/bin/report.py +++ b/redi/report.py @@ -6,7 
+6,7 @@ from utils import redi_email -REDI_PACKAGE_NAME = 'bin' +REDI_PACKAGE_NAME = 'redi' class ReportCourier(object): @@ -177,7 +177,7 @@ def updateSubjectDetails(root, subject_details): """ Helper method for #create_summary_report() Adds subject information to the xml tree which is later formated - by `bin/utils/report.xsl` into the html `table#subject_details"` + by `redi/utils/report.xsl` into the html `table#subject_details"` """ subjectsDetails = root[3] for key in sorted(subject_details.keys()): diff --git a/bin/utils/GetEmrData.py b/redi/utils/GetEmrData.py similarity index 100% rename from bin/utils/GetEmrData.py rename to redi/utils/GetEmrData.py diff --git a/bin/utils/SimpleConfigParser.py b/redi/utils/SimpleConfigParser.py similarity index 100% rename from bin/utils/SimpleConfigParser.py rename to redi/utils/SimpleConfigParser.py diff --git a/bin/utils/__init__.py b/redi/utils/__init__.py similarity index 100% rename from bin/utils/__init__.py rename to redi/utils/__init__.py diff --git a/bin/utils/component_id_to_loinc_code.xsd b/redi/utils/component_id_to_loinc_code.xsd similarity index 100% rename from bin/utils/component_id_to_loinc_code.xsd rename to redi/utils/component_id_to_loinc_code.xsd diff --git a/bin/utils/csv2xml.py b/redi/utils/csv2xml.py similarity index 100% rename from bin/utils/csv2xml.py rename to redi/utils/csv2xml.py diff --git a/bin/utils/date.month-abbreviation.template.xsl b/redi/utils/date.month-abbreviation.template.xsl similarity index 100% rename from bin/utils/date.month-abbreviation.template.xsl rename to redi/utils/date.month-abbreviation.template.xsl diff --git a/bin/utils/date.year.template.xsl b/redi/utils/date.year.template.xsl similarity index 100% rename from bin/utils/date.year.template.xsl rename to redi/utils/date.year.template.xsl diff --git a/bin/utils/rawxml.py b/redi/utils/rawxml.py similarity index 100% rename from bin/utils/rawxml.py rename to redi/utils/rawxml.py diff --git a/bin/utils/redcapClient.py 
b/redi/utils/redcapClient.py similarity index 100% rename from bin/utils/redcapClient.py rename to redi/utils/redcapClient.py diff --git a/bin/utils/redcap_records.py b/redi/utils/redcap_records.py similarity index 100% rename from bin/utils/redcap_records.py rename to redi/utils/redcap_records.py diff --git a/bin/utils/redi_email.py b/redi/utils/redi_email.py similarity index 100% rename from bin/utils/redi_email.py rename to redi/utils/redi_email.py diff --git a/bin/utils/report.xsl b/redi/utils/report.xsl similarity index 100% rename from bin/utils/report.xsl rename to redi/utils/report.xsl diff --git a/setup.cfg b/setup.cfg index d7d6b6f..b9f857c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,7 +3,7 @@ description-file = README.md [nosetests] tests=test.TestSuite -cover-package=bin +cover-package=redi # Erase previously collected coverage statistics before run cover-erase=TRUE diff --git a/setup.py b/setup.py index 5d805bc..bb2e02c 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ packages=find_packages(exclude=['test']), include_package_data=True, package_data={ - 'bin': ['utils/*.xsl', 'utils/*.xsd'], + 'redi': ['utils/*.xsl', 'utils/*.xsd'], 'redi': ['README.md'], }, url='https://github.com/ctsit/redi', @@ -42,7 +42,7 @@ ], entry_points={ 'console_scripts': [ - 'redi = bin.redi:main', + 'redi = redi.redi:main', ], }, test_suite='test.TestSuite', diff --git a/test/TestAddElementsToTree.py b/test/TestAddElementsToTree.py index 7577bf4..f501dc8 100644 --- a/test/TestAddElementsToTree.py +++ b/test/TestAddElementsToTree.py @@ -1,6 +1,6 @@ import unittest from lxml import etree -import redi +from redi import redi class TestAddElementsToTree(unittest.TestCase): diff --git a/test/TestConvertComponentIdToLoincCode.py b/test/TestConvertComponentIdToLoincCode.py index 25bdbd6..05e85db 100644 --- a/test/TestConvertComponentIdToLoincCode.py +++ b/test/TestConvertComponentIdToLoincCode.py @@ -8,7 +8,7 @@ import unittest import os from lxml import etree -import redi 
+from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestCopyDataToPersonFormEventTree.py b/test/TestCopyDataToPersonFormEventTree.py index cf3dd81..4ef20fe 100644 --- a/test/TestCopyDataToPersonFormEventTree.py +++ b/test/TestCopyDataToPersonFormEventTree.py @@ -3,7 +3,7 @@ from lxml import etree -from bin import redi +from redi import redi class TestCopyDataToPersonFormEventTree(unittest.TestCase): diff --git a/test/TestCreateEmptyEventTreeForStudy.py b/test/TestCreateEmptyEventTreeForStudy.py index c73f2e8..fe696e6 100644 --- a/test/TestCreateEmptyEventTreeForStudy.py +++ b/test/TestCreateEmptyEventTreeForStudy.py @@ -1,7 +1,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestCreateEmptyEventsForOneSubject.py b/test/TestCreateEmptyEventsForOneSubject.py index 498faf2..f76e1e0 100644 --- a/test/TestCreateEmptyEventsForOneSubject.py +++ b/test/TestCreateEmptyEventsForOneSubject.py @@ -1,7 +1,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestCreateImportDataJson.py b/test/TestCreateImportDataJson.py index e74a77b..9ea5ccf 100755 --- a/test/TestCreateImportDataJson.py +++ b/test/TestCreateImportDataJson.py @@ -17,8 +17,8 @@ from lxml import etree import logging import os -import redi -import redi_lib +from redi import redi +from redi import redi_lib file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 3481028..64e3388 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -7,8 +7,8 @@ from lxml import etree from 
StringIO import StringIO import time -import redi -import report +from redi import redi +from redi import report DEFAULT_DATA_DIRECTORY = os.getcwd() diff --git a/test/TestDaysSinceToday.py b/test/TestDaysSinceToday.py index 885b2e6..51dda0c 100644 --- a/test/TestDaysSinceToday.py +++ b/test/TestDaysSinceToday.py @@ -1,7 +1,7 @@ import unittest import datetime from datetime import timedelta -import redi_lib +from redi import redi_lib class TestDaysSinceToday(unittest.TestCase): diff --git a/test/TestGenerateOutput.py b/test/TestGenerateOutput.py index 782bce9..205818f 100755 --- a/test/TestGenerateOutput.py +++ b/test/TestGenerateOutput.py @@ -16,9 +16,9 @@ import unittest import os from lxml import etree -import redi -import redi_lib -from utils.redcapClient import RedcapClient +from redi import redi +from redi import redi_lib +from redi.utils.redcapClient import RedcapClient DEFAULT_DATA_DIRECTORY = os.getcwd() diff --git a/test/TestGetEMRData.py b/test/TestGetEMRData.py index 4472aff..9e58d4d 100644 --- a/test/TestGetEMRData.py +++ b/test/TestGetEMRData.py @@ -5,8 +5,8 @@ import tempfile import pysftp from mock import patch -import utils.GetEmrData as GetEmrData -from utils.GetEmrData import EmrFileAccessDetails +from redi.utils import GetEmrData +from redi.utils.GetEmrData import EmrFileAccessDetails import time from subprocess import Popen diff --git a/test/TestHandleREDCapResponse.py b/test/TestHandleREDCapResponse.py index ffa6e2e..a05b3d0 100644 --- a/test/TestHandleREDCapResponse.py +++ b/test/TestHandleREDCapResponse.py @@ -1,7 +1,7 @@ import unittest import os -import redi -import redi_lib +from redi import redi +from redi import redi_lib file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestLog.py b/test/TestLog.py index 7093026..dc55c2b 100644 --- a/test/TestLog.py +++ b/test/TestLog.py @@ -11,7 +11,7 @@ import unittest import os import sys -import redi +from redi import redi file_dir = 
os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestParseAll.py b/test/TestParseAll.py index c398b01..58397d0 100644 --- a/test/TestParseAll.py +++ b/test/TestParseAll.py @@ -17,7 +17,7 @@ from lxml import etree import logging import os -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestParseRawXml.py b/test/TestParseRawXml.py index 2e9c766..fb40d70 100644 --- a/test/TestParseRawXml.py +++ b/test/TestParseRawXml.py @@ -8,7 +8,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestParseTranslationTable.py b/test/TestParseTranslationTable.py index feaa353..83c33a4 100644 --- a/test/TestParseTranslationTable.py +++ b/test/TestParseTranslationTable.py @@ -8,7 +8,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestPersonFormEventsRepository.py b/test/TestPersonFormEventsRepository.py index b59fdc5..850d8c4 100644 --- a/test/TestPersonFormEventsRepository.py +++ b/test/TestPersonFormEventsRepository.py @@ -3,7 +3,7 @@ from lxml import etree -from bin.redi import PersonFormEventsRepository +from redi.redi import PersonFormEventsRepository class TestPersonFormEventsRepository(unittest.TestCase): diff --git a/test/TestRateLimiter.py b/test/TestRateLimiter.py new file mode 100644 index 0000000..6aeac7c --- /dev/null +++ b/test/TestRateLimiter.py @@ -0,0 +1,54 @@ +import unittest +import datetime + +from redi.utils import redcapClient + + +class TestRateLimiter(unittest.TestCase): + + def test_throttle(self): + + class MockRedcapClient(object): + def get_data_from_redcap(self, records_to_fetch=None, + 
events_to_fetch=None, + fields_to_fetch=None, + forms_to_fetch=None, + return_format='xml'): + return 'Time to make the data' + + def send_data_to_redcap(self, data, overwrite=False): + return 'Time to send the data' + + client = redcapClient.ThrottledRedcapClient(MockRedcapClient(), + max_requests_per_minute=3, + api_calls_already_made=0) + + MINUTE = datetime.timedelta(seconds=3) + SECONDS = MINUTE.total_seconds() + + client._ThrottledRedcapClient__minute = MINUTE + + + started = datetime.datetime.now() + client.get_data_from_redcap() + client.get_data_from_redcap() + client.get_data_from_redcap() + self.assertTrue(lapsed_time_in_secs(since=started) < SECONDS) + self.assertEqual(3, len(client.requests)) + + client.get_data_from_redcap() + restarted = datetime.datetime.now() + self.assertTrue(lapsed_time_in_secs(since=started) > SECONDS) + self.assertEqual(1, len(client.requests)) + + client.get_data_from_redcap() + self.assertTrue(lapsed_time_in_secs(since=restarted) < SECONDS) + self.assertEqual(2, len(client.requests)) + + +def lapsed_time_in_secs(since): + return (datetime.datetime.now() - since).total_seconds() + + +if __name__ == '__main__': + unittest.main() diff --git a/test/TestReadConfig.py b/test/TestReadConfig.py index a03e4c4..9e64dc2 100644 --- a/test/TestReadConfig.py +++ b/test/TestReadConfig.py @@ -3,8 +3,8 @@ import shutil import os -from bin import redi -from bin.utils import SimpleConfigParser +from redi import redi +from redi.utils import SimpleConfigParser class TestReadConfig(unittest.TestCase): diff --git a/test/TestRediEmail.py b/test/TestRediEmail.py index 4ac3b5e..dc1e40f 100644 --- a/test/TestRediEmail.py +++ b/test/TestRediEmail.py @@ -1,16 +1,16 @@ import unittest import smtplib from mock import patch, call -import redi -from utils.rawxml import RawXml -from utils import redi_email +from redi import redi +from redi.utils.rawxml import RawXml +from redi.utils import redi_email class TestRediEmail(unittest.TestCase): """ Check functions 
in the `utils/redi_email` module To run individually: - $ PYTHONPATH=bin python test/TestRediEmail.py + $ PYTHONPATH=redi python test/TestRediEmail.py """ def setUp(self): diff --git a/test/TestResearchIdToRedcapId.py b/test/TestResearchIdToRedcapId.py index 318b88f..6af20ae 100644 --- a/test/TestResearchIdToRedcapId.py +++ b/test/TestResearchIdToRedcapId.py @@ -3,10 +3,10 @@ import os from lxml import etree from mock import patch -import redi -from utils import redi_email -from utils.redcapClient import RedcapClient -import utils.SimpleConfigParser as SimpleConfigParser +from redi import redi +from redi.utils import redi_email +from redi.utils.redcapClient import RedcapClient +from redi.utils import SimpleConfigParser from requests import RequestException file_dir = os.path.dirname(os.path.realpath(__file__)) diff --git a/test/TestResume.py b/test/TestResume.py index d17b0c3..df255c5 100644 --- a/test/TestResume.py +++ b/test/TestResume.py @@ -14,8 +14,8 @@ def delete(self): class FileDeleted(): pass - import bin.redi - redi = reload(bin.redi) + import redi.redi + redi = reload(redi.redi) redi._person_form_events_service = MockPersonFormEvents() redi._check_input_file = lambda *args: None @@ -38,8 +38,8 @@ def store(self, ignored): class FileStored(): pass - import bin.redi - redi = reload(bin.redi) + import redi.redi + redi = reload(redi.redi) redi._person_form_events_service = MockPersonFormEvents() redi._check_input_file = lambda *args: None @@ -64,8 +64,8 @@ def fetch(self): class DataFetched(): pass - import bin.redi - redi = reload(bin.redi) + import redi.redi + redi = reload(redi.redi) redi._person_form_events_service = MockPersonFormEvents() redi._check_input_file = lambda *args: None diff --git a/test/TestSendDatatoRedcap.py b/test/TestSendDatatoRedcap.py index 730ed19..9cec4f7 100644 --- a/test/TestSendDatatoRedcap.py +++ b/test/TestSendDatatoRedcap.py @@ -9,7 +9,7 @@ import unittest import os from wsgiref.simple_server import make_server -import redi 
+from redi import redi import thread file_dir = os.path.dirname(os.path.realpath(__file__)) diff --git a/test/TestSortElementTree.py b/test/TestSortElementTree.py index cf31106..7508ea2 100644 --- a/test/TestSortElementTree.py +++ b/test/TestSortElementTree.py @@ -1,7 +1,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestUpdateDataFromLookup.py b/test/TestUpdateDataFromLookup.py index 477daf9..fccef21 100644 --- a/test/TestUpdateDataFromLookup.py +++ b/test/TestUpdateDataFromLookup.py @@ -1,7 +1,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestUpdateEventName.py b/test/TestUpdateEventName.py index 1114218..1f3fff5 100644 --- a/test/TestUpdateEventName.py +++ b/test/TestUpdateEventName.py @@ -1,7 +1,7 @@ import unittest from lxml import etree import os -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestUpdateFormCompletedFieldName.py b/test/TestUpdateFormCompletedFieldName.py index a32998c..4f9af79 100644 --- a/test/TestUpdateFormCompletedFieldName.py +++ b/test/TestUpdateFormCompletedFieldName.py @@ -1,7 +1,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestUpdateFormDateField.py b/test/TestUpdateFormDateField.py index 3792776..c2f6957 100644 --- a/test/TestUpdateFormDateField.py +++ b/test/TestUpdateFormDateField.py @@ -9,7 +9,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = 
os.path.join(file_dir, "../") diff --git a/test/TestUpdateFormImported.py b/test/TestUpdateFormImported.py index 3d335ec..e97abeb 100644 --- a/test/TestUpdateFormImported.py +++ b/test/TestUpdateFormImported.py @@ -1,7 +1,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestUpdateRedcapFieldNameValueAndUnits.py b/test/TestUpdateRedcapFieldNameValueAndUnits.py index 9e05a2e..a71330d 100644 --- a/test/TestUpdateRedcapFieldNameValueAndUnits.py +++ b/test/TestUpdateRedcapFieldNameValueAndUnits.py @@ -1,7 +1,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestUpdateRedcapForm.py b/test/TestUpdateRedcapForm.py index e8b4642..56e7b8e 100644 --- a/test/TestUpdateRedcapForm.py +++ b/test/TestUpdateRedcapForm.py @@ -1,6 +1,6 @@ import unittest from lxml import etree -import redi +from redi import redi class TestUpdateRedcapForm(unittest.TestCase): diff --git a/test/TestUpdateStatusField.py b/test/TestUpdateStatusField.py index cab6a94..10b6256 100644 --- a/test/TestUpdateStatusField.py +++ b/test/TestUpdateStatusField.py @@ -1,7 +1,7 @@ import unittest from lxml import etree import os -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestUpdateTimestamp.py b/test/TestUpdateTimestamp.py index d68c44d..aa0ecfd 100644 --- a/test/TestUpdateTimestamp.py +++ b/test/TestUpdateTimestamp.py @@ -12,7 +12,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestValidateXmlFleAndExtractData.py b/test/TestValidateXmlFleAndExtractData.py 
index d50b161..a9d6aea 100644 --- a/test/TestValidateXmlFleAndExtractData.py +++ b/test/TestValidateXmlFleAndExtractData.py @@ -8,7 +8,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") diff --git a/test/TestVerifyAndCorrectCollectionDate.py b/test/TestVerifyAndCorrectCollectionDate.py index c7b7777..1ff67b9 100644 --- a/test/TestVerifyAndCorrectCollectionDate.py +++ b/test/TestVerifyAndCorrectCollectionDate.py @@ -11,9 +11,9 @@ file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") proj_root = os.path.abspath(goal_dir)+'/' -sys.path.append(proj_root + 'bin/') +sys.path.append(proj_root + 'redi/') from lxml import etree -import redi +from redi import redi DEFAULT_DATA_DIRECTORY = os.getcwd() diff --git a/test/TestWriteToFile.py b/test/TestWriteToFile.py index 32d5a74..06b5dcd 100755 --- a/test/TestWriteToFile.py +++ b/test/TestWriteToFile.py @@ -9,7 +9,7 @@ import unittest import os from lxml import etree -import redi +from redi import redi file_dir = os.path.dirname(os.path.realpath(__file__)) goal_dir = os.path.join(file_dir, "../") From 563b5582dc320fca0db79a53a6b71685cc3ef1a3 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Wed, 5 Nov 2014 15:03:02 -0500 Subject: [PATCH 41/51] Remove file accidentally committed. 
--- test/TestRateLimiter.py | 54 ----------------------------------------- 1 file changed, 54 deletions(-) delete mode 100644 test/TestRateLimiter.py diff --git a/test/TestRateLimiter.py b/test/TestRateLimiter.py deleted file mode 100644 index 6aeac7c..0000000 --- a/test/TestRateLimiter.py +++ /dev/null @@ -1,54 +0,0 @@ -import unittest -import datetime - -from redi.utils import redcapClient - - -class TestRateLimiter(unittest.TestCase): - - def test_throttle(self): - - class MockRedcapClient(object): - def get_data_from_redcap(self, records_to_fetch=None, - events_to_fetch=None, - fields_to_fetch=None, - forms_to_fetch=None, - return_format='xml'): - return 'Time to make the data' - - def send_data_to_redcap(self, data, overwrite=False): - return 'Time to send the data' - - client = redcapClient.ThrottledRedcapClient(MockRedcapClient(), - max_requests_per_minute=3, - api_calls_already_made=0) - - MINUTE = datetime.timedelta(seconds=3) - SECONDS = MINUTE.total_seconds() - - client._ThrottledRedcapClient__minute = MINUTE - - - started = datetime.datetime.now() - client.get_data_from_redcap() - client.get_data_from_redcap() - client.get_data_from_redcap() - self.assertTrue(lapsed_time_in_secs(since=started) < SECONDS) - self.assertEqual(3, len(client.requests)) - - client.get_data_from_redcap() - restarted = datetime.datetime.now() - self.assertTrue(lapsed_time_in_secs(since=started) > SECONDS) - self.assertEqual(1, len(client.requests)) - - client.get_data_from_redcap() - self.assertTrue(lapsed_time_in_secs(since=restarted) < SECONDS) - self.assertEqual(2, len(client.requests)) - - -def lapsed_time_in_secs(since): - return (datetime.datetime.now() - since).total_seconds() - - -if __name__ == '__main__': - unittest.main() From e6132fd9ccab4ba729b411dde143bcbb8982ffb5 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Thu, 13 Nov 2014 15:38:53 -0500 Subject: [PATCH 42/51] Add script that generates enrollment records --- scripts/create_enrollment_csv.bash | 17 
+++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 scripts/create_enrollment_csv.bash diff --git a/scripts/create_enrollment_csv.bash b/scripts/create_enrollment_csv.bash new file mode 100644 index 0000000..78f67ef --- /dev/null +++ b/scripts/create_enrollment_csv.bash @@ -0,0 +1,17 @@ +#!/bin/bash +# +# Generates subject enrollment records which can be used with the Sample # +# Project. +# +# Example: +# +# bash add_subjects.bash 10 > ../config-example/enrollment_test_data.csv +# + +echo "record_id,redcap_event_name,c2826694,c1301894,c2985782,c0806020,enrollment_complete" + +for i in $(seq 1 $1) +do + echo "\"$i\",\"1_arm_1\",\"$i\",\"${i}007\",\"2112-01-03\",\"2113-01-01\",2"; +done + From 78f69e8fe70efc2d5e2663f83e581795be157974 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Mon, 17 Nov 2014 12:52:20 -0500 Subject: [PATCH 43/51] Improve the marking of sent events In generate_output(), we were marking events as sent by changing the XML tree and saving it for each form event sent. This change replaces it with a "sent event index" which simply writes out the form event to a file and loads it back in if we use the --resume switch. I've seen a 12% speed improvement during testing. 
--- bin/redi.py | 54 ++++++++++++++++++++++++++++---------- bin/redi_lib.py | 20 +++++++------- test/TestGenerateOutput.py | 19 +++++++++++--- test/TestSentEventIndex.py | 44 +++++++++++++++++++++++++++++++ test/TestSuite.py | 3 +++ 5 files changed, 111 insertions(+), 29 deletions(-) create mode 100644 test/TestSentEventIndex.py diff --git a/bin/redi.py b/bin/redi.py index 5c2deeb..774382a 100755 --- a/bin/redi.py +++ b/bin/redi.py @@ -213,10 +213,12 @@ def _fetch_run_data(data_folder): person_form_event_tree_with_data = _person_form_events_service.fetch() alert_summary = _load(os.path.join(data_folder, 'alert_summary.obj')) rule_errors = _load(os.path.join(data_folder, 'rule_errors.obj')) - collection_date_summary_dict = _load(os.path.join(data_folder, 'collection_date_summary_dict.obj')) + collection_date_summary_dict = _load( + os.path.join(data_folder, 'collection_date_summary_dict.obj')) + sent_events = SentEventIndex(os.path.join(data_folder, 'sent_events.obj')) - return alert_summary, person_form_event_tree_with_data, rule_errors,\ - collection_date_summary_dict + return (alert_summary, person_form_event_tree_with_data, rule_errors, + collection_date_summary_dict, sent_events) def _load(path): @@ -224,12 +226,15 @@ def _load(path): return pickle.load(fp) -def _store_run_data(data_folder, alert_summary,\ - person_form_event_tree_with_data, rule_errors, collection_date_summary_dict): +def _store_run_data(data_folder, alert_summary, + person_form_event_tree_with_data, rule_errors, + collection_date_summary_dict): _person_form_events_service.store(person_form_event_tree_with_data) _save(alert_summary, os.path.join(data_folder, 'alert_summary.obj')) _save(rule_errors, os.path.join(data_folder, 'rule_errors.obj')) - _save(collection_date_summary_dict, os.path.join(data_folder, 'collection_date_summary_dict.obj')) + _save(collection_date_summary_dict, + os.path.join(data_folder, 'collection_date_summary_dict.obj')) + _save([], os.path.join(data_folder, 
'sent_events.obj')) def _save(obj, path): @@ -301,26 +306,26 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, person_form_event_tree_with_data, rule_errors, collection_date_summary_dict) - alert_summary, person_form_event_tree_with_data, rule_errors, collection_date_summary_dict = \ - _fetch_run_data(data_folder) + (alert_summary, person_form_event_tree_with_data, rule_errors, + collection_date_summary_dict, sent_events) = _fetch_run_data(data_folder) # Data will be sent to REDCap server and email will be sent only if # redi.py is not executing in dry run state. if not dry_run: - unsent_events = person_form_event_tree_with_data.xpath("//event/status[.='unsent']") + all_form_events = person_form_event_tree_with_data.xpath("//event") # Use the new method to communicate with REDCap report_data = redi_lib.generate_output( person_form_event_tree_with_data, redcap_client, - settings.rate_limiter_value_in_redcap, _person_form_events_service, - skip_blanks) + settings.rate_limiter_value_in_redcap, sent_events, skip_blanks) # write person_form_event_tree to file write_element_tree_to_file(person_form_event_tree_with_data,\ os.path.join(data_folder, 'person_form_event_tree_with_data.xml')) - sent_events = person_form_event_tree_with_data.xpath("//event/status[.='sent']") - if len(unsent_events) != len(sent_events): - logger.warning('Some of the events are not sent to the redcap. Please check event statuses in '+data_folder+'person_form_event_tree_with_data.xml') + if len(all_form_events) != len(sent_events): + logger.warning('Some of the events were not sent to the REDCap ' + 'server. 
Please check the event statuses in ' + '{0}/sent_events.obj'.format(data_folder)) # Add any errors from running the rules to the report map(logger.warning, rule_errors) @@ -1866,5 +1871,26 @@ def store(self, pfe_tree): pretty_print=True) +class SentEventIndex(object): + + def __init__(self, filename, writer=None, reader=None): + self.filename = filename + loader = reader or _load + self._persist = writer or _save + self.sent_events = loader(filename) + + def __len__(self): + return len(self.sent_events) + + def mark_sent(self, study_id_key, form_name, event_name): + form_event_key = study_id_key, form_name, event_name + self.sent_events.append(form_event_key) + self._persist(self.sent_events, self.filename) + + def was_sent(self, study_id_key, form_name, event_name): + form_event_key = study_id_key, form_name, event_name + return form_event_key in self.sent_events + + if __name__ == "__main__": main() diff --git a/bin/redi_lib.py b/bin/redi_lib.py index 5ebee0a..d46a4e4 100644 --- a/bin/redi_lib.py +++ b/bin/redi_lib.py @@ -99,7 +99,8 @@ def get_child_text_safely(etree, ele): """ -def generate_output(person_tree, redcap_client, rate_limit, data_repository, skip_blanks=False): +def generate_output(person_tree, redcap_client, rate_limit, sent_events, + skip_blanks=False): # the global dictionary to be returned report_data = { @@ -171,8 +172,11 @@ def generate_output(person_tree, redcap_client, rate_limit, data_repository, ski # loop through the events of one form for event in form.xpath('event'): - event_status = event.findtext('status') - if event_status == 'sent': + event_name = event.findtext('name', '') + assert event_name, "Missing name for form event" + + if sent_events.was_sent(study_id_key, form_name, event_name): + logger.debug("Skipping previously sent " + event_name) continue event_count += 1 @@ -209,14 +213,8 @@ def generate_output(person_tree, redcap_client, rate_limit, data_repository, ski try: found_error = False response = 
redcap_client.send_data_to_redcap([json_data_dict], overwrite = True) - status = event.find('status') - if status is not None: - status.text = 'sent' - else: - status_element = etree.Element("status") - status_element.text = 'sent' - event.append(status_element) - data_repository.store(person_tree) + sent_events.mark_sent(study_id_key, form_name, event_name) + logger.debug("Sent " + event_name) except RedcapError as e: found_error = handle_errors_in_redcap_xml_response( e.message, diff --git a/test/TestGenerateOutput.py b/test/TestGenerateOutput.py index 782bce9..0557294 100755 --- a/test/TestGenerateOutput.py +++ b/test/TestGenerateOutput.py @@ -156,9 +156,20 @@ def test_person_form_event(self): 'errors' : [] } - class MockDataRepository(object): - def store(self, data): - pass + class MockSentEventIndex(object): + def __init__(self): + self.sent_events = [] + + def __len__(self): + return len(self.sent_events) + + def mark_sent(self, study_id_key, form_name, event_name): + form_event_key = study_id_key, form_name, event_name + self.sent_events.append(form_event_key) + + def was_sent(self, study_id_key, form_name, event_name): + form_event_key = study_id_key, form_name, event_name + return form_event_key in self.sent_events class MockRedcapClient(RedcapClient): def __init__(self): @@ -176,7 +187,7 @@ def send_data_to_redcap(self, data, overwrite=False): etree_1 = etree.ElementTree(etree.fromstring(string_1_xml)) result = redi_lib.generate_output(etree_1, MockRedcapClient(), 500, - MockDataRepository()) + MockSentEventIndex()) self.assertEqual(report_data['total_subjects'], result['total_subjects']) self.assertEqual(report_data['form_details'], result['form_details']) self.assertEqual(report_data['subject_details'], result['subject_details']) diff --git a/test/TestSentEventIndex.py b/test/TestSentEventIndex.py new file mode 100644 index 0000000..4e623ae --- /dev/null +++ b/test/TestSentEventIndex.py @@ -0,0 +1,44 @@ +""" +Verifies the functionality of 
bin.redi.SentEventIndex +""" +import unittest + +from bin import redi + + +class TestSentEventIndex(unittest.TestCase): + + def test_len(self): + index = redi.SentEventIndex("", writer=lambda o, f: None, + reader=lambda f: []) + self.assertEqual(0, len(index)) + + index.mark_sent("007", "new_hire", "1_arm_1") + index.mark_sent("007", "new_hire", "2_arm_1") + + self.assertEqual(2, len(index)) + + def test_was_sent(self): + index = redi.SentEventIndex("", writer=lambda o, f: None, + reader=lambda f: []) + + index.mark_sent("007", "new_hire", "1_arm_1") + + self.assertTrue(index.was_sent("007", "new_hire", "1_arm_1")) + + def test_mark_sent(self): + self.__tally = 0 + + index = redi.SentEventIndex("", self.__dummy_writer, + reader=lambda f: []) + index.mark_sent("007", "new_hire", "1_arm_1") + index.mark_sent("007", "new_hire", "2_arm_1") + + self.assertEqual(2, self.__tally) + + def __dummy_writer(self, obj, filename): + self.__tally += 1 + + +if __name__ == "__main__": + unittest.main() diff --git a/test/TestSuite.py b/test/TestSuite.py index eb86b04..5f06065 100755 --- a/test/TestSuite.py +++ b/test/TestSuite.py @@ -41,6 +41,8 @@ from TestPersonFormEventsRepository import TestPersonFormEventsRepository from TestVerifyAndCorrectCollectionDate import TestVerifyAndCorrectCollectionDate from TestRediEmail import TestRediEmail +from TestSentEventIndex import TestSentEventIndex + class redi_suite(unittest.TestSuite): @@ -68,6 +70,7 @@ def suite(self): redi_test_suite.addTest(TestCreateEmptyEventTreeForStudy) redi_test_suite.addTest(TestVerifyAndCorrectCollectionDate) redi_test_suite.addTest(TestParseAll) + redi_test_suite.addTest(TestSentEventIndex) # The redesign functions May 2014 redi_test_suite.addTest(TestCreateImportDataJson) From c44af05087bb8fdbe27fd2ccbbaaecc778cbe3d6 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Tue, 18 Nov 2014 12:52:05 -0500 Subject: [PATCH 44/51] Update warning about unsent events Updated the warning message to check the log file 
first. Also, after discussing it with some of the other developers, I decided to make the sent_events file more human-readable. In doing so, I implemented a faster approach to persisting the list of sent events. Instead of pickling the list each time, the latest event simply has to be appended to the file. --- redi/redi.py | 40 ++++++++++++++++++++++++++++++-------- test/TestSentEventIndex.py | 8 ++++---- 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/redi/redi.py b/redi/redi.py index c3c5e45..bd5fc72 100755 --- a/redi/redi.py +++ b/redi/redi.py @@ -215,7 +215,7 @@ def _fetch_run_data(data_folder): rule_errors = _load(os.path.join(data_folder, 'rule_errors.obj')) collection_date_summary_dict = _load( os.path.join(data_folder, 'collection_date_summary_dict.obj')) - sent_events = SentEventIndex(os.path.join(data_folder, 'sent_events.obj')) + sent_events = SentEvents(os.path.join(data_folder, 'sent_events.idx')) return (alert_summary, person_form_event_tree_with_data, rule_errors, collection_date_summary_dict, sent_events) @@ -234,7 +234,6 @@ def _store_run_data(data_folder, alert_summary, _save(rule_errors, os.path.join(data_folder, 'rule_errors.obj')) _save(collection_date_summary_dict, os.path.join(data_folder, 'collection_date_summary_dict.obj')) - _save([], os.path.join(data_folder, 'sent_events.obj')) def _save(obj, path): @@ -323,9 +322,9 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, write_element_tree_to_file(person_form_event_tree_with_data,\ os.path.join(data_folder, 'person_form_event_tree_with_data.xml')) if len(all_form_events) != len(sent_events): - logger.warning('Some of the events were not sent to the REDCap ' - 'server. Please check the event statuses in ' - '{0}/sent_events.obj'.format(data_folder)) + logger.warning( + 'Some of the events were not sent to the REDCap server. 
Please ' + "check the log file or {0}/sent_events.idx".format(data_folder)) # Add any errors from running the rules to the report map(logger.warning, rule_errors) @@ -1871,12 +1870,18 @@ def store(self, pfe_tree): pretty_print=True) -class SentEventIndex(object): +class SentEvents(object): + """ + List of form events that have been sent to REDCap + :param filename: file location + :param writer: delegate called after an event has been marked sent + :param reader: function to read previously sent events from disk + """ def __init__(self, filename, writer=None, reader=None): self.filename = filename - loader = reader or _load - self._persist = writer or _save + self._persist = writer or self._append + loader = reader or self._readall self.sent_events = loader(filename) def __len__(self): @@ -1891,6 +1896,25 @@ def was_sent(self, study_id_key, form_name, event_name): form_event_key = study_id_key, form_name, event_name return form_event_key in self.sent_events + @staticmethod + def _readall(filename): + # Reads events as a list of tuples (default reader delegate) + try: + with open(filename, 'r') as fp: + return [ast.literal_eval(line) for line in fp] + except IOError: + return [] + + @staticmethod + def _append(values, filename): + # Appends the last value to the file (default handler of on_marked_sent) + if not values: + return + + with open(filename, 'a') as fp: + fp.write(str(values[-1])) + fp.write(os.linesep) + if __name__ == "__main__": main() diff --git a/test/TestSentEventIndex.py b/test/TestSentEventIndex.py index 4e623ae..df2b3e2 100644 --- a/test/TestSentEventIndex.py +++ b/test/TestSentEventIndex.py @@ -3,13 +3,13 @@ """ import unittest -from bin import redi +from redi import redi class TestSentEventIndex(unittest.TestCase): def test_len(self): - index = redi.SentEventIndex("", writer=lambda o, f: None, + index = redi.SentEvents("", writer=lambda o, f: None, reader=lambda f: []) self.assertEqual(0, len(index)) @@ -19,7 +19,7 @@ def test_len(self): 
self.assertEqual(2, len(index)) def test_was_sent(self): - index = redi.SentEventIndex("", writer=lambda o, f: None, + index = redi.SentEvents("", writer=lambda o, f: None, reader=lambda f: []) index.mark_sent("007", "new_hire", "1_arm_1") @@ -29,7 +29,7 @@ def test_was_sent(self): def test_mark_sent(self): self.__tally = 0 - index = redi.SentEventIndex("", self.__dummy_writer, + index = redi.SentEvents("", self.__dummy_writer, reader=lambda f: []) index.mark_sent("007", "new_hire", "1_arm_1") index.mark_sent("007", "new_hire", "2_arm_1") From 95ed9a80f3ea988990dece9f6c87908f53e21fd6 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Tue, 18 Nov 2014 13:30:09 -0500 Subject: [PATCH 45/51] Delete sent_events.idx before a new run --- redi/redi.py | 1 + 1 file changed, 1 insertion(+) diff --git a/redi/redi.py b/redi/redi.py index bd5fc72..196ea6d 100755 --- a/redi/redi.py +++ b/redi/redi.py @@ -198,6 +198,7 @@ def _delete_last_runs_data(data_folder): _remove(os.path.join(data_folder, 'alert_summary.obj')) _remove(os.path.join(data_folder, 'rule_errors.obj')) _remove(os.path.join(data_folder, 'collection_date_summary_dict.obj')) + _remove(os.path.join(data_folder, 'sent_events.idx')) def _remove(path): From 7035589a91cb23565771a509a8ffa6a338759672 Mon Sep 17 00:00:00 2001 From: Andrei Sura Date: Tue, 18 Nov 2014 16:35:04 -0500 Subject: [PATCH 46/51] Manual merge from https://github.com/ctsit/redi/pull/112 --- MANIFEST.in | 4 +- Makefile | 4 + docs/conf.py | 2 +- redi/batch.py | 378 +++++++++++++++++ redi/redi.py | 23 +- redi/redi_lib.py | 690 ------------------------------- redi/upload.py | 234 +++++++++++ redi/utils/throttle.py | 63 +++ test/TestCreateImportDataJson.py | 42 +- test/TestDaysSinceToday.py | 8 +- test/TestGenerateOutput.py | 6 +- test/TestHandleREDCapResponse.py | 14 +- test/TestParseAll.py | 17 +- test/TestSuite.py | 4 + test/TestThrottle.py | 43 ++ test/TestUpdateStatusField.py | 15 +- test/TestWriteToFile.py | 40 +- vagrant/Makefile | 4 +- 18 files 
changed, 785 insertions(+), 806 deletions(-) create mode 100644 redi/batch.py delete mode 100644 redi/redi_lib.py create mode 100644 redi/upload.py create mode 100644 redi/utils/throttle.py create mode 100644 test/TestThrottle.py diff --git a/MANIFEST.in b/MANIFEST.in index 2f2c12b..885a55c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,7 +3,7 @@ include LICENSE.txt include AUTHORS.md include README.md include setup.py -recursive-include bin *.py +recursive-include redi *.py recursive-include test *.py -recursive-include doc * +recursive-include docs * diff --git a/Makefile b/Makefile index 23fce46..fc016a2 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,9 @@ help: test: tests tests: coverage [ ! -d config/rules ] || python -munittest discover config/rules + rm -f .coverage + rm -rf cover/ + rm -f coverage.xml nosetests.xml coverage: ARCHFLAGS=$(ARCHFLAGS) python setup.py nosetests @@ -56,6 +59,7 @@ clean: rm -f person_form_event_tree_with_data.xml rm -rf vagrant/data/ rm -f vagrant/redi.db + rm -f config-example/report.xml pypi: #https://pythonhosted.org/Distutils2/distutils/packageindex.html diff --git a/docs/conf.py b/docs/conf.py index f42570a..4e8aa45 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -22,7 +22,7 @@ sys.path.insert(0, os.path.abspath('..')) from redi import redi -from redi import redi_lib +from redi import upload # -- General configuration ------------------------------------------------ diff --git a/redi/batch.py b/redi/batch.py new file mode 100644 index 0000000..a567cde --- /dev/null +++ b/redi/batch.py @@ -0,0 +1,378 @@ +""" +Functions related to the RediBatch database +""" + +__author__ = "University of Florida CTS-IT Team" +__copyright__ = "Copyright 2014, University of Florida" +__license__ = "BSD 3-Clause" + +import datetime +import hashlib +import logging +import os +import sqlite3 as lite +import stat +import sys +import time + +from lxml import etree + +from utils import redi_email +from utils.rawxml import RawXml + + +logger = 
logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + +""" +@see #check_input_file() + +The first time we run the app there is no SQLite file +where to store the md5 sums of the input file. +This function creates an empty RediBatch in the SQLite +file specified as `db_path` + +@return True if the database file was properly created with an empty table +""" + +def create_empty_md5_database(db_path) : + if os.path.exists(db_path) : + logger.warn('The file with name ' + db_path + ' already exists') + #return + + try : + logger.info('Opening the file:' + db_path) + fresh_file = open(db_path, 'w') + fresh_file.close() + os.chmod(db_path, stat.S_IRUSR | stat.S_IWUSR) + time.sleep(5) + + except IOError as e: + logger.error("I/O error: " + e.strerror + ' for file: ' + db_path) + return False + success = create_empty_table(db_path) + return success + +""" +Helper for #create_empty_md5_database() +""" + +def create_empty_table(db_path) : + logger.info('exec create_empty_table') + db = None + try: + db = lite.connect(db_path) + cur = db.cursor() + sql = """CREATE TABLE RediBatch ( + rbID INTEGER PRIMARY KEY AUTOINCREMENT, + rbStartTime TEXT NOT NULL, + rbEndTime TEXT, + rbStatus TEXT, + rbMd5Sum TEXT NOT NULL +) + """ + cur.execute(sql) + + except lite.Error as e: + logger.error("SQLite error in create_empty_table(): " + e.args[0]) + return False + finally: + if db: + db.close() + logger.info('success create_empty_table') + return True + + +""" +Use this function to set the `row_factory` +attribute of the database connection +""" + + +def dict_factory(cursor, row): + d = {} + for idx, col in enumerate(cursor.description): + d[col[0]] = row[idx] + return d + +""" +@see bin/redi.py#main() +@return a dictionary representation of the batch row for the current run + +Check the md5sum of the input file + - if the sum *has changed* then continue the data processing and store a row + in the SQLite database with `batch status= batch_started/ batch_completed` + + - 
if the sum *did not change* then check the config option `batch_warning_days`: + - if limit = -1 then continue execution (ignore the limit) + - if days_passed > limit then stop the process and email the `redi_admin` + +""" + + +def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, project): + batch = None + + if not os.path.exists(db_path) : + create_empty_md5_database(db_path) + + new_md5ive = get_md5_input_file(raw_xml_file) + new_msg = 'Using SQLite file: %s to store input file: %s md5 sum: %s' % ( + db_path, raw_xml_file, new_md5ive) + logger.info(new_msg) + + old_batch = get_last_batch(db_path) + old_md5ive = None + if old_batch: + old_md5ive = old_batch['rbMd5Sum'] + logger.info('Old md5 sum for the input file is: ' + old_md5ive) + else: + # this is the first time the checksum feature is used + logger.info( + "There is no old md5 recorded yet for the input file. Continue data import...") + batch = add_batch_entry(db_path, new_md5ive) + record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( + batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) + logger.info(record_msg) + return batch + + if old_md5ive != new_md5ive: + # the data has changed... 
insert a new batch entry + batch = add_batch_entry(db_path, new_md5ive) + record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( + batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) + logger.info(record_msg) + return batch + else: + days_since_today = get_days_since_today(old_batch['rbStartTime']) + # TODO: refactor code to use ConfigParser.RawConfigParser in order to + # preserve data types + + if (days_since_today > int(batch_warning_days)): + raw_xml = RawXml(project, raw_xml_file) + msg_file_details = "\nXML file details: " + raw_xml.get_info() + logger.info('Last import was started on: %s which is more than the limit of %s' % (old_batch['rbStartTime'], batch_warning_days)) + if (-1 == int(batch_warning_days)): + msg_continue = """ + The configuration `batch_warning_days = -1` indicates that we want to continue + execution even if the input file did not change + """ + msg_file_details + logger.info(msg_continue) + else: + + msg_quit = "The input file did not change in the past: %s days. Stop data import." 
% batch_warning_days + logger.critical(msg_quit + msg_file_details) + redi_email.send_email_input_data_unchanged(email_settings, raw_xml) + sys.exit() + else: + logger.info('Reusing md5 entry: ' + str(old_batch['rbID'])) + # return the old batch so we can update the status + return old_batch + + + +""" +Retrieve the row corresponding to the last REDI batch completed +""" + + +def get_last_batch(db_path): + db = None + try: + db = lite.connect(db_path) + db.row_factory = dict_factory + cur = db.cursor() + sql = """ +SELECT + rbID, rbStartTime, rbEndTime, rbMd5Sum +FROM + RediBatch +ORDER BY rbID DESC +LIMIT 1 +""" + cur.execute(sql) + batch = cur.fetchone() + + except lite.Error as e: + logger.error("SQLite error in get_last_batch() for file %s - %s" % (db_path, e.args[0])) + return None + finally: + if db: + db.close() + + return batch + + +""" +Retrieve the row corresponding to the specified primary key +""" + + +def get_batch_by_id(db_path, batch_id): + db = None + try: + db = lite.connect(db_path) + db.row_factory = dict_factory + cur = db.cursor() + sql = """ +SELECT + rbID, rbStartTime, rbEndTime, rbMd5Sum +FROM + RediBatch +WHERE + rbID = ? 
+LIMIT 1 +""" + cur.execute(sql, (str(batch_id), )) + batch = cur.fetchone() + + except lite.Error as e: + logger.exception("SQLite error in get_batch_by_id(): %s:" % e.args[0]) + raise + # sys.exit(1) + finally: + if db: + db.close() + + return batch + + +""" +@see #check_input_file() +@see https://docs.python.org/2/library/hashlib.html +@see https://docs.python.org/2/library/sqlite3.html#sqlite3.Connection.row_factory + +Returns the md5 sum for the redi input file +""" + + +def get_md5_input_file(input_file): + if not os.path.exists(input_file): + raise Exception('Input file not found at: ' + input_file) + + logger.info('Computing md5 sum for: ' + input_file) + + # open the file in binary mode + f = open(input_file, 'rb') + chunk_size = 2 ** 20 + md5 = hashlib.md5() + + # read the input file in 1MB pieces + while True: + chunk = f.read(chunk_size) + if not chunk: + break + md5.update(chunk) + + return md5.hexdigest() + + +""" +@see #check_input_file() +@param db_path - the SQLite file +@param md5 - the md5 sum to be inserted +""" + + +def add_batch_entry(db_path, md5): + logger.info('Execute: add_batch_entry()') + batch = None + + db = None + try: + db = lite.connect(db_path) + db.row_factory = dict_factory + cur = db.cursor() + sql = """ +INSERT INTO RediBatch + (rbStartTime, rbEndTime, rbStatus, rbMd5Sum) +VALUES + ( ?, NULL, 'Started', ?) 
+""" + now = get_db_friendly_date_time() + cur.execute(sql, (now, md5)) + rbID = cur.lastrowid + db.commit() + batch = get_batch_by_id(db_path, rbID) + + except lite.Error as e: + logger.error("SQLite error in add_batch_entry() for file %s - %s" % (db_path, e.args[0])) + return None + finally: + if db: + db.close() + + return batch + + +""" +Update the status and the finish time of a specified batch entry in the SQLite db + +@return True if update succeeded, False otherwise +""" + + +def update_batch_entry(db_path, id, status, timestamp): + success = None + db = None + try: + db = lite.connect(db_path) + cur = db.cursor() + sql = """ +UPDATE + RediBatch +SET + rbEndTime = ? + , rbStatus = ? +WHERE + rbID = ? +""" + + cur.execute(sql, (timestamp, status, id)) + db.commit() + scuccess = True + except lite.Error as e: + logger.exception("SQLite error in update_batch_entry(): %s:" % e.args[0]) + success = False + finally: + if db: + db.close() + + return success + + +""" +@return string in format: "2014-06-24 01:23:24" +""" + + +def get_db_friendly_date_time(): + return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + +""" +@return string in format: 2014-06-24 +""" + + +def get_db_friendly_date(): + return datetime.date.today() + +""" +@return the number of days passed since the specified date +""" + + +def get_days_since_today(date_string): + num = None + other = datetime.datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S') + now = datetime.datetime.now() + delta = now - other + return delta.days + +""" +Helper function for debugging xml content +""" +def printxml(tree): + print etree.tostring(tree, pretty_print = True) + return diff --git a/redi/redi.py b/redi/redi.py index 30a880e..8954121 100755 --- a/redi/redi.py +++ b/redi/redi.py @@ -49,12 +49,14 @@ import imp import os import pkg_resources +import shutil from requests import RequestException from lxml import etree from docopt import docopt -import redi_lib +import batch +import upload import report from 
utils import redi_email from utils.redcapClient import RedcapClient @@ -280,7 +282,8 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, # status to `completed` an ste the `rbEndTime` email_settings = get_email_settings(settings) db_path = database_path - batch = _check_input_file(db_path, email_settings, raw_xml_file, settings) + current_batch = _check_input_file(db_path, email_settings, raw_xml_file, + settings) form_events_file = os.path.join(configuration_directory,\ settings.form_events_file) @@ -310,7 +313,7 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, unsent_events = person_form_event_tree_with_data.xpath("//event/status[.='unsent']") # Use the new method to communicate with REDCap - report_data = redi_lib.generate_output( + report_data = upload.generate_output( person_form_event_tree_with_data, redcap_client, settings.rate_limiter_value_in_redcap, _person_form_events_service, skip_blanks) @@ -334,18 +337,18 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, report_courier.deliver(html_str) - if batch: + if current_batch: # Update the batch row - done_timestamp = redi_lib.get_db_friendly_date_time() - redi_lib.update_batch_entry(db_path, - batch['rbID'], 'Completed', done_timestamp) + done_timestamp = batch.get_db_friendly_date_time() + batch.update_batch_entry(db_path, current_batch['rbID'], 'Completed', + done_timestamp) if dry_run: logger.info("End of dry run. 
All output files are ready for review"\ " in " + data_folder) if not do_keep_gen_files: - redi_lib.delete_temporary_folder(data_folder) + shutil.rmtree(data_folder) def _create_person_form_event_tree_with_data( @@ -499,7 +502,9 @@ def _create_person_form_event_tree_with_data( def _check_input_file(db_path, email_settings, raw_xml_file, settings): - return redi_lib.check_input_file(settings.batch_warning_days, db_path, email_settings, raw_xml_file, settings.project) + return batch.check_input_file(settings.batch_warning_days, db_path, + email_settings, raw_xml_file, + settings.project) def read_config(config_file, configuration_directory, file_list): diff --git a/redi/redi_lib.py b/redi/redi_lib.py deleted file mode 100644 index 89fca0d..0000000 --- a/redi/redi_lib.py +++ /dev/null @@ -1,690 +0,0 @@ -""" -redi_lib.py - - Stores a collection of utility functions used by redi.py -""" - -__author__ = "University of Florida CTS-IT Team" -__copyright__ = "Copyright 2014, University of Florida" -__license__ = "BSD 2-Clause" - -import datetime -import os -import stat -import time -import ast -from redcap import RedcapError -import tempfile -import sqlite3 as lite -from datetime import date -import hashlib -import utils.redi_email as redi_email -from utils.rawxml import RawXml -from requests import RequestException -from lxml import etree -import logging -import sys -logger = logging.getLogger(__name__) -logger.addHandler(logging.NullHandler()) - -DEFAULT_DATA_DIRECTORY = os.getcwd() - - - -""" -create_import_data_json: -This function converts data in event tree into json format. 
-Parameters: - import_data_dict: This parameter holds the event tree data - event_tree: This parameter holds the event tree data - -@see #generate_output() -""" - - -def create_import_data_json( - import_data_dict, - event_tree): - - root = event_tree - - event_name = root.find('name') - if event_name is None or not event_name.text: - raise Exception('Expected non-blank element event/name') - - import_data_dict['redcap_event_name'] = event_name.text - - event_field_value_list = root.xpath('//event/field/name') - - for name in event_field_value_list: - if name.text is None: - raise Exception( - 'Expected non-blank element event/field/name') - - # Match all fields to build a row for each - event_field_list = root.xpath('field') - contains_data = False - - for field in event_field_list: - val = get_child_text_safely(field, 'value') - import_data_dict[field.findtext('name')] = val - - if val and not contains_data: - contains_data = True - - return {'json_data': import_data_dict, 'contains_data': contains_data} - -""" -Convenience function -@see create_eav_output -""" - - -def get_child_text_safely(etree, ele): - ele = etree.find(ele) - if ele.text is None: - return '' - else: - return ele.text - -""" -Note: This function communicates with the redcap application. -Steps: - - loop for each person/form/event element - - generate a csv fragment `using create_eav_output` - - send csv fragment to REDCap using `send_eav_data_to_redcap` - - -@return the report_data dictionary -""" - - -def generate_output(person_tree, redcap_client, rate_limit, data_repository, skip_blanks=False): - - # the global dictionary to be returned - report_data = { - 'errors': [] - } - - """ - For each person we keep a count for each form type: - subject_details = array( - 'person_A' => array('form_1': 1, 'form_2': 10, ... - 'person_B' => array('form_1': 1, 'form_2': 10, ... - ... 
- ) - """ - subject_details = {} - - # For each form type we keep a global count - form_details = {} - - # count how many `person` elements are parsed - person_count = 0 - - root = person_tree.getroot() - persons = root.xpath('//person') - - rate_limiter_value_in_redcap = float(rate_limit) - - - ideal_time_per_request = 60 / float(rate_limiter_value_in_redcap) - time_stamp_after_request = 0 - - # main loop for each person - for person in persons: - time_begin = datetime.datetime.now() - person_count += 1 - study_id = (person.xpath('study_id') or [None])[0] - - if study_id is None: - raise Exception('Expected a valid value for study_id') - - # count how many csv fragments are created per person - event_count = 0 - logger.info('Start sending data for study_id: %s' % study_id.text) - - forms = person.xpath('./all_form_events/form') - - # loop through the forms of one person - for form in forms: - form_name = form.xpath('name')[0].text - form_key = 'Total_' + form_name + '_Forms' - study_id_key = study_id.text - - # init dictionary for a new person in (study_id) - if study_id_key not in subject_details: - subject_details[study_id_key] = {} - subject_details[study_id_key]['lab_id'] = person.get('lab_id') - - if not form_key in subject_details[study_id_key]: - subject_details[study_id_key][form_key] = 0 - - if form_key not in form_details: - form_details[form_key] = 0 - - logger.debug( - 'parsing study_id ' + - study_id.text + - ' form: ' + - form_name) - - # loop through the events of one form - for event in form.xpath('event'): - event_status = event.findtext('status') - if event_status == 'sent': - continue - event_count += 1 - - try: - import_dict = { - redcap_client.project.def_field: study_id.text} - import_dict = create_import_data_json( - import_dict, - event) - json_data_dict = import_dict['json_data'] - contains_data = import_dict['contains_data'] - - # If we're skipping blanks and this event is blank, we - # assume all following events are blank; therefore, 
break - # out of this for-loop and move on to the next form. - if skip_blanks and not contains_data: - break - - time_lapse_since_last_request = time.time( - ) - time_stamp_after_request - sleepTime = max( - ideal_time_per_request - - time_lapse_since_last_request, - 0) - # print 'Sleep for: %s seconds' % sleepTime - time.sleep(sleepTime) - - if (0 == event_count % 50): - logger.info('Requests sent: %s' % (event_count)) - - # to speedup testing uncomment the following line - # if (0 == event_count % 2) : continue - - try: - found_error = False - response = redcap_client.send_data_to_redcap([json_data_dict], overwrite = True) - status = event.find('status') - if status is not None: - status.text = 'sent' - else: - status_element = etree.Element("status") - status_element.text = 'sent' - event.append(status_element) - data_repository.store(person_tree) - except RedcapError as e: - found_error = handle_errors_in_redcap_xml_response( - e.message, - report_data) - - time_stamp_after_request = time.time() - - if contains_data: - if not found_error: - # if no errors encountered update event counters - subject_details[study_id_key][form_key] += 1 - form_details[form_key] += 1 - - except Exception as e: - logger.error(e.message) - raise - - time_end = datetime.datetime.now() - logger.info("Total execution time for study_id %s was %s" % (study_id_key, (time_end - time_begin))) - logger.info("Total REDCap requests sent: %s \n" % (event_count)) - - report_data.update({ - 'total_subjects': person_count, - 'form_details': form_details, - 'subject_details': subject_details, - 'errors': report_data['errors'] - }) - - logger.debug('report_data ' + repr(report_data)) - return report_data - -""" -handle_errors_in_redcap_xml_response: -This function checks for any errors in the redcap response and update report data if there are any errors. 
-Parameters: - redcap_response_xml: This parameter holds the redcap response passed to this function - report_data: This parameter holds the report data passed to this function - -""" - - -def handle_errors_in_redcap_xml_response(redcap_response, report_data): - # converting string to dictionary - response = ast.literal_eval(str(redcap_response)) - logger.debug('handling response from the REDCap') - try: - if 'error' in response: - for recordData in response['records']: - error_string = "Error writing to record " + recordData["record"] + " field " + recordData[ - "field_name"] + " Value " + recordData["value"] + ".Error Message: " + recordData["message"] - logger.info(error_string) - report_data['errors'].append(error_string) - else: - logger.error("REDCap response is in unknown format") - except KeyError as e: - logger.error(str(e)) - return True - - -# Convenience method for getting the first element -# Note: for printing an object can use: print repr(obj) -def get_first_item(aList): - if aList: - return aList[0] - return None - -""" -create_temp_dir_debug: -Creates a folder name with the following format: - ./out/out_YYYY_mm_dd:00:11:22 -""" - - -def create_temp_dir_debug(existing_folder=(DEFAULT_DATA_DIRECTORY + 'out')): - if not os.path.exists(existing_folder): - try: - os.makedirs(existing_folder) - except: - logger.exception( - "Folder cannot be created at the path " + - existing_folder) - raise - prefix = 'out_' + datetime.datetime.now().strftime('%Y_%m_%d-%H_%M_%S') - mydir = existing_folder + '/' + prefix - os.mkdir(mydir) - return mydir - -""" -get_temp_path: -This function returns a folder path. -If user enters yes along with the optional command line argument -k, then a folder will be created under project root having a timestamp for its name. The files stored under this folder will not be destroyed. -If user does not use the -k switch then a temporary folder is created at a random location. After the execution of the program, this folder is destroyed. 
-""" - - -def get_temp_path(do_keep_gen_files): - if do_keep_gen_files: - return create_temp_dir_debug() + '/' - else: - return tempfile.mkdtemp('/') - -""" -delete_temporary_folder: -deletes the temporary folder specified as argument along with its contents. -""" - - -def delete_temporary_folder(tmp_folder): - fileList = os.listdir(tmp_folder) - for fileName in fileList: - os.remove(tmp_folder + "/" + fileName) - try: - os.rmdir(tmp_folder) - except OSError: - logger.exception( - "Folder " + - tmp_folder + - "is not empty, hence cannot be deleted.") - raise - -""" -@see #check_input_file() - -The first time we run the app there is no SQLite file -where to store the md5 sums of the input file. -This function creates an empty RediBatch in the SQLite -file specified as `db_path` - -@return True if the database file was properly created with an empty table -""" - -def create_empty_md5_database(db_path) : - if os.path.exists(db_path) : - logger.warn('The file with name ' + db_path + ' already exists') - #return - - try : - logger.info('Opening the file:' + db_path) - fresh_file = open(db_path, 'w') - fresh_file.close() - os.chmod(db_path, stat.S_IRUSR | stat.S_IWUSR) - time.sleep(5) - - except IOError as e: - logger.error("I/O error: " + e.strerror + ' for file: ' + db_path) - return False - success = create_empty_table(db_path) - return success - -""" -Helper for #create_empty_md5_database() -""" - -def create_empty_table(db_path) : - logger.info('exec create_empty_table') - db = None - try: - db = lite.connect(db_path) - cur = db.cursor() - sql = """CREATE TABLE RediBatch ( - rbID INTEGER PRIMARY KEY AUTOINCREMENT, - rbStartTime TEXT NOT NULL, - rbEndTime TEXT, - rbStatus TEXT, - rbMd5Sum TEXT NOT NULL -) - """ - cur.execute(sql) - - except lite.Error as e: - logger.error("SQLite error in create_empty_table(): " + e.args[0]) - return False - finally: - if db: - db.close() - logger.info('success create_empty_table') - return True - - -""" -Use this function to set the 
`row_factory` -attribute of the database connection -""" - - -def dict_factory(cursor, row): - d = {} - for idx, col in enumerate(cursor.description): - d[col[0]] = row[idx] - return d - -""" -@see redi/redi.py#main() -@return a dictionary representation of the batch row for the current run - -Check the md5sum of the input file - - if the sum *has changed* then continue the data processing and store a row - in the SQLite database with `batch status= batch_started/ batch_completed` - - - if the sum *did not change* then check the config option `batch_warning_days`: - - if limit = -1 then continue execution (ignore the limit) - - if days_passed > limit then stop the process and email the `redi_admin` - -""" - - -def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, project): - batch = None - - if not os.path.exists(db_path) : - create_empty_md5_database(db_path) - - new_md5ive = get_md5_input_file(raw_xml_file) - new_msg = 'Using SQLite file: %s to store input file: %s md5 sum: %s' % ( - db_path, raw_xml_file, new_md5ive) - logger.info(new_msg) - - old_batch = get_last_batch(db_path) - old_md5ive = None - if old_batch: - old_md5ive = old_batch['rbMd5Sum'] - logger.info('Old md5 sum for the input file is: ' + old_md5ive) - else: - # this is the first time the checksum feature is used - logger.info( - "There is no old md5 recorded yet for the input file. Continue data import...") - batch = add_batch_entry(db_path, new_md5ive) - record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( - batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) - logger.info(record_msg) - return batch - - if old_md5ive != new_md5ive: - # the data has changed... 
insert a new batch entry - batch = add_batch_entry(db_path, new_md5ive) - record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( - batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) - logger.info(record_msg) - return batch - else: - days_since_today = get_days_since_today(old_batch['rbStartTime']) - # TODO: refactor code to use ConfigParser.RawConfigParser in order to - # preserve data types - - if (days_since_today > int(batch_warning_days)): - raw_xml = RawXml(project, raw_xml_file) - msg_file_details = "\nXML file details: " + raw_xml.get_info() - logger.info('Last import was started on: %s which is more than the limit of %s' % (old_batch['rbStartTime'], batch_warning_days)) - if (-1 == int(batch_warning_days)): - msg_continue = """ - The configuration `batch_warning_days = -1` indicates that we want to continue - execution even if the input file did not change - """ + msg_file_details - logger.info(msg_continue) - else: - - msg_quit = "The input file did not change in the past: %s days. Stop data import." 
% batch_warning_days - logger.critical(msg_quit + msg_file_details) - redi_email.send_email_input_data_unchanged(email_settings, raw_xml) - sys.exit() - else: - logger.info('Reusing md5 entry: ' + str(old_batch['rbID'])) - # return the old batch so we can update the status - return old_batch - - - -""" -Retrieve the row corresponding to the last REDI batch completed -""" - - -def get_last_batch(db_path): - db = None - try: - db = lite.connect(db_path) - db.row_factory = dict_factory - cur = db.cursor() - sql = """ -SELECT - rbID, rbStartTime, rbEndTime, rbMd5Sum -FROM - RediBatch -ORDER BY rbID DESC -LIMIT 1 -""" - cur.execute(sql) - batch = cur.fetchone() - - except lite.Error as e: - logger.error("SQLite error in get_last_batch() for file %s - %s" % (db_path, e.args[0])) - return None - finally: - if db: - db.close() - - return batch - - -""" -Retrieve the row corresponding to the specified primary key -""" - - -def get_batch_by_id(db_path, batch_id): - db = None - try: - db = lite.connect(db_path) - db.row_factory = dict_factory - cur = db.cursor() - sql = """ -SELECT - rbID, rbStartTime, rbEndTime, rbMd5Sum -FROM - RediBatch -WHERE - rbID = ? 
-LIMIT 1 -""" - cur.execute(sql, (str(batch_id), )) - batch = cur.fetchone() - - except lite.Error as e: - logger.exception("SQLite error in get_batch_by_id(): %s:" % e.args[0]) - raise - # sys.exit(1) - finally: - if db: - db.close() - - return batch - - -""" -@see #check_input_file() -@see https://docs.python.org/2/library/hashlib.html -@see https://docs.python.org/2/library/sqlite3.html#sqlite3.Connection.row_factory - -Returns the md5 sum for the redi input file -""" - - -def get_md5_input_file(input_file): - if not os.path.exists(input_file): - raise Exception('Input file not found at: ' + input_file) - - logger.info('Computing md5 sum for: ' + input_file) - - # open the file in binary mode - f = open(input_file, 'rb') - chunk_size = 2 ** 20 - md5 = hashlib.md5() - - # read the input file in 1MB pieces - while True: - chunk = f.read(chunk_size) - if not chunk: - break - md5.update(chunk) - - return md5.hexdigest() - - -""" -@see #check_input_file() -@param db_path - the SQLite file -@param md5 - the md5 sum to be inserted -""" - - -def add_batch_entry(db_path, md5): - logger.info('Execute: add_batch_entry()') - batch = None - - db = None - try: - db = lite.connect(db_path) - db.row_factory = dict_factory - cur = db.cursor() - sql = """ -INSERT INTO RediBatch - (rbStartTime, rbEndTime, rbStatus, rbMd5Sum) -VALUES - ( ?, NULL, 'Started', ?) 
-""" - now = get_db_friendly_date_time() - cur.execute(sql, (now, md5)) - rbID = cur.lastrowid - db.commit() - batch = get_batch_by_id(db_path, rbID) - - except lite.Error as e: - logger.error("SQLite error in add_batch_entry() for file %s - %s" % (db_path, e.args[0])) - return None - finally: - if db: - db.close() - - return batch - - -""" -Update the status and the finish time of a specified batch entry in the SQLite db - -@return True if update succeeded, False otherwise -""" - - -def update_batch_entry(db_path, id, status, timestamp): - success = None - db = None - try: - db = lite.connect(db_path) - cur = db.cursor() - sql = """ -UPDATE - RediBatch -SET - rbEndTime = ? - , rbStatus = ? -WHERE - rbID = ? -""" - - cur.execute(sql, (timestamp, status, id)) - db.commit() - scuccess = True - except lite.Error as e: - logger.exception("SQLite error in update_batch_entry(): %s:" % e.args[0]) - success = False - finally: - if db: - db.close() - - return success - - -""" -@return string in format: "2014-06-24 01:23:24" -""" - - -def get_db_friendly_date_time(): - return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - -""" -@return string in format: 2014-06-24 -""" - - -def get_db_friendly_date(): - return date.today() - -""" -@return the number of days passed since the specified date -""" - - -def get_days_since_today(date_string): - num = None - other = datetime.datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S') - now = datetime.datetime.now() - delta = now - other - return delta.days - -""" -Helper function for debugging xml content -""" -def printxml(tree): - print etree.tostring(tree, pretty_print = True) - return diff --git a/redi/upload.py b/redi/upload.py new file mode 100644 index 0000000..15d3528 --- /dev/null +++ b/redi/upload.py @@ -0,0 +1,234 @@ +""" +Functions related to uploading data to REDCap +""" + +__author__ = "University of Florida CTS-IT Team" +__copyright__ = "Copyright 2014, University of Florida" +__license__ = "BSD 3-Clause" + +import 
ast +import datetime +import logging +import os + +from lxml import etree +from redcap import RedcapError + +from utils import throttle + +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +DEFAULT_DATA_DIRECTORY = os.getcwd() + + +def create_import_data_json(import_data_dict, event_tree): + """ + create_import_data_json: + This function converts data in event tree into json format. + Parameters: + import_data_dict: This parameter holds the event tree data + event_tree: This parameter holds the event tree data + + @see #generate_output() + """ + + root = event_tree + + event_name = root.find('name') + if event_name is None or not event_name.text: + raise Exception('Expected non-blank element event/name') + + import_data_dict['redcap_event_name'] = event_name.text + + event_field_value_list = root.xpath('//event/field/name') + + for name in event_field_value_list: + if name.text is None: + raise Exception( + 'Expected non-blank element event/field/name') + + # Match all fields to build a row for each + event_field_list = root.xpath('field') + contains_data = False + + for field in event_field_list: + val = field.findtext('value', '') + import_data_dict[field.findtext('name')] = val + + if val and not contains_data: + contains_data = True + + return {'json_data': import_data_dict, 'contains_data': contains_data} + + +def generate_output(person_tree, redcap_client, rate_limit, data_repository, skip_blanks=False): + """ + Note: This function communicates with the redcap application. + Steps: + - loop for each person/form/event element + - generate a csv fragment `using create_eav_output` + - send csv fragment to REDCap using `send_eav_data_to_redcap` + + + @return the report_data dictionary + """ + + # the global dictionary to be returned + report_data = { + 'errors': [] + } + + """ + For each person we keep a count for each form type: + subject_details = array( + 'person_A' => array('form_1': 1, 'form_2': 10, ... 
+ 'person_B' => array('form_1': 1, 'form_2': 10, ... + ... + ) + """ + subject_details = {} + + # For each form type we keep a global count + form_details = {} + + # count how many `person` elements are parsed + person_count = 0 + + root = person_tree.getroot() + persons = root.xpath('//person') + + upload_data = throttle.Throttle(redcap_client.send_data_to_redcap, + int(rate_limit)) + + # main loop for each person + for person in persons: + time_begin = datetime.datetime.now() + person_count += 1 + study_id = (person.xpath('study_id') or [None])[0] + + if study_id is None: + raise Exception('Expected a valid value for study_id') + + # count how many csv fragments are created per person + event_count = 0 + logger.info('Start sending data for study_id: %s' % study_id.text) + + forms = person.xpath('./all_form_events/form') + + # loop through the forms of one person + for form in forms: + form_name = form.xpath('name')[0].text + form_key = 'Total_' + form_name + '_Forms' + study_id_key = study_id.text + + # init dictionary for a new person in (study_id) + if study_id_key not in subject_details: + subject_details[study_id_key] = {} + subject_details[study_id_key]['lab_id'] = person.get('lab_id') + + if not form_key in subject_details[study_id_key]: + subject_details[study_id_key][form_key] = 0 + + if form_key not in form_details: + form_details[form_key] = 0 + + logger.debug( + 'parsing study_id ' + + study_id.text + + ' form: ' + + form_name) + + # loop through the events of one form + for event in form.xpath('event'): + event_status = event.findtext('status') + if event_status == 'sent': + continue + event_count += 1 + + try: + import_dict = { + redcap_client.project.def_field: study_id.text} + import_dict = create_import_data_json( + import_dict, + event) + json_data_dict = import_dict['json_data'] + contains_data = import_dict['contains_data'] + + # If we're skipping blanks and this event is blank, we + # assume all following events are blank; therefore, break + # 
out of this for-loop and move on to the next form. + if skip_blanks and not contains_data: + break + + if (0 == event_count % 50): + logger.info('Requests sent: %s' % (event_count)) + + # to speedup testing uncomment the following line + # if (0 == event_count % 2) : continue + + try: + found_error = False + upload_data([json_data_dict], overwrite=True) + status = event.find('status') + if status is not None: + status.text = 'sent' + else: + status_element = etree.Element("status") + status_element.text = 'sent' + event.append(status_element) + data_repository.store(person_tree) + except RedcapError as e: + found_error = handle_errors_in_redcap_xml_response( + e.message, + report_data) + + if contains_data: + if not found_error: + # if no errors encountered update event counters + subject_details[study_id_key][form_key] += 1 + form_details[form_key] += 1 + + except Exception as e: + logger.error(e.message) + raise + + time_end = datetime.datetime.now() + logger.info("Total execution time for study_id %s was %s" % (study_id_key, (time_end - time_begin))) + logger.info("Total REDCap requests sent: %s \n" % (event_count)) + + report_data.update({ + 'total_subjects': person_count, + 'form_details': form_details, + 'subject_details': subject_details, + 'errors': report_data['errors'] + }) + + logger.debug('report_data ' + repr(report_data)) + return report_data + + +def handle_errors_in_redcap_xml_response(redcap_response, report_data): + """ + handle_errors_in_redcap_xml_response: + This function checks for any errors in the redcap response and update report data if there are any errors. 
+ Parameters: + redcap_response_xml: This parameter holds the redcap response passed to this function + report_data: This parameter holds the report data passed to this function + + """ + # converting string to dictionary + response = ast.literal_eval(str(redcap_response)) + logger.debug('handling response from the REDCap') + try: + if 'error' in response: + for recordData in response['records']: + error_string = "Error writing to record " + recordData["record"] + " field " + recordData[ + "field_name"] + " Value " + recordData["value"] + ".Error Message: " + recordData["message"] + logger.info(error_string) + report_data['errors'].append(error_string) + else: + logger.error("REDCap response is in unknown format") + except KeyError as e: + logger.error(str(e)) + return True diff --git a/redi/utils/throttle.py b/redi/utils/throttle.py new file mode 100644 index 0000000..f67ce82 --- /dev/null +++ b/redi/utils/throttle.py @@ -0,0 +1,63 @@ +""" +Utility module for throttling calls to a function +""" + +import collections +import datetime +import time + +__author__ = "University of Florida CTS-IT Team" +__copyright__ = "Copyright 2014, University of Florida" +__license__ = "BSD 3-Clause" + + +class Throttle(object): + """ + Limits the number of calls to a function to a given rate. + + The rate limit is equal to the max_calls over the interval_in_seconds. 
+ + :param function: function to call after throttling + :param max_calls: maximum number of calls allowed + :param interval_in_seconds: size of the sliding window + """ + def __init__(self, function, max_calls, interval_in_seconds=60): + assert max_calls > 0 + assert interval_in_seconds > 0 + + self._actual = function + self._max_requests = max_calls + self._interval = datetime.timedelta(seconds=interval_in_seconds) + self._timestamps = collections.deque(maxlen=self._max_requests) + + def __call__(self, *args, **kwargs): + """ Conditionally delays before calling the function """ + self._wait() + self._actual(*args, **kwargs) + + def _limit_reached(self): + """ Returns True if the maximum number of calls has been reached """ + return len(self._timestamps) == self._max_requests + + @staticmethod + def _now(): + # Used during unit testing + return datetime.datetime.now() + + @staticmethod + def _sleep(seconds): + # Used during unit testing + return time.sleep(seconds) + + def _wait(self): + """ Sleeps for the remaining interval if the limit has been reached """ + now = self._now() + + limit_reached = len(self._timestamps) == self._max_requests + if limit_reached: + lapsed = now - self._timestamps[0] + if lapsed <= self._interval: + self._sleep((self._interval - lapsed).total_seconds()) + self._timestamps.clear() + + self._timestamps.append(now) diff --git a/test/TestCreateImportDataJson.py b/test/TestCreateImportDataJson.py index 9ea5ccf..d884da1 100755 --- a/test/TestCreateImportDataJson.py +++ b/test/TestCreateImportDataJson.py @@ -18,35 +18,23 @@ import logging import os from redi import redi -from redi import redi_lib - -file_dir = os.path.dirname(os.path.realpath(__file__)) -goal_dir = os.path.join(file_dir, "../") -proj_root = os.path.abspath(goal_dir)+'/' +from redi import upload DEFAULT_DATA_DIRECTORY = os.getcwd() + class TestCreateImportDataJson(unittest.TestCase): def setUp(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.CONST_STUDY_ID = 73 - - 
global logger - logger = logging.getLogger('redi') - logging.basicConfig(filename=proj_root+'log/redi.log', - format='%(asctime)s - %(levelname)s - \ - %(name)s - %(message)s', - datefmt='%m/%d/%Y %H:%M:%S', - filemode='w', - level=logging.DEBUG) return() ############################ # == TEST_1 def test_empty_event(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) - logger.info("Running " + __name__ + logging.info("Running " + __name__ + "#test_empty_event() using study_id: " + `self.CONST_STUDY_ID`) # Case 1 input string string_1_empty_event = """ @@ -55,14 +43,14 @@ def test_empty_event(self): """ out_dict_1 = {'study_id':self.CONST_STUDY_ID} etree_1 = etree.ElementTree(etree.fromstring(string_1_empty_event)) - self.assertRaises(Exception, redi_lib.create_import_data_json, out_dict_1, etree_1) + self.assertRaises(Exception, upload.create_import_data_json, out_dict_1, etree_1) ############################ # == TEST_2 def test_empty_event_field_value(self): - logger.info("Running " + __name__ + logging.info("Running " + __name__ + "#test_empty_value() for study_id: " + `self.CONST_STUDY_ID`) # Case 2 input string string_2_empty_values = """ @@ -102,14 +90,14 @@ def test_empty_event_field_value(self): etree_2 = etree.ElementTree(etree.fromstring(string_2_empty_values)) out_dict_2 = {'study_id':self.CONST_STUDY_ID} expected_result_dict_2 = {'contains_data': False, 'json_data': {'chem_complete': '', 'redcap_event_name': '1_arm_1', 'tbil_lborres': '', 'study_id': 73, 'chem_nximport': '', 'tbil_lborresu': '', 'chem_lbdtc': ''}} - actual_result = redi_lib.create_import_data_json(out_dict_2, etree_2) + actual_result = upload.create_import_data_json(out_dict_2, etree_2) self.assertEqual(expected_result_dict_2,actual_result) ############################ # == Test_3 def test_mixed_event_field_value(self): - logger.info("Running " + __name__ + logging.info("Running " + __name__ + "#test_mixed_event_field_value() for study_id: " + `self.CONST_STUDY_ID`) @@ -148,7 +136,7 
@@ def test_mixed_event_field_value(self): """ etree_3 = etree.ElementTree(etree.fromstring(string_3_mixed)) out_dict_3 = {'study_id':self.CONST_STUDY_ID} - actual_result = redi_lib.create_import_data_json(out_dict_3, etree_3) + actual_result = upload.create_import_data_json(out_dict_3, etree_3) expected_result = {'contains_data': True, 'json_data': {'chem_complete': '2', 'redcap_event_name': '1_arm_1', 'tbil_lborres': '1.7', 'study_id': 73, 'chem_nximport': 'Y', 'tbil_lborresu': '', 'chem_lbdtc': '1902-12-17'}} self.assertEqual(actual_result, expected_result) @@ -156,7 +144,7 @@ def test_mixed_event_field_value(self): ############################ # == TEST_4 def test_empty_event_field_name(self): - logger.info("Running " + __name__ + logging.info("Running " + __name__ + "#test_empty_event_field_name() for study_id: " + `self.CONST_STUDY_ID`) # Case 4 input string @@ -186,11 +174,11 @@ def test_empty_event_field_name(self): """ etree_4 = etree.ElementTree(etree.fromstring(string_4_blank_name)) out_dict_4 = {'study_id':self.CONST_STUDY_ID} - self.assertRaises(Exception, redi_lib.create_import_data_json, out_dict_4, etree_4) + self.assertRaises(Exception, upload.create_import_data_json, out_dict_4, etree_4) # Verify if code checks for blank `event/name` def test_empty_event_name(self): - logger.info("Running " + __name__ + logging.info("Running " + __name__ + "#test_empty_event_name() for study_id: " + `self.CONST_STUDY_ID`) string_4a_blank_name = """ @@ -202,19 +190,19 @@ def test_empty_event_name(self): """ etree_4a = etree.ElementTree(etree.fromstring(string_4a_blank_name)) - self.assertRaises(Exception, redi_lib.create_import_data_json, self.CONST_STUDY_ID, etree_4a) + self.assertRaises(Exception, upload.create_import_data_json, self.CONST_STUDY_ID, etree_4a) ############################ # == TEST_5 def test_empty_study_id(self) : - logger.info("Running " + __name__ + logging.info("Running " + __name__ + "#test_empty_study_id() for study_id: ''") 
string_1_empty_event = """ """ string_5_out = "error_study_id_empty" etree_1 = etree.ElementTree(etree.fromstring(string_1_empty_event)) - self.assertRaises(Exception, redi_lib.create_import_data_json,None, etree_1) + self.assertRaises(Exception, upload.create_import_data_json,None, etree_1) def test_multiple_event(self): # motivated by bug 5996 @@ -231,7 +219,7 @@ def test_multiple_event(self): second_event = form.xpath('//event')[1] out_dict_3 = {'study_id':self.CONST_STUDY_ID} - output = redi_lib.create_import_data_json(out_dict_3, second_event) + output = upload.create_import_data_json(out_dict_3, second_event) self.assertTrue(output['contains_data']) self.assertFalse('42_arm_42' in output['json_data']['redcap_event_name']) self.assertTrue('no_arm' in output['json_data']['redcap_event_name']) diff --git a/test/TestDaysSinceToday.py b/test/TestDaysSinceToday.py index 51dda0c..0e96317 100644 --- a/test/TestDaysSinceToday.py +++ b/test/TestDaysSinceToday.py @@ -1,11 +1,9 @@ import unittest import datetime from datetime import timedelta -from redi import redi_lib - +from redi import batch class TestDaysSinceToday(unittest.TestCase): - """ Verify the difference from a past date Verify the difference from a future date @@ -14,10 +12,10 @@ def test(self): past10 = datetime.datetime.now() - timedelta(days = 10) future11 = datetime.datetime.now() + timedelta(days = 11) - diff_past = redi_lib.get_days_since_today( str(past10.strftime('%Y-%m-%d %H:%M:%S') ) ) + diff_past = batch.get_days_since_today( str(past10.strftime('%Y-%m-%d %H:%M:%S') ) ) self.assertEqual(10, diff_past) - diff_future = redi_lib.get_days_since_today( str(future11.strftime('%Y-%m-%d %H:%M:%S') ) ) + diff_future = batch.get_days_since_today( str(future11.strftime('%Y-%m-%d %H:%M:%S') ) ) self.assertEqual(-11, diff_future) diff --git a/test/TestGenerateOutput.py b/test/TestGenerateOutput.py index 205818f..94b5b4a 100755 --- a/test/TestGenerateOutput.py +++ b/test/TestGenerateOutput.py @@ -17,7 +17,7 @@ 
import os from lxml import etree from redi import redi -from redi import redi_lib +from redi import upload from redi.utils.redcapClient import RedcapClient @@ -33,8 +33,6 @@ class dummyClass: def_field = 'test' def test_person_form_event(self): - redi.logger.info("Running " + __name__ - + "#test_person_form_event() using xml: " ) string_1_xml = """ @@ -175,7 +173,7 @@ def send_data_to_redcap(self, data, overwrite=False): return """Data sent""" etree_1 = etree.ElementTree(etree.fromstring(string_1_xml)) - result = redi_lib.generate_output(etree_1, MockRedcapClient(), 500, + result = upload.generate_output(etree_1, MockRedcapClient(), 500, MockDataRepository()) self.assertEqual(report_data['total_subjects'], result['total_subjects']) self.assertEqual(report_data['form_details'], result['form_details']) diff --git a/test/TestHandleREDCapResponse.py b/test/TestHandleREDCapResponse.py index a05b3d0..7e15a12 100644 --- a/test/TestHandleREDCapResponse.py +++ b/test/TestHandleREDCapResponse.py @@ -1,27 +1,27 @@ import unittest import os from redi import redi -from redi import redi_lib - -file_dir = os.path.dirname(os.path.realpath(__file__)) -goal_dir = os.path.join(file_dir, "../") -proj_root = os.path.abspath(goal_dir)+'/' +from redi import upload DEFAULT_DATA_DIRECTORY = os.getcwd() class TestHandleErrorsInREDCapResponse(unittest.TestCase): + """ Variables setup """ def setUp(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) return() def test_handle_errors_in_redcap_xml_response_valid_case(self): + """ Test the correctness of function + upload.handle_errors_in_redcap_xml_response + """ self.redcap_error = """{"error": "There were data validation errors","records": [{"record": "1 (1_arm_1)", "field_name": "wbc_lborres", "value": "5.4", "message": "This field is located on a form that is locked. 
You must first unlock this form for this record"}]}""" self.report_data = {'errors':[]} - self.assertTrue(redi_lib.handle_errors_in_redcap_xml_response(self.redcap_error,self.report_data)) + self.assertTrue(upload.handle_errors_in_redcap_xml_response(self.redcap_error, self.report_data)) # Below code is made obsolete because we are handling errors only in case of exceptions.We are not checking for errors in valid cases anymore. # def test_handle_errors_in_redcap_xml_response_with_no_error(self): @@ -36,7 +36,7 @@ def test_handle_errors_in_redcap_xml_response_with_no_errorKey_in_report_data(se "value": "5.4", "message": "This field is located on a form that is locked. You must first unlock this form for this record"}]}""" self.report_data = {} - self.assertTrue(redi_lib.handle_errors_in_redcap_xml_response(self.redcap_pass,self.report_data)) + self.assertTrue(upload.handle_errors_in_redcap_xml_response(self.redcap_pass, self.report_data)) def tearDown(self): diff --git a/test/TestParseAll.py b/test/TestParseAll.py index 58397d0..d004f61 100644 --- a/test/TestParseAll.py +++ b/test/TestParseAll.py @@ -19,31 +19,16 @@ import os from redi import redi -file_dir = os.path.dirname(os.path.realpath(__file__)) -goal_dir = os.path.join(file_dir, "../") -proj_root = os.path.abspath(goal_dir)+'/' - DEFAULT_DATA_DIRECTORY = os.getcwd() class TestParseAll(unittest.TestCase): def setUp(self): - global logger - logger = logging.getLogger('redi') - logging.basicConfig(filename=DEFAULT_DATA_DIRECTORY, - format='%(asctime)s - %(levelname)s - \ - %(name)s - %(message)s', - datefmt='%m/%d/%Y %H:%M:%S', - filemode='w', - level=logging.DEBUG) - return() + redi.configure_logging(DEFAULT_DATA_DIRECTORY) ############################ # == TEST_1 - config/formEvents.xml def test_parse_form_events(self): - redi.configure_logging(DEFAULT_DATA_DIRECTORY) - logger.info("Running " + __name__ - + "#test_person_form_event() using xml: " ) string_1_xml = """ Project diff --git a/test/TestSuite.py 
b/test/TestSuite.py index eb86b04..a3ec7ec 100755 --- a/test/TestSuite.py +++ b/test/TestSuite.py @@ -38,9 +38,11 @@ from TestCopyDataToPersonFormEventTree import TestCopyDataToPersonFormEventTree from TestGetEMRData import TestGetEMRData from TestResume import TestResume +from TestThrottle import TestThrottle from TestPersonFormEventsRepository import TestPersonFormEventsRepository from TestVerifyAndCorrectCollectionDate import TestVerifyAndCorrectCollectionDate from TestRediEmail import TestRediEmail +from TestDaysSinceToday import TestDaysSinceToday class redi_suite(unittest.TestSuite): @@ -79,8 +81,10 @@ def suite(self): redi_test_suite.addTest(TestCopyDataToPersonFormEventTree) redi_test_suite.addTest(TestGetEMRData) redi_test_suite.addTest(TestResume) + redi_test_suite.addTest(TestThrottle) redi_test_suite.addTest(TestPersonFormEventsRepository) redi_test_suite.addTest(TestRediEmail) + redi_test_suite.addTest(TestDaysSinceToday) # return the suite return unittest.TestSuite([redi_test_suite]) diff --git a/test/TestThrottle.py b/test/TestThrottle.py new file mode 100644 index 0000000..a816b51 --- /dev/null +++ b/test/TestThrottle.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +import datetime +import unittest + +from redi.utils import throttle + + +class TestThrottle(unittest.TestCase): + + def test_throttle(self): + class Clock(object): + def __init__(self): + self.now = datetime.datetime.now() + + def __call__(self): + return self.now + + def add_seconds(self, seconds): + self.now += datetime.timedelta(seconds=seconds) + + clock = Clock() + throttle.Throttle._now = clock + throttle.Throttle._sleep = clock.add_seconds + + call = throttle.Throttle(lambda: None, max_calls=3, + interval_in_seconds=5) + + call() # t=0 + clock.add_seconds(1) + call() # t=1 + clock.add_seconds(2) + call() # t=3 + clock.add_seconds(1) + call() # t=4 + self.assertEquals(1, len(call._timestamps)) + clock.add_seconds(1) + call() # t=5 + self.assertEquals(2, len(call._timestamps)) + + +if 
__name__ == '__main__': + unittest.main() diff --git a/test/TestUpdateStatusField.py b/test/TestUpdateStatusField.py index 10b6256..0e0a010 100644 --- a/test/TestUpdateStatusField.py +++ b/test/TestUpdateStatusField.py @@ -3,16 +3,16 @@ import os from redi import redi -file_dir = os.path.dirname(os.path.realpath(__file__)) -goal_dir = os.path.join(file_dir, "../") -proj_root = os.path.abspath(goal_dir)+'/' - DEFAULT_DATA_DIRECTORY = os.getcwd() class TestUpdateStatusField(unittest.TestCase): - def test_update_status_field_value_when_one_subject_with_two_forms_with_one_event_in_each_form(self): + def setUp(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) + + + def test_update_status_field_value_when_one_subject_with_two_forms_with_one_event_in_each_form(self): + self.source = """ 99 @@ -153,7 +153,6 @@ def test_update_status_field_value_when_one_subject_with_two_forms_with_one_even self.assertEqual(self.expect, result) def test_update_status_field_value_when_one_subject_with_two_forms_with_two_events_in_each_form(self): - redi.configure_logging(proj_root+'log/redi.log') self.source = """ 99 @@ -370,7 +369,6 @@ def test_update_status_field_value_when_one_subject_with_two_forms_with_two_even self.assertEqual(self.expect, result) def test_update_status_field_value_when_two_subjects_with_two_forms_with_one_event_in_each_form(self): - redi.configure_logging(proj_root+'log/redi.log') self.source = """ 99 @@ -609,7 +607,6 @@ def test_update_status_field_value_when_two_subjects_with_two_forms_with_one_eve self.assertEqual(self.expect, result) def test_update_status_field_value_when_one_subject_with_no_form(self): - redi.configure_logging(proj_root+'log/redi.log') self.source = """ 99 @@ -662,7 +659,6 @@ def test_update_status_field_value_when_one_subject_with_no_form(self): self.assertEqual(self.expect, result) def test_update_status_field_value_when_one_subject_with_two_forms_event_missing_in_one_of_the_forms(self): - redi.configure_logging(proj_root+'log/redi.log') 
self.source = """ 99 @@ -757,7 +753,6 @@ def test_update_status_field_value_when_one_subject_with_two_forms_event_missing self.assertEqual(self.expect, result) def test_update_status_field_value_when_one_subject_with_one_form_one_event_value_tag_missing(self): - redi.configure_logging(proj_root+'log/redi.log') self.source = """ 99 diff --git a/test/TestWriteToFile.py b/test/TestWriteToFile.py index 06b5dcd..57fd929 100755 --- a/test/TestWriteToFile.py +++ b/test/TestWriteToFile.py @@ -1,23 +1,13 @@ -''' -@author : Radha -email : rkandula@ufl.edu - -This file is to test the function writeElementTreetoFile of bin/redi.py -This file should be run from the project level folder (one level up from /bin) - -''' import unittest import os from lxml import etree from redi import redi -file_dir = os.path.dirname(os.path.realpath(__file__)) -goal_dir = os.path.join(file_dir, "../") -proj_root = os.path.abspath(goal_dir)+'/' - DEFAULT_DATA_DIRECTORY = os.getcwd() class TestWriteToFile(unittest.TestCase): + + """ Variables setup """ def setUp(self): redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.test_raw_xml = """ @@ -36,32 +26,16 @@ def setUp(self): 5.0 3.9 - - 001-0001 - 09/11/18 - 04/18/19 - 11:57 - Y - 04/14/20 - ALKALINE PHOSPHATASE - 1525848 - U/L - 35 - 129 - 112 - """ - ''' ''' - - # this is a function to test the writeElementTreetoFile function. 
- # we called it with input file and tried to write the element tree to an XML file - def testWriteElementTreetoFile(self): - import xml.etree.ElementTree as ET + def test_write_element_tree_to_file(self): + """ Test the correctness of function + redi.write_element_tree_to_file() + """ tree = etree.ElementTree(etree.fromstring(self.test_raw_xml)) root = tree.getroot() - redi.write_element_tree_to_file(tree,'testWriteFile.xml') + redi.write_element_tree_to_file(tree, 'testWriteFile.xml') assert os.path.exists('testWriteFile.xml') == 1 os.remove('testWriteFile.xml') diff --git a/vagrant/Makefile b/vagrant/Makefile index fa17c6f..5747bb3 100644 --- a/vagrant/Makefile +++ b/vagrant/Makefile @@ -19,7 +19,7 @@ ifneq ("$(wildcard $(MAKE_CONFIG_FILE))", "") REDCAP_API_URI := $(shell cat ${CONFIG_FILE} | sed -e 's/ //g' | grep -v '^\#' | grep 'redcap_uri=' | cut -d '=' -f2) REDCAP_VM_URI := $(subst api/,,$(REDCAP_API_URI)) REDCAP_VM_TOKEN := $(shell cat ${CONFIG_FILE} | sed -e 's/ //g' | grep -v '^\#' | grep 'token=' | cut -d '=' -f2) - REDCAP_RECORDS_CMD:=../bin/utils/redcap_records.py --token=$(REDCAP_VM_TOKEN) --url=$(REDCAP_API_URI) + REDCAP_RECORDS_CMD:=../redi/utils/redcap_records.py --token=$(REDCAP_VM_TOKEN) --url=$(REDCAP_API_URI) REDCAP_PROJECT_ID := $(shell cat ${MAKE_CONFIG_FILE} | sed -e 's/ //g' | grep -v '^\#' | grep 'redcap_project_id=' | cut -d '=' -f2) REDCAP_PROJECT_FORMS := $(shell cat ${MAKE_CONFIG_FILE} | sed -e 's/ //g' | grep -v '^\#' | grep 'redcap_project_forms=' | cut -d '=' -f2) REDCAP_PROJECT_ENROLLMENT_FORM := $(shell cat ${MAKE_CONFIG_FILE} | sed -e 's/ //g' | grep -v '^\#' | grep 'redcap_project_enrollment_form=' | cut -d '=' -f2) @@ -114,7 +114,7 @@ rc_enrollment: check_config $(REDCAP_RECORDS_CMD) -i $(ENROLLMENT_CSV_FILE) rc_post: - python ../bin/redi.py -c $(CONFIG_FOLDER) + python ../redi/redi.py -c $(CONFIG_FOLDER) rc_get: check_config $(REDCAP_RECORDS_CMD) -f "$(REDCAP_PROJECT_FORMS)" From 95d99ef63aaa9e38b225413746ae7a40d163df5d Mon 
Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Wed, 19 Nov 2014 16:38:58 -0500 Subject: [PATCH 47/51] Resolve merge conflicts --- redi/upload.py | 20 +++++++++----------- redi/utils/rawxml.py | 2 +- test/TestGenerateOutput.py | 4 ++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/redi/upload.py b/redi/upload.py index 15d3528..f1b7be9 100644 --- a/redi/upload.py +++ b/redi/upload.py @@ -62,7 +62,8 @@ def create_import_data_json(import_data_dict, event_tree): return {'json_data': import_data_dict, 'contains_data': contains_data} -def generate_output(person_tree, redcap_client, rate_limit, data_repository, skip_blanks=False): +def generate_output(person_tree, redcap_client, rate_limit, sent_events, + skip_blanks=False): """ Note: This function communicates with the redcap application. Steps: @@ -141,8 +142,11 @@ def generate_output(person_tree, redcap_client, rate_limit, data_repository, ski # loop through the events of one form for event in form.xpath('event'): - event_status = event.findtext('status') - if event_status == 'sent': + event_name = event.findtext('name', '') + assert event_name, "Missing name for form event" + + if sent_events.was_sent(study_id_key, form_name, event_name): + logger.debug("Skipping previously sent " + event_name) continue event_count += 1 @@ -170,14 +174,8 @@ def generate_output(person_tree, redcap_client, rate_limit, data_repository, ski try: found_error = False upload_data([json_data_dict], overwrite=True) - status = event.find('status') - if status is not None: - status.text = 'sent' - else: - status_element = etree.Element("status") - status_element.text = 'sent' - event.append(status_element) - data_repository.store(person_tree) + sent_events.mark_sent(study_id_key, form_name, event_name) + logger.debug("Sent " + event_name) except RedcapError as e: found_error = handle_errors_in_redcap_xml_response( e.message, diff --git a/redi/utils/rawxml.py b/redi/utils/rawxml.py index 02c326b..0039c1d 100644 --- 
a/redi/utils/rawxml.py +++ b/redi/utils/rawxml.py @@ -5,7 +5,7 @@ class RawXml(object): """ This class is used to store details about the input file - @see redi_lib/check_input_file() + @see redi.batch.check_input_file() """ def __init__(self, project, path): diff --git a/test/TestGenerateOutput.py b/test/TestGenerateOutput.py index 9900bba..1d12544 100755 --- a/test/TestGenerateOutput.py +++ b/test/TestGenerateOutput.py @@ -184,8 +184,8 @@ def send_data_to_redcap(self, data, overwrite=False): return """Data sent""" etree_1 = etree.ElementTree(etree.fromstring(string_1_xml)) - result = redi_lib.generate_output(etree_1, MockRedcapClient(), 500, - MockSentEventIndex()) + result = upload.generate_output(etree_1, MockRedcapClient(), 500, + MockSentEventIndex()) self.assertEqual(report_data['total_subjects'], result['total_subjects']) self.assertEqual(report_data['form_details'], result['form_details']) self.assertEqual(report_data['subject_details'], result['subject_details']) From 1bc56c3c18dd8faef4b8b412590a32ab79fd7773 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Thu, 20 Nov 2014 13:25:12 -0500 Subject: [PATCH 48/51] Remove redi_lib.py redi_lib.py was split up into several files and was previously removed, but was accidentally restored and recommitted with a subsequent merge. 
--- redi/redi_lib.py | 688 ----------------------------------------------- 1 file changed, 688 deletions(-) delete mode 100644 redi/redi_lib.py diff --git a/redi/redi_lib.py b/redi/redi_lib.py deleted file mode 100644 index d3312e1..0000000 --- a/redi/redi_lib.py +++ /dev/null @@ -1,688 +0,0 @@ -""" -redi_lib.py - - Stores a collection of utility functions used by redi.py -""" - -__author__ = "University of Florida CTS-IT Team" -__copyright__ = "Copyright 2014, University of Florida" -__license__ = "BSD 2-Clause" - -import datetime -import os -import stat -import time -import ast -from redcap import RedcapError -import tempfile -import sqlite3 as lite -from datetime import date -import hashlib -import utils.redi_email as redi_email -from utils.rawxml import RawXml -from requests import RequestException -from lxml import etree -import logging -import sys -logger = logging.getLogger(__name__) -logger.addHandler(logging.NullHandler()) - -DEFAULT_DATA_DIRECTORY = os.getcwd() - - - -""" -create_import_data_json: -This function converts data in event tree into json format. 
-Parameters: - import_data_dict: This parameter holds the event tree data - event_tree: This parameter holds the event tree data - -@see #generate_output() -""" - - -def create_import_data_json( - import_data_dict, - event_tree): - - root = event_tree - - event_name = root.find('name') - if event_name is None or not event_name.text: - raise Exception('Expected non-blank element event/name') - - import_data_dict['redcap_event_name'] = event_name.text - - event_field_value_list = root.xpath('//event/field/name') - - for name in event_field_value_list: - if name.text is None: - raise Exception( - 'Expected non-blank element event/field/name') - - # Match all fields to build a row for each - event_field_list = root.xpath('field') - contains_data = False - - for field in event_field_list: - val = get_child_text_safely(field, 'value') - import_data_dict[field.findtext('name')] = val - - if val and not contains_data: - contains_data = True - - return {'json_data': import_data_dict, 'contains_data': contains_data} - -""" -Convenience function -@see create_eav_output -""" - - -def get_child_text_safely(etree, ele): - ele = etree.find(ele) - if ele.text is None: - return '' - else: - return ele.text - -""" -Note: This function communicates with the redcap application. -Steps: - - loop for each person/form/event element - - generate a csv fragment `using create_eav_output` - - send csv fragment to REDCap using `send_eav_data_to_redcap` - - -@return the report_data dictionary -""" - - -def generate_output(person_tree, redcap_client, rate_limit, sent_events, - skip_blanks=False): - - # the global dictionary to be returned - report_data = { - 'errors': [] - } - - """ - For each person we keep a count for each form type: - subject_details = array( - 'person_A' => array('form_1': 1, 'form_2': 10, ... - 'person_B' => array('form_1': 1, 'form_2': 10, ... - ... 
- ) - """ - subject_details = {} - - # For each form type we keep a global count - form_details = {} - - # count how many `person` elements are parsed - person_count = 0 - - root = person_tree.getroot() - persons = root.xpath('//person') - - rate_limiter_value_in_redcap = float(rate_limit) - - - ideal_time_per_request = 60 / float(rate_limiter_value_in_redcap) - time_stamp_after_request = 0 - - # main loop for each person - for person in persons: - time_begin = datetime.datetime.now() - person_count += 1 - study_id = (person.xpath('study_id') or [None])[0] - - if study_id is None: - raise Exception('Expected a valid value for study_id') - - # count how many csv fragments are created per person - event_count = 0 - logger.info('Start sending data for study_id: %s' % study_id.text) - - forms = person.xpath('./all_form_events/form') - - # loop through the forms of one person - for form in forms: - form_name = form.xpath('name')[0].text - form_key = 'Total_' + form_name + '_Forms' - study_id_key = study_id.text - - # init dictionary for a new person in (study_id) - if study_id_key not in subject_details: - subject_details[study_id_key] = {} - subject_details[study_id_key]['lab_id'] = person.get('lab_id') - - if not form_key in subject_details[study_id_key]: - subject_details[study_id_key][form_key] = 0 - - if form_key not in form_details: - form_details[form_key] = 0 - - logger.debug( - 'parsing study_id ' + - study_id.text + - ' form: ' + - form_name) - - # loop through the events of one form - for event in form.xpath('event'): - event_name = event.findtext('name', '') - assert event_name, "Missing name for form event" - - if sent_events.was_sent(study_id_key, form_name, event_name): - logger.debug("Skipping previously sent " + event_name) - continue - event_count += 1 - - try: - import_dict = { - redcap_client.project.def_field: study_id.text} - import_dict = create_import_data_json( - import_dict, - event) - json_data_dict = import_dict['json_data'] - contains_data = 
import_dict['contains_data'] - - # If we're skipping blanks and this event is blank, we - # assume all following events are blank; therefore, break - # out of this for-loop and move on to the next form. - if skip_blanks and not contains_data: - break - - time_lapse_since_last_request = time.time( - ) - time_stamp_after_request - sleepTime = max( - ideal_time_per_request - - time_lapse_since_last_request, - 0) - # print 'Sleep for: %s seconds' % sleepTime - time.sleep(sleepTime) - - if (0 == event_count % 50): - logger.info('Requests sent: %s' % (event_count)) - - # to speedup testing uncomment the following line - # if (0 == event_count % 2) : continue - - try: - found_error = False - response = redcap_client.send_data_to_redcap([json_data_dict], overwrite = True) - sent_events.mark_sent(study_id_key, form_name, event_name) - logger.debug("Sent " + event_name) - except RedcapError as e: - found_error = handle_errors_in_redcap_xml_response( - e.message, - report_data) - - time_stamp_after_request = time.time() - - if contains_data: - if not found_error: - # if no errors encountered update event counters - subject_details[study_id_key][form_key] += 1 - form_details[form_key] += 1 - - except Exception as e: - logger.error(e.message) - raise - - time_end = datetime.datetime.now() - logger.info("Total execution time for study_id %s was %s" % (study_id_key, (time_end - time_begin))) - logger.info("Total REDCap requests sent: %s \n" % (event_count)) - - report_data.update({ - 'total_subjects': person_count, - 'form_details': form_details, - 'subject_details': subject_details, - 'errors': report_data['errors'] - }) - - logger.debug('report_data ' + repr(report_data)) - return report_data - -""" -handle_errors_in_redcap_xml_response: -This function checks for any errors in the redcap response and update report data if there are any errors. 
-Parameters: - redcap_response_xml: This parameter holds the redcap response passed to this function - report_data: This parameter holds the report data passed to this function - -""" - - -def handle_errors_in_redcap_xml_response(redcap_response, report_data): - # converting string to dictionary - response = ast.literal_eval(str(redcap_response)) - logger.debug('handling response from the REDCap') - try: - if 'error' in response: - for recordData in response['records']: - error_string = "Error writing to record " + recordData["record"] + " field " + recordData[ - "field_name"] + " Value " + recordData["value"] + ".Error Message: " + recordData["message"] - logger.info(error_string) - report_data['errors'].append(error_string) - else: - logger.error("REDCap response is in unknown format") - except KeyError as e: - logger.error(str(e)) - return True - - -# Convenience method for getting the first element -# Note: for printing an object can use: print repr(obj) -def get_first_item(aList): - if aList: - return aList[0] - return None - -""" -create_temp_dir_debug: -Creates a folder name with the following format: - ./out/out_YYYY_mm_dd:00:11:22 -""" - - -def create_temp_dir_debug(existing_folder=(DEFAULT_DATA_DIRECTORY + 'out')): - if not os.path.exists(existing_folder): - try: - os.makedirs(existing_folder) - except: - logger.exception( - "Folder cannot be created at the path " + - existing_folder) - raise - prefix = 'out_' + datetime.datetime.now().strftime('%Y_%m_%d-%H_%M_%S') - mydir = existing_folder + '/' + prefix - os.mkdir(mydir) - return mydir - -""" -get_temp_path: -This function returns a folder path. -If user enters yes along with the optional command line argument -k, then a folder will be created under project root having a timestamp for its name. The files stored under this folder will not be destroyed. -If user does not use the -k switch then a temporary folder is created at a random location. After the execution of the program, this folder is destroyed. 
-""" - - -def get_temp_path(do_keep_gen_files): - if do_keep_gen_files: - return create_temp_dir_debug() + '/' - else: - return tempfile.mkdtemp('/') - -""" -delete_temporary_folder: -deletes the temporary folder specified as argument along with its contents. -""" - - -def delete_temporary_folder(tmp_folder): - fileList = os.listdir(tmp_folder) - for fileName in fileList: - os.remove(tmp_folder + "/" + fileName) - try: - os.rmdir(tmp_folder) - except OSError: - logger.exception( - "Folder " + - tmp_folder + - "is not empty, hence cannot be deleted.") - raise - -""" -@see #check_input_file() - -The first time we run the app there is no SQLite file -where to store the md5 sums of the input file. -This function creates an empty RediBatch in the SQLite -file specified as `db_path` - -@return True if the database file was properly created with an empty table -""" - -def create_empty_md5_database(db_path) : - if os.path.exists(db_path) : - logger.warn('The file with name ' + db_path + ' already exists') - #return - - try : - logger.info('Opening the file:' + db_path) - fresh_file = open(db_path, 'w') - fresh_file.close() - os.chmod(db_path, stat.S_IRUSR | stat.S_IWUSR) - time.sleep(5) - - except IOError as e: - logger.error("I/O error: " + e.strerror + ' for file: ' + db_path) - return False - success = create_empty_table(db_path) - return success - -""" -Helper for #create_empty_md5_database() -""" - -def create_empty_table(db_path) : - logger.info('exec create_empty_table') - db = None - try: - db = lite.connect(db_path) - cur = db.cursor() - sql = """CREATE TABLE RediBatch ( - rbID INTEGER PRIMARY KEY AUTOINCREMENT, - rbStartTime TEXT NOT NULL, - rbEndTime TEXT, - rbStatus TEXT, - rbMd5Sum TEXT NOT NULL -) - """ - cur.execute(sql) - - except lite.Error as e: - logger.error("SQLite error in create_empty_table(): " + e.args[0]) - return False - finally: - if db: - db.close() - logger.info('success create_empty_table') - return True - - -""" -Use this function to set the 
`row_factory` -attribute of the database connection -""" - - -def dict_factory(cursor, row): - d = {} - for idx, col in enumerate(cursor.description): - d[col[0]] = row[idx] - return d - -""" -@see redi/redi.py#main() -@return a dictionary representation of the batch row for the current run - -Check the md5sum of the input file - - if the sum *has changed* then continue the data processing and store a row - in the SQLite database with `batch status= batch_started/ batch_completed` - - - if the sum *did not change* then check the config option `batch_warning_days`: - - if limit = -1 then continue execution (ignore the limit) - - if days_passed > limit then stop the process and email the `redi_admin` - -""" - - -def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, project): - batch = None - - if not os.path.exists(db_path) : - create_empty_md5_database(db_path) - - new_md5ive = get_md5_input_file(raw_xml_file) - new_msg = 'Using SQLite file: %s to store input file: %s md5 sum: %s' % ( - db_path, raw_xml_file, new_md5ive) - logger.info(new_msg) - - old_batch = get_last_batch(db_path) - old_md5ive = None - if old_batch: - old_md5ive = old_batch['rbMd5Sum'] - logger.info('Old md5 sum for the input file is: ' + old_md5ive) - else: - # this is the first time the checksum feature is used - logger.info( - "There is no old md5 recorded yet for the input file. Continue data import...") - batch = add_batch_entry(db_path, new_md5ive) - record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( - batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) - logger.info(record_msg) - return batch - - if old_md5ive != new_md5ive: - # the data has changed... 
insert a new batch entry - batch = add_batch_entry(db_path, new_md5ive) - record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( - batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) - logger.info(record_msg) - return batch - else: - days_since_today = get_days_since_today(old_batch['rbStartTime']) - # TODO: refactor code to use ConfigParser.RawConfigParser in order to - # preserve data types - - if (days_since_today > int(batch_warning_days)): - raw_xml = RawXml(project, raw_xml_file) - msg_file_details = "\nXML file details: " + raw_xml.get_info() - logger.info('Last import was started on: %s which is more than the limit of %s' % (old_batch['rbStartTime'], batch_warning_days)) - if (-1 == int(batch_warning_days)): - msg_continue = """ - The configuration `batch_warning_days = -1` indicates that we want to continue - execution even if the input file did not change - """ + msg_file_details - logger.info(msg_continue) - else: - - msg_quit = "The input file did not change in the past: %s days. Stop data import." 
% batch_warning_days - logger.critical(msg_quit + msg_file_details) - redi_email.send_email_input_data_unchanged(email_settings, raw_xml) - sys.exit() - else: - logger.info('Reusing md5 entry: ' + str(old_batch['rbID'])) - # return the old batch so we can update the status - return old_batch - - - -""" -Retrieve the row corresponding to the last REDI batch completed -""" - - -def get_last_batch(db_path): - db = None - try: - db = lite.connect(db_path) - db.row_factory = dict_factory - cur = db.cursor() - sql = """ -SELECT - rbID, rbStartTime, rbEndTime, rbMd5Sum -FROM - RediBatch -ORDER BY rbID DESC -LIMIT 1 -""" - cur.execute(sql) - batch = cur.fetchone() - - except lite.Error as e: - logger.error("SQLite error in get_last_batch() for file %s - %s" % (db_path, e.args[0])) - return None - finally: - if db: - db.close() - - return batch - - -""" -Retrieve the row corresponding to the specified primary key -""" - - -def get_batch_by_id(db_path, batch_id): - db = None - try: - db = lite.connect(db_path) - db.row_factory = dict_factory - cur = db.cursor() - sql = """ -SELECT - rbID, rbStartTime, rbEndTime, rbMd5Sum -FROM - RediBatch -WHERE - rbID = ? 
-LIMIT 1 -""" - cur.execute(sql, (str(batch_id), )) - batch = cur.fetchone() - - except lite.Error as e: - logger.exception("SQLite error in get_batch_by_id(): %s:" % e.args[0]) - raise - # sys.exit(1) - finally: - if db: - db.close() - - return batch - - -""" -@see #check_input_file() -@see https://docs.python.org/2/library/hashlib.html -@see https://docs.python.org/2/library/sqlite3.html#sqlite3.Connection.row_factory - -Returns the md5 sum for the redi input file -""" - - -def get_md5_input_file(input_file): - if not os.path.exists(input_file): - raise Exception('Input file not found at: ' + input_file) - - logger.info('Computing md5 sum for: ' + input_file) - - # open the file in binary mode - f = open(input_file, 'rb') - chunk_size = 2 ** 20 - md5 = hashlib.md5() - - # read the input file in 1MB pieces - while True: - chunk = f.read(chunk_size) - if not chunk: - break - md5.update(chunk) - - return md5.hexdigest() - - -""" -@see #check_input_file() -@param db_path - the SQLite file -@param md5 - the md5 sum to be inserted -""" - - -def add_batch_entry(db_path, md5): - logger.info('Execute: add_batch_entry()') - batch = None - - db = None - try: - db = lite.connect(db_path) - db.row_factory = dict_factory - cur = db.cursor() - sql = """ -INSERT INTO RediBatch - (rbStartTime, rbEndTime, rbStatus, rbMd5Sum) -VALUES - ( ?, NULL, 'Started', ?) 
-""" - now = get_db_friendly_date_time() - cur.execute(sql, (now, md5)) - rbID = cur.lastrowid - db.commit() - batch = get_batch_by_id(db_path, rbID) - - except lite.Error as e: - logger.error("SQLite error in add_batch_entry() for file %s - %s" % (db_path, e.args[0])) - return None - finally: - if db: - db.close() - - return batch - - -""" -Update the status and the finish time of a specified batch entry in the SQLite db - -@return True if update succeeded, False otherwise -""" - - -def update_batch_entry(db_path, id, status, timestamp): - success = None - db = None - try: - db = lite.connect(db_path) - cur = db.cursor() - sql = """ -UPDATE - RediBatch -SET - rbEndTime = ? - , rbStatus = ? -WHERE - rbID = ? -""" - - cur.execute(sql, (timestamp, status, id)) - db.commit() - scuccess = True - except lite.Error as e: - logger.exception("SQLite error in update_batch_entry(): %s:" % e.args[0]) - success = False - finally: - if db: - db.close() - - return success - - -""" -@return string in format: "2014-06-24 01:23:24" -""" - - -def get_db_friendly_date_time(): - return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') - -""" -@return string in format: 2014-06-24 -""" - - -def get_db_friendly_date(): - return date.today() - -""" -@return the number of days passed since the specified date -""" - - -def get_days_since_today(date_string): - num = None - other = datetime.datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S') - now = datetime.datetime.now() - delta = now - other - return delta.days - -""" -Helper function for debugging xml content -""" -def printxml(tree): - print etree.tostring(tree, pretty_print = True) - return From a0834d429440962512f3b029ea78658cd64ff6a8 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Thu, 20 Nov 2014 13:29:19 -0500 Subject: [PATCH 49/51] Update version number to 0.13.0 --- redi/redi.py | 2 +- setup.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/redi/redi.py b/redi/redi.py index a8111a1..a960aaf 100755 
--- a/redi/redi.py +++ b/redi/redi.py @@ -31,7 +31,7 @@ __author__ = "Nicholas Rejack" __copyright__ = "Copyright 2013, University of Florida" __license__ = "BSD 2-Clause" -__version__ = "0.11.3" +__version__ = "0.13.0" __email__ = "nrejack@ufl.edu" __status__ = "Development" diff --git a/setup.py b/setup.py index bb2e02c..1d5f98c 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name='redi-py', - version='0.12.0', + version='0.13.0', author='https://www.ctsi.ufl.edu/research/study-development/informatics-consulting/', author_email='cts-it-red@ctsi.ufl.edu', packages=find_packages(exclude=['test']), @@ -28,7 +28,7 @@ 'redi': ['README.md'], }, url='https://github.com/ctsit/redi', - download_url = 'https://github.com/ctsit/redi/releases/tag/0.12.0', + download_url = 'https://github.com/ctsit/redi/releases/tag/0.13.0', keywords = ['EMR', 'EHR', 'REDCap', 'Clinical Data'], license='BSD 3-Clause', description='REDCap Electronic Data Importer', @@ -48,6 +48,7 @@ test_suite='test.TestSuite', tests_require=[ "mock >= 1.0", + "sftpserver >= 0.2", ], setup_requires=[ "nose >= 1.0", From 8ddf769f22bde18ea58dbea5e3fa9f391d65b8d3 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Thu, 20 Nov 2014 13:33:20 -0500 Subject: [PATCH 50/51] Update documentation --- docs/api/bin.rst | 45 --------------------- docs/api/bin.utils.rst | 62 ---------------------------- docs/api/modules.rst | 7 ++-- docs/api/redi.rst | 69 +++++++++++++++++++++++++++++++ docs/api/redi.utils.rst | 78 ++++++++++++++++++++++++++++++++++++ docs/integration_testing.rst | 8 ++-- 6 files changed, 154 insertions(+), 115 deletions(-) delete mode 100644 docs/api/bin.rst delete mode 100644 docs/api/bin.utils.rst create mode 100644 docs/api/redi.rst create mode 100644 docs/api/redi.utils.rst diff --git a/docs/api/bin.rst b/docs/api/bin.rst deleted file mode 100644 index 86e72a4..0000000 --- a/docs/api/bin.rst +++ /dev/null @@ -1,45 +0,0 @@ -bin package -=========== - -Subpackages ------------ - -.. 
toctree:: - - bin.utils - -Submodules ----------- - -bin.form module ---------------- - -.. automodule:: bin.form - :members: - :undoc-members: - :show-inheritance: - -bin.redi module ---------------- - -.. automodule:: bin.redi - :members: - :undoc-members: - :show-inheritance: - -bin.redi_lib module -------------------- - -.. automodule:: bin.redi_lib - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: bin - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/bin.utils.rst b/docs/api/bin.utils.rst deleted file mode 100644 index 35295c1..0000000 --- a/docs/api/bin.utils.rst +++ /dev/null @@ -1,62 +0,0 @@ -bin.utils package -================= - -Submodules ----------- - -bin.utils.GetEmrData module ---------------------------- - -.. automodule:: bin.utils.GetEmrData - :members: - :undoc-members: - :show-inheritance: - -bin.utils.SimpleConfigParser module ------------------------------------ - -.. automodule:: bin.utils.SimpleConfigParser - :members: - :undoc-members: - :show-inheritance: - -bin.utils.csv2xml module ------------------------- - -.. automodule:: bin.utils.csv2xml - :members: - :undoc-members: - :show-inheritance: - -bin.utils.redcapClient module ------------------------------ - -.. automodule:: bin.utils.redcapClient - :members: - :undoc-members: - :show-inheritance: - -bin.utils.redcap_records module -------------------------------- - -.. automodule:: bin.utils.redcap_records - :members: - :undoc-members: - :show-inheritance: - -bin.utils.redi_email module ---------------------------- - -.. automodule:: bin.utils.redi_email - :members: - :undoc-members: - :show-inheritance: - - -Module contents ---------------- - -.. automodule:: bin.utils - :members: - :undoc-members: - :show-inheritance: diff --git a/docs/api/modules.rst b/docs/api/modules.rst index c08f98d..14c3b5c 100644 --- a/docs/api/modules.rst +++ b/docs/api/modules.rst @@ -1,8 +1,7 @@ -.. -== +redi +==== .. 
toctree:: :maxdepth: 4 - bin - + redi diff --git a/docs/api/redi.rst b/docs/api/redi.rst new file mode 100644 index 0000000..d183a5c --- /dev/null +++ b/docs/api/redi.rst @@ -0,0 +1,69 @@ +redi package +============ + +Subpackages +----------- + +.. toctree:: + + redi.utils + +Submodules +---------- + +redi.batch module +----------------- + +.. automodule:: redi.batch + :members: + :undoc-members: + :show-inheritance: + +redi.form module +---------------- + +.. automodule:: redi.form + :members: + :undoc-members: + :show-inheritance: + +redi.redi module +---------------- + +.. automodule:: redi.redi + :members: + :undoc-members: + :show-inheritance: + +redi.redi_lib module +-------------------- + +.. automodule:: redi.redi_lib + :members: + :undoc-members: + :show-inheritance: + +redi.report module +------------------ + +.. automodule:: redi.report + :members: + :undoc-members: + :show-inheritance: + +redi.upload module +------------------ + +.. automodule:: redi.upload + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: redi + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/api/redi.utils.rst b/docs/api/redi.utils.rst new file mode 100644 index 0000000..8d9969a --- /dev/null +++ b/docs/api/redi.utils.rst @@ -0,0 +1,78 @@ +redi.utils package +================== + +Submodules +---------- + +redi.utils.GetEmrData module +---------------------------- + +.. automodule:: redi.utils.GetEmrData + :members: + :undoc-members: + :show-inheritance: + +redi.utils.SimpleConfigParser module +------------------------------------ + +.. automodule:: redi.utils.SimpleConfigParser + :members: + :undoc-members: + :show-inheritance: + +redi.utils.csv2xml module +------------------------- + +.. automodule:: redi.utils.csv2xml + :members: + :undoc-members: + :show-inheritance: + +redi.utils.rawxml module +------------------------ + +.. 
automodule:: redi.utils.rawxml + :members: + :undoc-members: + :show-inheritance: + +redi.utils.redcapClient module +------------------------------ + +.. automodule:: redi.utils.redcapClient + :members: + :undoc-members: + :show-inheritance: + +redi.utils.redcap_records module +-------------------------------- + +.. automodule:: redi.utils.redcap_records + :members: + :undoc-members: + :show-inheritance: + +redi.utils.redi_email module +---------------------------- + +.. automodule:: redi.utils.redi_email + :members: + :undoc-members: + :show-inheritance: + +redi.utils.throttle module +-------------------------- + +.. automodule:: redi.utils.throttle + :members: + :undoc-members: + :show-inheritance: + + +Module contents +--------------- + +.. automodule:: redi.utils + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/integration_testing.rst b/docs/integration_testing.rst index c5f175b..97feb75 100644 --- a/docs/integration_testing.rst +++ b/docs/integration_testing.rst @@ -97,7 +97,7 @@ Erase the data in the correct project if necessary: :: - $ ../bin/utils/redcap_records.py --token=121212 --url=http://localhost:8998/redcap/api/ -i demographic_test_data.csv + $ ../redi/utils/redcap_records.py --token=121212 --url=http://localhost:8998/redcap/api/ -i demographic_test_data.csv On success the following text is returned: @@ -119,7 +119,7 @@ On success the following text is returned: Find and adjust the "Rate Limiter" field to something like 60000 - $ python ../bin/redi.py + $ python ../redi/redi.py If the output from the command above produces an exception then check if your IP was not banned due to numerous requests sent (@see related code @@ -149,7 +149,7 @@ If the token is invalid the following error is returned: :: - $ ../bin/utils/redcap_records.py --token=121212 --url=http://localhost:8998/redcap/api/ -f "demgraphics chemistry" > output_B.csv + $ ../redi/utils/redcap_records.py --token=121212 --url=http://localhost:8998/redcap/api/ -f 
"demgraphics chemistry" > output_B.csv If you have a lot of forms, the output comparison is easier if you export the forms separately like this: @@ -166,7 +166,7 @@ export the forms separately like this: for form in $forms do - ../bin/utils/redcap_records.py --token=121212 --url=http://localhost:8998/redcap/api/ --forms=$form > $batch/$form.csv + ../redi/utils/redcap_records.py --token=121212 --url=http://localhost:8998/redcap/api/ --forms=$form > $batch/$form.csv done Later do the diff like this: From f22ec261d0002a9272d3a7a94d2a8d3f272218b4 Mon Sep 17 00:00:00 2001 From: Taeber Rapczak Date: Thu, 20 Nov 2014 14:05:01 -0500 Subject: [PATCH 51/51] Update CHANGELOG and AUTHORS.md for v0.13.0 --- AUTHORS.md | 2 +- CHANGELOG | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/AUTHORS.md b/AUTHORS.md index f2ddb3f..76d13fb 100644 --- a/AUTHORS.md +++ b/AUTHORS.md @@ -24,5 +24,5 @@ Erik Schmidt, and Chris Barnes provided direction to the development effort. Radha Kandula, Mohan Das Katragadda, Ruchi Desai, Yang Li, Kumar Sadhu, Alex Loiacono, Erik Schmidt, Nicholas Rejack, Philip Chase, Roy Keyes, -and Andrei Sura provided code to make this project awesome. +Andrei Sura, and Taeber Rapczak provided code to make this project awesome. We are nothing without our developers. diff --git a/CHANGELOG b/CHANGELOG index 989837f..47f77ce 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,79 @@ +2014-11-20 v0.13.0 + +The focus of this release has been clean up. Related functions were moved into +their own files for easier maintenance and comprehension. Reports were enhanced +and bugs related to them were fixed. Throttling and uploading were both +re-examined and new algorithms implemented which, during testing, run-times +decreased anywhere between 12 and 57%. 
+ + * Add new REDCap Rate Limit throttling algorithm + * Improve speed of marking events as sent + * Add script that generates enrollment records + * Rename "bin" to "redi" + * Write report.html to data directory instead of current directory + * Write report.xml to data directory instead of configuration directory + * Added helper class `bin/utils/rawxml.py` to store information about the + input file (can add more info if needed) + * Extract report creation and sending from _run() + * Add more useful aliases for vagrant box + * Updated documentation + * Enhanced report + * Add the `report.html` as attachment + +2014-10-07 v0.12.0 + +* Summary: Converted documentation to .rst format, enabled key-based access to SFTP servers to fetch data, enabled installation via fetching from the PyPI repository. + + * remove parse_args() function (Radha Kandula) + * Add docopt to redi (Radha Kandula) + * Small fix for the README links (Andrei Sura) + * Use the `cts-it-red@ctsi.ufl.edu` email for consistency with `setup.py` (Andrei Sura) + * Document installation from binaries: `pip install redi-py` (Andrei Sura) + * Restore the short README.md because it is referenced by `setup.py` (Andrei Sura) + * Remove unused `doc` folder All `*.rst` files are now in the `docs` folder (Andrei Sura) + * Move `doc/flowcharts` to `docs/flowcharts` (Andrei Sura) + * Fix links in `README.rst` (Andrei Sura) + * Replace `README.md` by a shorter `README.rst` because we have the full content in `docs/about.rst` (Andrei Sura) + * Add `help` tasks in the main `Makefile` (Andrei Sura) + * Fix some misspellings of REDCap (Taeber Rapczak) + * Fix configuration files to allow uploading to the pypi.python.org repo @see https://pypi.python.org/pypi/redi-py/0.11.3 (Andrei Sura) + * Fix broken documentation links during conversion to `*.rst` (Andrei Sura) + * Remove files `setup.rst and test.rst` from `docs/api` to avoid unnecessary document creation (Andrei Sura) + * added default emr password of None 
(Nicholas Rejack) + * removed emr sftp server password from required parameters (Nicholas Rejack) + * removed author name (Radha Kandula) + * Make `GetEmrData.py` immune to `string` passed as port number (Andrei Sura) + * Add `requirements.txt` file for RTD (Andrei Sura) + * Use default theme in `docs/conf.py` (Andrei Sura) + * Edit one method doc to verify if sphinx can pick it (Andrei Sura) + * Save important files for docs `sphinx-ification` (Andrei Sura) + * Save converted files from `doc/*.md` to `docs/*.rst` (Andrei Sura) + * Copy images from `doc/images` to `docs/images` for the ReST migration (Andrei Sura) + * implemented review comments for pull request #84 (Radha Kandula) + * Add contact info to `README.md` (Andrei Sura) + * Update AUTHORS.md (Andrei Sura) + * Expand documentation about `Installation Steps Using Source Code` + Add short section `How to Use RED-I` (Andrei Sura) + * One line documentation fix for deprecated `--keep=yes` (Andrei Sura) + * Reformat `describing_a_redcap_form_to_red-i.md` for shorter lines + Moved `screenshot-field-name-lookup.png` to the `images` folder (Andrei Sura) + * add fields() function to Event class to get all the fields of it (Radha Kandula) + * Rules for clearing values using keyword 'cancel' (Radha Kandula) + * Fix anchors for document sections in README.md (Andrei Sura) + * Link to Philip's RED-I presentation from the main README.md (Andrei Sura) + * Link to our code review guidelines `doc/code-review-checklist.md` from the main README.md (Andrei Sura) + * Add the removed pieces from README.md as separate files in the `doc/` folder (Andrei Sura) + * Add more links to existing files in the `doc/` folder from README.md (Andrei Sura) + * Add support for --record parameter in redcap_records.py (Philip Chase) + * Fix issue with download file name when connecting to sftp (Andrei Sura) + * Remove developer specific details from main README.md (Andrei Sura) + * Add dependency on `sftpserver` to `.travis.yml` (Andrei Sura) + 
* Add unit test for connecting to sftp with a private key (Andrei Sura) + * Use new `sftp_server_*` params when connecting to sftp with a private key (Andrei Sura) + * Use `EmrFileAccessDetails` object for passing new sftp params: * sftp_server_port * sftp_server_private_key * sftp_server_private_key_pass (Andrei Sura) + * Correct names for two tasks in `vagrant/Makefile` (Andrei Sura) + * Improve documentation in `config-example/settings.ini` Closes issue #63 (Andrei Sura) + * Remove unused script: `exportForms.sh` (Andrei Sura) + * Use boolean args `keep` and `emrdata` Closes issue #53 with dedicated unit test class `TestArgs` (Andrei Sura) + 2014-09-23 v0.11.3 * Summary: Update config-example to work with new sample project