diff --git a/.gitignore b/.gitignore index e6b2fe2..ae5639a 100644 --- a/.gitignore +++ b/.gitignore @@ -41,6 +41,7 @@ vagrant/redcap-backup-*.sql vagrant/REDI-*.egg vagrant/redcap.zip +vagrant/plugins/ vagrant/redcap_database.sql vagrant/sqlPatches vagrant/data/ @@ -51,3 +52,5 @@ vagrant/redi.db data/ redi.db report.html +redi.pstats +callgraph.svg diff --git a/CHANGELOG b/CHANGELOG index 47f77ce..4cdbcde 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,14 @@ +2014-12-04 v0.13.1 + + * Fixed major flaw in the Throttle class introduced in v0.13.0 + * Added --bulk-send-blanks, which will send all blank form-events together + * Added support for redcap_plugins to our vagrant testing and sample project + * Added new Makefile targets for various helper tasks like profiling and clearing the IP ban list + * Installed mcrypt package for PHP to enable plugins in vagrant + * Added a REDCap plugin to generate URLs for enhanced error reporting + * Fixed #34: Data Import Report totals only reflect most recent run + * Added execution time to the report + 2014-11-20 v0.13.0 The focus of this release has been clean up. 
Related functions were moved into diff --git a/Makefile b/Makefile index fc016a2..e49cc49 100644 --- a/Makefile +++ b/Makefile @@ -60,6 +60,7 @@ clean: rm -rf vagrant/data/ rm -f vagrant/redi.db rm -f config-example/report.xml + rm -f redi.pstats pypi: #https://pythonhosted.org/Distutils2/distutils/packageindex.html diff --git a/redi/batch.py b/redi/batch.py index a567cde..cde9a76 100644 --- a/redi/batch.py +++ b/redi/batch.py @@ -23,7 +23,8 @@ logger = logging.getLogger(__name__) logger.addHandler(logging.NullHandler()) - +BATCH_STATUS_STARTED = 'Started' +BATCH_STATUS_COMPLETED = 'Completed' """ @see #check_input_file() @@ -66,8 +67,9 @@ def create_empty_table(db_path) : cur = db.cursor() sql = """CREATE TABLE RediBatch ( rbID INTEGER PRIMARY KEY AUTOINCREMENT, - rbStartTime TEXT NOT NULL, - rbEndTime TEXT, + rbCreateTime DATETIME DEFAULT CURRENT_TIMESTAMP, + rbStartTime DATETIME, + rbEndTime DATETIME, rbStatus TEXT, rbMd5Sum TEXT NOT NULL ) @@ -102,7 +104,7 @@ def dict_factory(cursor, row): Check the md5sum of the input file - if the sum *has changed* then continue the data processing and store a row - in the SQLite database with `batch status= batch_started/ batch_completed` + in the SQLite database with `batch status= started/ completed` - if the sum *did not change* then check the config option `batch_warning_days`: - if limit = -1 then continue execution (ignore the limit) @@ -111,7 +113,7 @@ def dict_factory(cursor, row): """ -def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, project): +def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, project, start_time): batch = None if not os.path.exists(db_path) : @@ -132,27 +134,28 @@ def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, logger.info( "There is no old md5 recorded yet for the input file. 
Continue data import...") batch = add_batch_entry(db_path, new_md5ive) - record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( - batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) + record_msg = 'Added batch (rbID= %s, rbCreateTime= %s, rbMd5Sum= %s' % ( + batch['rbID'], batch['rbCreateTime'], batch['rbMd5Sum']) logger.info(record_msg) return batch if old_md5ive != new_md5ive: # the data has changed... insert a new batch entry batch = add_batch_entry(db_path, new_md5ive) - record_msg = 'Added batch (rbID= %s, rbStartTime= %s, rbMd5Sum= %s' % ( - batch['rbID'], batch['rbStartTime'], batch['rbMd5Sum']) + record_msg = 'Added batch (rbID= %s, rbCreateTime= %s, rbMd5Sum= %s' % ( + batch['rbID'], batch['rbCreateTime'], batch['rbMd5Sum']) logger.info(record_msg) return batch else: - days_since_today = get_days_since_today(old_batch['rbStartTime']) + days_since_today = get_days_since_today(old_batch['rbCreateTime']) # TODO: refactor code to use ConfigParser.RawConfigParser in order to # preserve data types if (days_since_today > int(batch_warning_days)): raw_xml = RawXml(project, raw_xml_file) msg_file_details = "\nXML file details: " + raw_xml.get_info() - logger.info('Last import was started on: %s which is more than the limit of %s' % (old_batch['rbStartTime'], batch_warning_days)) + logger.info('Last import was started on: %s which is more than '\ + ' the limit of %s' % (old_batch['rbStartTime'], batch_warning_days)) if (-1 == int(batch_warning_days)): msg_continue = """ The configuration `batch_warning_days = -1` indicates that we want to continue @@ -161,7 +164,7 @@ def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, logger.info(msg_continue) else: - msg_quit = "The input file did not change in the past: %s days. Stop data import." % batch_warning_days + msg_quit = "The input file did not change in the past: %s days." 
% days_since_today logger.critical(msg_quit + msg_file_details) redi_email.send_email_input_data_unchanged(email_settings, raw_xml) sys.exit() @@ -178,14 +181,14 @@ def check_input_file(batch_warning_days, db_path, email_settings, raw_xml_file, def get_last_batch(db_path): - db = None + batch = None try: db = lite.connect(db_path) db.row_factory = dict_factory cur = db.cursor() sql = """ SELECT - rbID, rbStartTime, rbEndTime, rbMd5Sum + rbID, rbCreateTime, rbStartTime, rbEndTime, rbMd5Sum FROM RediBatch ORDER BY rbID DESC @@ -204,20 +207,18 @@ def get_last_batch(db_path): return batch -""" -Retrieve the row corresponding to the specified primary key -""" - - def get_batch_by_id(db_path, batch_id): db = None + """ + Retrieve the row corresponding to the specified primary key + """ try: db = lite.connect(db_path) db.row_factory = dict_factory cur = db.cursor() sql = """ SELECT - rbID, rbStartTime, rbEndTime, rbMd5Sum + rbID, rbCreateTime, rbStartTime, rbEndTime, rbMd5Sum FROM RediBatch WHERE @@ -238,16 +239,15 @@ def get_batch_by_id(db_path, batch_id): return batch -""" -@see #check_input_file() -@see https://docs.python.org/2/library/hashlib.html -@see https://docs.python.org/2/library/sqlite3.html#sqlite3.Connection.row_factory - -Returns the md5 sum for the redi input file -""" - def get_md5_input_file(input_file): + """ + @see #check_input_file() + @see https://docs.python.org/2/library/hashlib.html + @see https://docs.python.org/2/library/sqlite3.html#sqlite3.Connection.row_factory + + Returns the md5 sum for the redi input file + """ if not os.path.exists(input_file): raise Exception('Input file not found at: ' + input_file) @@ -268,30 +268,31 @@ def get_md5_input_file(input_file): return md5.hexdigest() -""" -@see #check_input_file() -@param db_path - the SQLite file -@param md5 - the md5 sum to be inserted -""" - - def add_batch_entry(db_path, md5): - logger.info('Execute: add_batch_entry()') - batch = None - - db = None + """ + Inserts a row into RediBatch 
table + @see #check_input_file() + Parameters + ---------- + db_path : string + The SQLite database file name + md5 : string + The md5 sum to be inserted + create_time : string + The batch start time + """ try: db = lite.connect(db_path) db.row_factory = dict_factory cur = db.cursor() sql = """ INSERT INTO RediBatch - (rbStartTime, rbEndTime, rbStatus, rbMd5Sum) + (rbCreateTime,rbStartTime, rbEndTime, rbStatus, rbMd5Sum) VALUES - ( ?, NULL, 'Started', ?) + ( ?, NULL, NULL, ?, ?) """ - now = get_db_friendly_date_time() - cur.execute(sql, (now, md5)) + create_time = get_db_friendly_date_time() + cur.execute(sql, (create_time, BATCH_STATUS_STARTED, md5)) rbID = cur.lastrowid db.commit() batch = get_batch_by_id(db_path, rbID) @@ -306,16 +307,19 @@ def add_batch_entry(db_path, md5): return batch -""" -Update the status and the finish time of a specified batch entry in the SQLite db - -@return True if update succeeded, False otherwise -""" - +def update_batch_entry(db_path, id, status, start_time, end_time): + """ + Update the status and the start/end time of a specified batch entry + Return True if update succeeded, False otherwise -def update_batch_entry(db_path, id, status, timestamp): - success = None - db = None + Parameters + ---------- + db_path : string + id : integer + status : string + start_time : datetime string + end_time : datetime string + """ try: db = lite.connect(db_path) cur = db.cursor() @@ -323,15 +327,16 @@ def update_batch_entry(db_path, id, status, timestamp): UPDATE RediBatch SET - rbEndTime = ? + rbStartTime = ? + , rbEndTime = ? , rbStatus = ? WHERE rbID = ? 
""" - cur.execute(sql, (timestamp, status, id)) + cur.execute(sql, (start_time, end_time, status, id)) db.commit() - scuccess = True + success = True except lite.Error as e: logger.exception("SQLite error in update_batch_entry(): %s:" % e.args[0]) success = False @@ -342,37 +347,32 @@ def update_batch_entry(db_path, id, status, timestamp): return success -""" -@return string in format: "2014-06-24 01:23:24" -""" - - def get_db_friendly_date_time(): + """ + @return string in format: "2014-06-24 01:23:24" + """ return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') -""" -@return string in format: 2014-06-24 -""" - def get_db_friendly_date(): + """ + @return string in format: 2014-06-24 + """ return datetime.date.today() -""" -@return the number of days passed since the specified date -""" - - def get_days_since_today(date_string): + """ + @return the number of days passed since the specified date + """ num = None other = datetime.datetime.strptime(date_string, '%Y-%m-%d %H:%M:%S') now = datetime.datetime.now() delta = now - other return delta.days -""" -Helper function for debugging xml content -""" def printxml(tree): + """ + Helper function for debugging xml content + """ print etree.tostring(tree, pretty_print = True) return diff --git a/redi/redi.py b/redi/redi.py index a960aaf..7d5f83b 100755 --- a/redi/redi.py +++ b/redi/redi.py @@ -4,7 +4,7 @@ Usage: redi.py -h | --help - redi.py [-v] [-k] [-e] [-d] [-r] [-c=] [-D=] [-s] + redi.py [-v] [-k] [-e] [-d] [-r] [-c=] [-D=] [-s] [-b] Options: -h --help show this help message and exit @@ -26,12 +26,13 @@ running multiple simultaneous instances of redi for different projects -s --skip-blanks skip blank events when sending event data to REDCap [default:False] + -b --bulk-send-blanks send blank events in bulk instead of individually [default:False] """ __author__ = "Nicholas Rejack" __copyright__ = "Copyright 2013, University of Florida" __license__ = "BSD 2-Clause" -__version__ = "0.13.0" +__version__ = "0.13.1" 
__email__ = "nrejack@ufl.edu" __status__ = "Development" @@ -176,7 +177,7 @@ def main(): _run(config_file, configuration_directory, do_keep_gen_files, dry_run, get_emr_data, settings, output_files, db_path, redcap_client, report_courier, report_creator, args['--resume'], - args['--skip-blanks']) + args['--skip-blanks'], args['--bulk-send-blanks']) def get_db_path(batch_info_database, database_path): @@ -260,7 +261,8 @@ def connect_to_redcap(email_settings, redcap_settings, dry_run=False): def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, get_emr_data, settings, data_folder, database_path, redcap_client, - report_courier, report_creator, resume=False, skip_blanks=False): + report_courier, report_creator, resume=False, skip_blanks=False, + bulk_send_blanks=False): global translational_table_tree assert _person_form_events_service is not None @@ -281,15 +283,13 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, # load custom post-processing rules rules = load_rules(settings.rules, configuration_directory) - # read in 3 main data files / translation tables - raw_xml_file = os.path.join(configuration_directory, settings.raw_xml_file) - # we need the batch information to set the - # status to `completed` an ste the `rbEndTime` email_settings = get_email_settings(settings) db_path = database_path + # Insert/load batch row so we can set the `completed` status + start_time = batch.get_db_friendly_date_time() current_batch = _check_input_file(db_path, email_settings, raw_xml_file, - settings) + settings,start_time) form_events_file = os.path.join(configuration_directory,\ settings.form_events_file) @@ -321,7 +321,23 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, # Use the new method to communicate with REDCap report_data = upload.generate_output( person_form_event_tree_with_data, redcap_client, - settings.rate_limiter_value_in_redcap, sent_events, skip_blanks) + 
settings.rate_limiter_value_in_redcap, sent_events, skip_blanks, + bulk_send_blanks) + + # Save the time it took to send data to REDCap + done_time = batch.get_db_friendly_date_time() + # Update the batch row + batch.update_batch_entry(database_path, + current_batch['rbID'], + batch.BATCH_STATUS_COMPLETED, + start_time, + done_time) + duration_dict = { + 'all' : { + 'start': start_time, + 'end': done_time, + }, + } # write person_form_event_tree to file write_element_tree_to_file(person_form_event_tree_with_data,\ @@ -330,7 +346,6 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, logger.warning( 'Some of the events were not sent to the REDCap server. Please ' "check the log file or {0}/sent_events.idx".format(data_folder)) - # Add any errors from running the rules to the report map(logger.warning, rule_errors) @@ -339,16 +354,13 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, # create summary report html_str = report_creator.create_report( - report_data, alert_summary, collection_date_summary_dict) + report_data, + alert_summary, + collection_date_summary_dict, + duration_dict) report_courier.deliver(html_str) - if current_batch: - # Update the batch row - done_timestamp = batch.get_db_friendly_date_time() - batch.update_batch_entry(db_path, current_batch['rbID'], 'Completed', - done_timestamp) - if dry_run: logger.info("End of dry run. 
All output files are ready for review"\ " in " + data_folder) @@ -507,16 +519,16 @@ def _create_person_form_event_tree_with_data( collection_date_summary_dict -def _check_input_file(db_path, email_settings, raw_xml_file, settings): +def _check_input_file(db_path, email_settings, raw_xml_file, settings,start_time): return batch.check_input_file(settings.batch_warning_days, db_path, email_settings, raw_xml_file, - settings.project) + settings.project,start_time) -def read_config(config_file, configuration_directory, file_list): - """function to check if files mentioned in configuration files exist - Philip +def read_config(config_file, configuration_directory, file_list): + """ + Check if files mentioned in configuration files exist """ for item in file_list: if not os.path.exists(os.path.join(configuration_directory, item)): diff --git a/redi/report.py b/redi/report.py index e5b9f9c..f86fb58 100644 --- a/redi/report.py +++ b/redi/report.py @@ -2,6 +2,7 @@ import time import pkg_resources +from datetime import datetime, timedelta from lxml import etree from utils import redi_email @@ -99,7 +100,7 @@ def __init__(self, report_file_path, project_name, redcap_uri, } self._writer = writer - def create_report(self, report_data, alert_summary, collection_date_summary_dict): + def create_report(self, report_data, alert_summary, collection_date_summary_dict, duration_dict): report_parameters = self._report_parameters write_element_tree_to_file = self._writer @@ -121,6 +122,13 @@ def create_report(self, report_data, alert_summary, collection_date_summary_dict sort_by_value = 'lab_id' if report_parameters['is_sort_by_lab_id'] else 'redcap_id' root.append(gen_ele("sort_details_by", sort_by_value)) + start = duration_dict['all']['start'] + end = duration_dict['all']['end'] + diff = self.get_time_diff(end, start) + root.append(gen_ele('time_all_start', start[-8:])) + root.append(gen_ele('time_all_end', end[-8:])) + root.append(gen_ele('time_all_diff', 
self.format_seconds_as_string(diff))) + tree = etree.ElementTree(root) write_element_tree_to_file(tree,report_parameters.get('report_file_path')) @@ -133,6 +141,36 @@ def create_report(self, report_data, alert_summary, collection_date_summary_dict return html_str + def get_time_diff(self, end, start): + """ + Get time difference in seconds from the two dates + Parameters + ---------- + end : string + The end timestamp + start : string + The start timestamp + """ + # sqlite: select strftime('%s', rbEndTime) - strftime('%s', rbStartTime) from RediBatch; + fmt = '%Y-%m-%d %H:%M:%S' + dt_end = datetime.strptime(end, fmt) + dt_start = datetime.strptime(start, fmt) + diff = (dt_end - dt_start).total_seconds() + return diff + + + def format_seconds_as_string(self,seconds): + """ + Convert seconds to a friendly strings + 3662 ==> '01:01:02' + 89662 ==> '1 day, 0:54:22' + Parameters + ---------- + seconds : integer + The number of seconds to be converted + """ + return str(timedelta(seconds=seconds)) + def updateReportHeader(root, report_parameters): """ Update the passed `root` element tree with date, project name and url""" diff --git a/redi/upload.py b/redi/upload.py index f1b7be9..4ab7a0e 100644 --- a/redi/upload.py +++ b/redi/upload.py @@ -7,6 +7,7 @@ __license__ = "BSD 3-Clause" import ast +import collections import datetime import logging import os @@ -62,8 +63,32 @@ def create_import_data_json(import_data_dict, event_tree): return {'json_data': import_data_dict, 'contains_data': contains_data} +def create_redcap_records(import_data): + """ + Creates REDCap records from RED-I's form data, AKA import data. + + REDCap API only accepts records for importing. Records are differentiated by + their unique record ID, unless the REDCap Project is a Longitudinal study. + In that case, they are differentiated by a combination of record ID and an + event. 
+ + Since RED-I views the world in terms of forms, we have to project our + form-centric view into REDCap's record-centric world. This is done by + combining all form data with the same Subject ID and Event Name into the + same record. + + :param import_data: iterable of 4-tuples: (study_id_key, form_name, + event_name, json_data_dict) + :return: iterable of REDCap records ready for upload + """ + records_by_subject_and_event = collections.defaultdict(dict) + for subject_id_key, _, event_name, record in import_data: + records_by_subject_and_event[subject_id_key, event_name].update(record) + return records_by_subject_and_event.itervalues() + + def generate_output(person_tree, redcap_client, rate_limit, sent_events, - skip_blanks=False): + skip_blanks=False, bulk_send_blanks=False): """ Note: This function communicates with the redcap application. Steps: @@ -102,6 +127,8 @@ def generate_output(person_tree, redcap_client, rate_limit, sent_events, upload_data = throttle.Throttle(redcap_client.send_data_to_redcap, int(rate_limit)) + blanks = [] + # main loop for each person for person in persons: time_begin = datetime.datetime.now() @@ -145,11 +172,6 @@ def generate_output(person_tree, redcap_client, rate_limit, sent_events, event_name = event.findtext('name', '') assert event_name, "Missing name for form event" - if sent_events.was_sent(study_id_key, form_name, event_name): - logger.debug("Skipping previously sent " + event_name) - continue - event_count += 1 - try: import_dict = { redcap_client.project.def_field: study_id.text} @@ -159,12 +181,28 @@ def generate_output(person_tree, redcap_client, rate_limit, sent_events, json_data_dict = import_dict['json_data'] contains_data = import_dict['contains_data'] - # If we're skipping blanks and this event is blank, we - # assume all following events are blank; therefore, break - # out of this for-loop and move on to the next form. 
- if skip_blanks and not contains_data: - break - + if sent_events.was_sent(study_id_key, form_name, event_name): + logger.debug("Skipping previously sent " + event_name) + if contains_data: + # if no error_strings encountered update event counters + subject_details[study_id_key][form_key] += 1 + form_details[form_key] += 1 + continue + + is_blank = not contains_data + if is_blank: + if skip_blanks: + # assume subsequent events for this form and subject + # are blank and simply move on to the next form by + # breaking out of the events-loop + break + + if bulk_send_blanks: + blanks.append((study_id_key, form_name, event_name, + json_data_dict)) + continue + + event_count += 1 if (0 == event_count % 50): logger.info('Requests sent: %s' % (event_count)) @@ -176,17 +214,15 @@ def generate_output(person_tree, redcap_client, rate_limit, sent_events, upload_data([json_data_dict], overwrite=True) sent_events.mark_sent(study_id_key, form_name, event_name) logger.debug("Sent " + event_name) + if contains_data: + # if no errors encountered update event counters + subject_details[study_id_key][form_key] += 1 + form_details[form_key] += 1 except RedcapError as e: found_error = handle_errors_in_redcap_xml_response( e.message, report_data) - if contains_data: - if not found_error: - # if no errors encountered update event counters - subject_details[study_id_key][form_key] += 1 - form_details[form_key] += 1 - except Exception as e: logger.error(e.message) raise @@ -195,6 +231,19 @@ def generate_output(person_tree, redcap_client, rate_limit, sent_events, logger.info("Total execution time for study_id %s was %s" % (study_id_key, (time_end - time_begin))) logger.info("Total REDCap requests sent: %s \n" % (event_count)) + if blanks: + logger.info("Sending blank forms in bulk...") + records = list(create_redcap_records(blanks)) + + try: + response = upload_data(records, overwrite=True) + for study_id_key, form_name, event_name, record in blanks: + sent_events.mark_sent(study_id_key, 
form_name, event_name) + logger.info("Sent {} blank form-events.".format(response['count'])) + except RedcapError as error: + logger.error("Failed to send blank form-events.") + handle_errors_in_redcap_xml_response(error.message, report_data) + report_data.update({ 'total_subjects': person_count, 'form_details': form_details, diff --git a/redi/utils/report.xsl b/redi/utils/report.xsl index 154d33b..a46adaf 100644 --- a/redi/utils/report.xsl +++ b/redi/utils/report.xsl @@ -68,6 +68,17 @@ $(document).ready(function() { + + Start/End Times + + From to + + + + + Duration + +

Summary

@@ -96,7 +107,9 @@ $(document).ready(function() {
- + + +

Import Alerts

@@ -139,6 +152,7 @@ $(document).ready(function() {
+

Subject Details

diff --git a/redi/utils/throttle.py b/redi/utils/throttle.py index f67ce82..434461b 100644 --- a/redi/utils/throttle.py +++ b/redi/utils/throttle.py @@ -4,6 +4,7 @@ import collections import datetime +import logging import time __author__ = "University of Florida CTS-IT Team" @@ -11,6 +12,10 @@ __license__ = "BSD 3-Clause" +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + + class Throttle(object): """ Limits the number of calls to a function to a given rate. @@ -33,7 +38,7 @@ def __init__(self, function, max_calls, interval_in_seconds=60): def __call__(self, *args, **kwargs): """ Conditionally delays before calling the function """ self._wait() - self._actual(*args, **kwargs) + return self._actual(*args, **kwargs) def _limit_reached(self): """ Returns True if the maximum number of calls has been reached """ @@ -44,6 +49,12 @@ def _now(): # Used during unit testing return datetime.datetime.now() + def _remove_old_entries(self): + """ Removes old timestamp entries """ + while (len(self._timestamps) > 0 and + self._now() - self._timestamps[0] >= self._interval): + self._timestamps.popleft() + @staticmethod def _sleep(seconds): # Used during unit testing @@ -51,13 +62,16 @@ def _sleep(seconds): def _wait(self): """ Sleeps for the remaining interval if the limit has been reached """ - now = self._now() + if self._limit_reached(): + logger.debug('Throttling limit reached.') + lapsed = self._now() - self._timestamps[0] + + if lapsed < self._interval: + sleep_time = (self._interval - lapsed).total_seconds() + logger.debug("Sleeping for {} seconds to prevent too many calls" + .format(sleep_time)) + self._sleep(sleep_time) - limit_reached = len(self._timestamps) == self._max_requests - if limit_reached: - lapsed = now - self._timestamps[0] - if lapsed <= self._interval: - self._sleep((self._interval - lapsed).total_seconds()) - self._timestamps.clear() + self._remove_old_entries() - self._timestamps.append(now) + 
self._timestamps.append(self._now()) diff --git a/redi/utils/url_generator.py b/redi/utils/url_generator.py new file mode 100644 index 0000000..22998c7 --- /dev/null +++ b/redi/utils/url_generator.py @@ -0,0 +1,60 @@ +import logging +from HTMLParser import HTMLParser +import urllib +import urllib2 +""" +Usage: + Below is the sample code to use this utility. + + from url_generator Import url_generator + + url = "http://localhost:8998/redcap/plugins/show_url.php" + values = {"project_name":"Classic Database", + "study_id":"999-001", + "page_name":"demographics", + "event_name":"Event 1"} + parser = InitPlugin(url, values) + + +""" +# Configure module's logger +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) + +class InitPlugin(): + """Call the Form URL Plugin for REDCap with POST parameters + and get the response. + Parse the response and extract the URL form it. + + """ + def __init__(self, url, values): + self.url = url + self.values = values + log_str = "Initializing Plugin parser with URL: "+self.url + logger.info(log_str) + self.get_response(self.url, self.values) + + def get_response(self, url, values): + post_data = urllib.urlencode(values) + prepared_request = urllib2.Request(url, post_data) + response = urllib2.urlopen(prepared_request) + html_string = response.read() + parser = PluginParser() + parser.feed(html_string) + return parser.output + +class PluginParser(HTMLParser): + """Parser module for the HTML response received + + """ + def feed(self, data): + self.output = [] + HTMLParser.feed(self, data) + + def handle_starttag(self, tag, attrs): + # search for tag 'a' + if tag == "a": + # iterate through properties of anchor tag + for name, value in attrs: + if name == "href": + self.output.append(value) diff --git a/setup.py b/setup.py index 1d5f98c..08f8b9b 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name='redi-py', - version='0.13.0', + version='0.13.1', 
author='https://www.ctsi.ufl.edu/research/study-development/informatics-consulting/', author_email='cts-it-red@ctsi.ufl.edu', packages=find_packages(exclude=['test']), @@ -28,7 +28,7 @@ 'redi': ['README.md'], }, url='https://github.com/ctsit/redi', - download_url = 'https://github.com/ctsit/redi/releases/tag/0.13.0', + download_url = 'https://github.com/ctsit/redi/releases/tag/0.13.1', keywords = ['EMR', 'EHR', 'REDCap', 'Clinical Data'], license='BSD 3-Clause', description='REDCap Electronic Data Importer', diff --git a/test/TestCreateSummaryReport.py b/test/TestCreateSummaryReport.py index 64e3388..788025f 100644 --- a/test/TestCreateSummaryReport.py +++ b/test/TestCreateSummaryReport.py @@ -1,6 +1,3 @@ -''' -Unit test for `redi.create_summary_report()` -''' import unittest import os import sys @@ -13,8 +10,14 @@ DEFAULT_DATA_DIRECTORY = os.getcwd() class TestCreateSummaryReport(unittest.TestCase): + """ + Unit test for `redi.create_summary_report()` + """ def setUp(self): + """ + Prepare data structures + """ redi.configure_logging(DEFAULT_DATA_DIRECTORY) self.test_report_params = { 'project': 'hcvtarget-uf', @@ -37,7 +40,7 @@ def setUp(self): }, 'errors' : [], } - self.specimen_taken_time_summary = {'total': 15, 'blank': 3} + self.test_alert_summary = { 'multiple_values_alert': [ 'This is multiple values alert 1', @@ -48,6 +51,15 @@ def setUp(self): 'This is max event alert 2', 'This is max event alert 3'] } + + self.specimen_taken_time_summary = {'total': 15, 'blank': 3} + self.duration_dict = { + 'all' : { + 'start': "2014-01-01 00:00:00", + 'end': "2014-01-01 00:00:01", + } + } + self.expected_xml = '''
@@ -154,6 +166,9 @@ def setUp(self): 20.0 lab_id + 00:00:00 + 00:00:01 + 0:00:01 ''' self.schema_str = StringIO('''\ @@ -265,6 +280,9 @@ def setUp(self): + + + @@ -292,10 +310,9 @@ def __call__(self, *args, **kwargs): writer) creator.create_report(self.test_report_data, self.test_alert_summary, - self.specimen_taken_time_summary) + self.specimen_taken_time_summary, self.duration_dict) result = writer.result - result_string = etree.tostring(result) #print result_string xmlschema_doc = etree.parse(self.schema_str) diff --git a/test/TestResume.py b/test/TestResume.py index df255c5..c7589b2 100644 --- a/test/TestResume.py +++ b/test/TestResume.py @@ -15,13 +15,15 @@ class FileDeleted(): pass import redi.redi - redi = reload(redi.redi) + redi_ref = reload(redi.redi) + redi_ref._person_form_events_service = MockPersonFormEvents() - redi._person_form_events_service = MockPersonFormEvents() - redi._check_input_file = lambda *args: None + import redi.batch + batch = reload(redi.batch) + batch.check_input_file = lambda *args: None with self.assertRaises(FileDeleted): - redi._run(config_file=None, configuration_directory='', + redi_ref._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, database_path=None, redcap_client=None, @@ -39,18 +41,21 @@ class FileStored(): pass import redi.redi - redi = reload(redi.redi) + redi_ref = reload(redi.redi) - redi._person_form_events_service = MockPersonFormEvents() - redi._check_input_file = lambda *args: None - redi._create_person_form_event_tree_with_data = lambda *args: (None, None, None, None) - redi._delete_last_runs_data = lambda *args: None - redi._removedirs = lambda *args: None - redi._mkdir = lambda *args: None - redi.connect_to_redcap = lambda *args: None + redi_ref._person_form_events_service = MockPersonFormEvents() + redi_ref._create_person_form_event_tree_with_data = lambda *args: (None, None, None, None) + 
redi_ref._delete_last_runs_data = lambda *args: None + redi_ref._removedirs = lambda *args: None + redi_ref._mkdir = lambda *args: None + redi_ref.connect_to_redcap = lambda *args: None + + import redi.batch + batch = reload(redi.batch) + batch.check_input_file = lambda *args: None with self.assertRaises(FileStored): - redi._run(config_file=None, configuration_directory='', + redi_ref._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, database_path=None, redcap_client=None, @@ -65,13 +70,15 @@ class DataFetched(): pass import redi.redi - redi = reload(redi.redi) + redi_ref = reload(redi.redi) + redi_ref._person_form_events_service = MockPersonFormEvents() - redi._person_form_events_service = MockPersonFormEvents() - redi._check_input_file = lambda *args: None + import redi.batch + batch = reload(redi.batch) + batch.check_input_file = lambda *args: None with self.assertRaises(DataFetched): - redi._run(config_file=None, configuration_directory='', + redi_ref._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, settings=MockSettings(), data_folder=None, database_path=None, resume=True, redcap_client=None, @@ -81,3 +88,6 @@ class DataFetched(): class MockSettings(object): def __getattr__(self, item): return '' if ('file' in item) else None + +if __name__ == '__main__': + unittest.main() diff --git a/test/TestThrottle.py b/test/TestThrottle.py index a816b51..178138f 100644 --- a/test/TestThrottle.py +++ b/test/TestThrottle.py @@ -9,34 +9,76 @@ class TestThrottle(unittest.TestCase): def test_throttle(self): - class Clock(object): - def __init__(self): - self.now = datetime.datetime.now() + clock = MockClock() + throttle.Throttle._now = clock + throttle.Throttle._sleep = clock.add_seconds + + call = throttle.Throttle(lambda: None, max_calls=3, + interval_in_seconds=5) + + call() + call() + call() + 
self.assertEquals(3, len(call._timestamps)) + before = clock.now - def __call__(self): - return self.now + call() - def add_seconds(self, seconds): - self.now += datetime.timedelta(seconds=seconds) + self.assertEqual(5, (clock.now - before).total_seconds()) - clock = Clock() + def test_throttle_front_loaded(self): + clock = MockClock() throttle.Throttle._now = clock throttle.Throttle._sleep = clock.add_seconds call = throttle.Throttle(lambda: None, max_calls=3, interval_in_seconds=5) - call() # t=0 - clock.add_seconds(1) - call() # t=1 - clock.add_seconds(2) - call() # t=3 - clock.add_seconds(1) - call() # t=4 + call() + call() + call() + self.assertEquals(3, len(call._timestamps)) + clock.add_seconds(5) + before = clock.now + + call() + self.assertEquals(1, len(call._timestamps)) - clock.add_seconds(1) - call() # t=5 + self.assertEqual(0, (clock.now - before).total_seconds()) + + def test_throttle_back_loaded(self): + clock = MockClock() + throttle.Throttle._now = clock + throttle.Throttle._sleep = clock.add_seconds + + call = throttle.Throttle(lambda: None, max_calls=3, + interval_in_seconds=5) + + start = clock.now + call() + clock.add_seconds(4) + call() + call() + self.assertEquals(3, len(call._timestamps)) + + call() + self.assertEquals(3, len(call._timestamps)) + self.assertEqual(5, (clock.now - start).total_seconds()) + + call() self.assertEquals(2, len(call._timestamps)) + self.assertEqual(9, (clock.now - start).total_seconds()) + + +class MockClock(object): + def __init__(self): + self.now = datetime.datetime.now() + + def __call__(self): + return self.now + + def add_seconds(self, seconds): + self.now += datetime.timedelta(seconds=seconds) if __name__ == '__main__': diff --git a/vagrant/Makefile b/vagrant/Makefile index 5747bb3..ec5e02e 100644 --- a/vagrant/Makefile +++ b/vagrant/Makefile @@ -12,6 +12,7 @@ ifneq ("$(wildcard $(MAKE_CONFIG_FILE))", "") REDCAP_DB_SQL_FILE := $(CONFIG_FOLDER_VAGRANT)/$(shell cat ${MAKE_CONFIG_FILE} | sed -e 's/ //g' | grep 
-v '^\#' | grep 'redcap_db_sql_file=' | cut -d '=' -f2) REDCAP_CODE_ZIP_FILE := $(CONFIG_FOLDER_VAGRANT)/redcap.zip + REDCAP_CODE_PLUGIN_FOLDER := $(CONFIG_FOLDER_VAGRANT)/redcap_plugins REDCAP_SQL_PATCHES_FOLDER := $(CONFIG_FOLDER_VAGRANT)/sqlPatches ENROLLMENT_CSV_FILE := $(CONFIG_FOLDER_VAGRANT)/enrollment_test_data.csv REFERENCE_OUTPUT_FILE:= $(CONFIG_FOLDER_VAGRANT)/redi_out_reference.csv @@ -54,7 +55,7 @@ help: show_steps: @echo "\n Steps for importing data into the sample project:" @echo "\t make clean" - @echo "\t make copy_config_example" + @echo "\t make copy_config_example or make copy_config_develop" @echo "\t make copy_redcap_code" @echo "\t make copy_project_data" @echo "\t make show_config" @@ -63,6 +64,12 @@ show_steps: @echo "\t make rc_post" @echo "\t make rc_get" +fresh_vm_example: + vagrant destroy && make clean && make copy_config_example && make copy_redcap_code && make copy_project_data && vagrant up && make rc_enrollment && make rc_get + +fresh_vm_develop: + vagrant destroy && make clean && make copy_config_develop && make copy_redcap_code && make copy_project_data && vagrant up && make rc_enrollment && make rc_get + copy_config_example: @# Copy the config file for running make tasks cp ../config-example/vagrant-data/Makefile.ini . 
@@ -77,6 +84,7 @@ check_config: @test -f $(CONFIG_FILE) || (echo 'Please obtain the config file "$(CONFIG_FILE)"' && exit 1) @test -f $(REDCAP_DB_SQL_FILE) || (echo 'Please obtain the project sql dump file "$(REDCAP_DB_SQL_FILE)"' && exit 1) @test -f $(REDCAP_CODE_ZIP_FILE) || (echo 'Please obtain the redcap software zip file "$(REDCAP_CODE_ZIP_FILE)"' && exit 1) + @test -d $(REDCAP_CODE_PLUGIN_FOLDER) || (echo 'WARNING: did not find a REDCap plugins folder "$(REDCAP_CODE_PLUGIN_FOLDER)"') @test -f $(ENROLLMENT_CSV_FILE) || (echo 'Config error: missing file "$(ENROLLMENT_CSV_FILE)"' && exit 1) show_config: check_config @@ -91,6 +99,8 @@ show_config: check_config copy_redcap_code: check_config cp $(REDCAP_CODE_ZIP_FILE) . + @test -d plugins/ || (mkdir -p plugins/) + test ! -d $(REDCAP_CODE_PLUGIN_FOLDER) || (cp -R $(REDCAP_CODE_PLUGIN_FOLDER)/. plugins/) copy_project_data: check_config @# Bring in the REDCap database file with a name expected by bootstrap.sh @@ -116,6 +126,9 @@ rc_enrollment: check_config rc_post: python ../redi/redi.py -c $(CONFIG_FOLDER) +rc_post_skip_blanks: + python ../redi/redi.py -c $(CONFIG_FOLDER) --skip-blanks + rc_get: check_config $(REDCAP_RECORDS_CMD) -f "$(REDCAP_PROJECT_FORMS)" @@ -129,7 +142,6 @@ rc_get_enrollment_meta: @curl -X POST http://localhost:8998/redcap/api/ -d token=$(REDCAP_VM_TOKEN) -d content=metadata -d format=csv -d forms[]=enrollment rc_fresh: - make copy_project_data make rc_clean make rc_enrollment make rc_post @@ -142,6 +154,31 @@ rc_set_rate: # echo $(filter-out $@,$(MAKECMDGOALS)) vagrant ssh -c 'mysql -uroot -ppassword -e "UPDATE redcap.redcap_config SET value = $(filter-out $@,$(MAKECMDGOALS)) WHERE field_name = \"page_hit_threshold_per_minute\" "' +rc_remove_ban: + vagrant ssh -c 'mysql -uroot -ppassword -e "DELETE FROM redcap.redcap_ip_banned; DELETE FROM redcap.redcap_ip_cache;" ' + +rc_show_columns: + vagrant ssh -c 'mysql -uroot -ppassword -e "SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, DATA_TYPE FROM
information_schema.COLUMNS WHERE TABLE_SCHEMA = \"redcap\" " ' + +rc_show_logs: + vagrant ssh -c 'mysql -uroot -ppassword -e "SELECT log_event_id, ip, object_type, event_id, data_values, description, sql_log FROM redcap.redcap_log_event ORDER BY log_event_id DESC LIMIT 10" ' + +rc_disable_auth: + @# Use this task to remove login requirement for REDCap + vagrant ssh -c 'mysql -uroot -ppassword -e "UPDATE redcap.redcap_config SET value = \"none\" WHERE field_name = \"auth_meth_global\" "' +rc_enable_auth: + vagrant ssh -c 'mysql -uroot -ppassword -e "UPDATE redcap.redcap_config SET value = \"table\" WHERE field_name = \"auth_meth_global\" "' + +rc_enable_listing_nonauth_projects: + vagrant ssh -c 'mysql -uroot -ppassword -e "UPDATE redcap.redcap_config SET value = 1 WHERE field_name = \"display_nonauth_projects\" "' + +rc_test_plugin: + $(MAKE) rc_disable_auth + @# Note: To test the "show url" plugin you have to have records in the "demographics" form + @# http://localhost:8998/redcap/plugins/redi/show_url.php?project_name=HCV-TARGET+2.0+DEVELOPMENT&study_id=1&page_name=demographics&event_name=1 + curl -s -X POST http://localhost:8998/redcap/plugins/redi/show_url.php -d project_name=HCV-TARGET+2.0+DEVELOPMENT -d study_id=1 -d page_name=demographics -d event_name=1 | grep URL + $(MAKE) rc_enable_auth + rc_compare: @# This task can be used to compare the current project data with the reference data $(REDCAP_RECORDS_CMD) -f "$(REDCAP_PROJECT_FORMS)" > out.csv @@ -161,6 +198,10 @@ egg_test: check_config clean: @# This task removes all copied/generated files rm -f redcap.zip + rm -rf plugins rm -f Makefile.ini rm -f projectDataBootstrap.sql rm -f out.csv + +profile: + python -m cProfile -o redi.pstats ../redi/redi.py --skip-blanks -c $(CONFIG_FOLDER) -k && gprof2dot -f pstats redi.pstats | dot -Tsvg -o callgraph.svg && open -a "Google Chrome.app" callgraph.svg diff --git a/vagrant/Makefile_db b/vagrant/Makefile_db index c8f8132..173790e 100644 --- a/vagrant/Makefile_db +++
b/vagrant/Makefile_db @@ -1,5 +1,5 @@ -DB_FILE=$(shell python -c 'from appdirs import AppDirs; dirs = AppDirs("redi"); print ("\"" + dirs.user_data_dir + "/0.10.0/redi.db\"")') +DB_FILE=../redi.db .PHONY: help help: @@ -31,8 +31,7 @@ add: fresh: rm -f $(DB_FILE) - touch $(DB_FILE) - sqlite3 $(DB_FILE) 'CREATE TABLE RediBatch ( rbID INTEGER PRIMARY KEY AUTOINCREMENT, rbStartTime TEXT NOT NULL, rbEndTime TEXT, rbStatus TEXT, rbMd5Sum TEXT NOT NULL);' + sqlite3 $(DB_FILE) 'CREATE TABLE RediBatch ( rbID INTEGER PRIMARY KEY AUTOINCREMENT, rbCreateTime DATETIME DEFAULT CURRENT_TIMESTAMP, rbStartTime DATETIME, rbEndTime DATETIME, rbStatus TEXT, rbMd5Sum TEXT NOT NULL);' @test -s $(DB_FILE) || echo 'File $(DB_FILE) was not created' clean: diff --git a/vagrant/bootstrap_functions.sh b/vagrant/bootstrap_functions.sh index f804952..35a231c 100644 --- a/vagrant/bootstrap_functions.sh +++ b/vagrant/bootstrap_functions.sh @@ -10,6 +10,7 @@ function run_environment_updates() { # environment utils cp $SHARED_FOLDER/aliases /home/vagrant/.bash_aliases + cp $SHARED_FOLDER/vimrc /home/vagrant/.vimrc # Install libraries used by python apt-get update @@ -21,6 +22,9 @@ function run_environment_updates() { # configure MySQL to start every time update-rc.d mysql defaults + + # Install mcrypt package for PHP + apt-get install -y php5-mcrypt } function extract_redcap() { @@ -32,6 +36,12 @@ function extract_redcap() { REDCAP_VERSION_DETECTED=`ls /var/www/redcap | grep redcap_v | cut -d 'v' -f2 | sort -n | tail -n 1` echo "$REDCAP_ZIP_FILE content indicates Redcap version: $REDCAP_VERSION_DETECTED" + + # copy the RED-I plugin files (including subfolders) into the REDCap plugins folder + PLUGINS_DESTINATION_FOLDER="/var/www/redcap/plugins/redi" + mkdir -p "$PLUGINS_DESTINATION_FOLDER" + echo "Copying RED-I REDCap plugins to $PLUGINS_DESTINATION_FOLDER" + cp -R "$SHARED_FOLDER/plugins/." "$PLUGINS_DESTINATION_FOLDER" # adjust ownership so apache can write to the temp folders chown -R www-data.root /var/www/redcap/edocs/ diff --git
a/vagrant/vimrc b/vagrant/vimrc new file mode 100644 index 0000000..d578a46 --- /dev/null +++ b/vagrant/vimrc @@ -0,0 +1,56 @@ +set ls=2 +set bs=2 +set background=dark + +set nu +set nocompatible +set ruler +filetype on +syntax on +filetype indent on + +set autoindent +set smartindent +set splitbelow +set splitright +set showmatch + +set expandtab +set tabstop=4 " Set Tab size +set expandtab " Expand Tabs (pressing Tab inserts spaces) +set shiftwidth=4 " Number of spaces to use for each step of (auto)indent +set softtabstop=4 " makes the spaces feel like real tabs; one backspace goes back X spaces :) +set backspace=indent,eol,start +retab + + +set nowrapscan +set ignorecase " Ignore case in search +set smartcase " Consider case only when typing Uppercase +set incsearch " Show search results when typing +set hlsearch " highlight search pattern +set vb t_vb= " don't notify (no audio/visual bell) +set showmode " display mode INSERT/REPLACE/... +set scrolloff=3 " do not let the cursor get too close to the edge +set laststatus=2 " laststatus: show status line? Yes, always! + +map! " F1 != Help; remove access to help :) +map +" switch lines +map ddkkp +map :set number! + +nnoremap 2 wl + +" restore position +function! ResCur() + if line("'\"") <= line("$") + normal! g`" + return 1 + endif +endfunction + +augroup resCur + autocmd! + autocmd BufWinEnter * call ResCur() +augroup END