diff --git a/.gitignore b/.gitignore index 2f22350..0faeca4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +.coverage +cover/ +coverage.xml +nosetests.xml *.eggs venv/ *.pyc diff --git a/CHANGELOG b/CHANGELOG index 8505c03..2015adf 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,29 @@ +2016-03-17 v0.15.1 + +* Summary: RED-I now accepts a file as input from the command line, using -f FILENAME. + +* add tests to validate the input file used with the commandline switch -f or --file (Christopher Barnes) +* move logger setup and config to start sooner since it is needed by other functions (Christopher Barnes) +* add testdata.csv to use as test input for commandline options test and development (Christopher Barnes) +* added .coverage, cover/, coverage.xml and nosetests.xml to .gitignore. They are local to my test runs (Christopher Barnes) +* add TODO items (Christopher Barnes) +* add todo item for adding a switch to allow another sorting behavior for data points and events (Christopher Barnes) +* Fixing broken test. (Nicholas Rejack) +* added raw text file to def _run and function signature (Kevin Steven Hanson) +* moved raw txt exception out of run function (Kevin Steven Hanson) +* adding option to bypass raw.txt with an if else block that makes more sense than my previous one (Kevin Steven Hanson) +* Fixing bad operator. (Nicholas Rejack) +* Adding code to catch if user tries to use -e and -f at same time (Nicholas Rejack) +* added ability to use input file and bypass hard coded raw.txt (Kevin Steven Hanson) +* i called logger in my argument before it was defined, my bad (Kevin Steven Hanson) +* adding info statement to work with logger for -f argument (Kevin Steven Hanson) +* Added the missing usage for the file input. (Buck72) +* Add the flag for input file for REDI to use. 
(Buck72) +* created input file path to handle -f file handle with a basic print statement for now (Kevin Steven Hanson) +* add .eggs as correction for directory name to .gitignore (Christopher Barnes) +* add .egg and virtualenvironment folder venv from instructions to .gitignore (Christopher Barnes) +* Adding a new exception to watch for in redcapClient.py: ConnectionError (Nicholas Rejack) + 2016-02-12 v0.15.0 * Summary: Improved handling of network issues while sending to REDCap. diff --git a/config-example/testdata.csv b/config-example/testdata.csv new file mode 100755 index 0000000..62533ab --- /dev/null +++ b/config-example/testdata.csv @@ -0,0 +1,7 @@ +"STUDY_ID","NAME","LOINC_CODE","RESULT","REFERENCE_UNIT","DATE_TIME_STAMP" +"001","payment_status","1234","UNABLE TO PAY","","2016-03-01" +"001","payment_status","1234","ABLE TO PAY","","2016-03-02" +"001","preferred_dinner","5678","turkey, no gravy","2016-05-01" +"001","drink","5679","Tab","2016-05-01" +"001","preferred_dinner","5678","full rack of ribs, onion rings","2016-05-02" +"001","drink","5679","sweet tea, extra sugar","2016-05-02" diff --git a/docs/conf.py b/docs/conf.py index d166cae..28498f7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -54,16 +54,16 @@ # General information about the project. project = u'RED-I' -copyright = u'2014-2015, CTS-IT' +copyright = u'2014-2016, CTS-IT' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.14.2' +version = '0.15.1' # The full version, including alpha/beta/rc tags. -release = '0.14.2' +release = '0.15.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
diff --git a/redi/TODO.md b/redi/TODO.md new file mode 100644 index 0000000..b8469eb --- /dev/null +++ b/redi/TODO.md @@ -0,0 +1,26 @@ +# Write a script to generate this file, along the lines of +# `grep -r -i "TODO" . >> TODO.md` but something to eliminate duplicates. + +./redi.py: # TODO: add `a` switch to redi to tell it to keep all data points from the input file. +./batch.py: # TODO: refactor code to use ConfigParser.RawConfigParser in order to +./redi.py: # TODO: UPDATE COMMENT HERE +./redi.py: # TODO better description of commandline argument handling +./redi.py: #TODO: make parameters configurable +./redi.py: # TODO: post processing will go here +./redi.py: # TODO: make this able to run against a local file if desired +./redi.py: # TODO: Add preproc errors to report +./redi.py:# TODO: look at adding a switch to RED-I, that will need to be caught here, that +./redi.py:# @TODO: remove settings from signature +./redi.py: TODO: fix load_rules and load_prerules for better parallelism +./redi.py: # TODO figure out if this creates a sub process or not +./redi.py: # TODO need to check for program exe3cution otherwise give error +./report.py: # TODO: Replace this with a "backup_courier" constructor injection +./report.py: # TODO: remove dependency on the order of the xml elements in the report +./upload.py: @TODO: evaluate performance +./utils/GetEmrData.py: # TODO: Replace csv2xml.py entirely? +./utils/SimpleConfigParser.py:# TODO: +./utils/throttle.py: # @TODO: investigate why the deque is not +# TODO: make tests: requirement -- msg= mock requires setuptools>=17.1. Aborting installation +TODO: make tests requirement- msg=sudo easy_install sftpserver +TODO: make all settings files pythonic aka config.yml .. 
etc using YAML standard +TODO: take NOT_DONE examples for values without units data points out of vagrant/redi_out_reference.csv for hcv examples diff --git a/redi/redi.py b/redi/redi.py index f2079c8..e67cbe1 100755 --- a/redi/redi.py +++ b/redi/redi.py @@ -22,7 +22,7 @@ Usage: redi.py -h | --help - redi.py [-v] [-V] [-k] [-e] [-d] [-r] [-c=] [-D=] [-s] [-b] + redi.py [-v] [-V] [-k] [-e] [-d] [-f=] [-r] [-c=] [-D=] [-s] [-b] Options: -h --help Show this help message and exit @@ -39,6 +39,7 @@ The processed data is stored as output files under the "out" folder under project root [default:False]. + -f --file= Specify the path and filename to use as input for REDI. -r --resume WARNING!!! Resumes the last run. This switch is for a specific case. Check the documentation before using it. [default:False] @@ -142,9 +143,6 @@ def main(): # obtaining command line arguments for path to configuration directory args = docopt(__doc__, help=True) - # TODO create local variable to catch commandline arguement -f - input_file_path = args['-f'] - print input_file_path data_directory = args['--datadir'] if data_directory is None: @@ -154,10 +152,52 @@ def main(): if configuration_directory is None: configuration_directory = os.path.join(data_directory, "config") + do_keep_gen_files = args['--keep'] get_emr_data = args['--emrdata'] dry_run = args['--dryrun'] + # setup the logger right away + # configure logger + #TODO: make logger parameters configurable + logger = configure_logging(data_directory, args['--verbose'], when='D', interval=1, backup_count=31) + +# ______________________________________________________________________________ +# commandline switch: -f, --file +# ______________________________________________________________________________ +# -f, --file are commandline switches to allow the passing in of a data file to +# RED-I. The file should exist, and it should be a CSV, and it should be +# readable. 
If no input is specified then use the defaults and look for a file +# called raw.txt in the specified `config` directory. +# ______________________________________________________________________________ + + # parse the commandline argument, args requires using long opt if it exists + # i.e. -f won't work here if --file is also part of the option. + input_file_path = args['--file'] + + # TODO: need to add this to the debug/verbose scaffolding + # print input_file_path + # sys.exit(0) + + # say something nice to the people. + logger.info("Using file passed in via -f switch. File name: " + input_file_path) + + # check to see if a file was passed in + if (input_file_path != ""): + # check to make sure its a file + if os.path.isfile(input_file_path): + #check to see if you can read it + if os.access(input_file_path, os.R_OK): + #ok , all is well make the assignment ;) + raw_txt_file = input_file_path + else: + logger.info("File passed in at the commandline cannot be accessed, file: " + input_file_path) + else: + logger.info("File passed in at the commandline is not a file: " + input_file_path) + + else: + raw_txt_file = os.path.join(configuration_directory, 'raw.txt') + # display version number and quit if args['--version']: print(__version__) @@ -165,9 +205,11 @@ def main(): print("http://redi.readthedocs.org") sys.exit() - # configure logger - #TODO: make parameters configurable - logger = configure_logging(data_directory, args['--verbose'], when='D', interval=1, backup_count=31) + + if input_file_path and get_emr_data: + logger.error("You cannot use -e and -f together.") + logger.error("RED-I will now terminate.") + sys.exit() # Parsing the config file using a method from module SimpleConfigParser settings = SimpleConfigParser.SimpleConfigParser() @@ -219,7 +261,7 @@ def main(): # This is the run that loads the data _run(config_file, configuration_directory, do_keep_gen_files, dry_run, - get_emr_data, settings, output_files, db_path, redcap_client, + get_emr_data, 
settings, output_files, db_path, raw_txt_file, redcap_client, report_courier, report_creator, args['--resume'], args['--skip-blanks'], args['--bulk-send-blanks']) @@ -308,7 +350,7 @@ def connect_to_redcap(email_settings, redcap_settings, dry_run=False): def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, - get_emr_data, settings, data_folder, database_path, redcap_client, + get_emr_data, settings, data_folder, database_path, raw_txt_file, redcap_client, report_courier, report_creator, resume=False, skip_blanks=False, bulk_send_blanks=False): global translational_table_tree @@ -336,7 +378,6 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, errors = run_preproc(pre_filters, settings) map(logger.warning, errors) - raw_txt_file = os.path.join(configuration_directory, 'raw.txt') escaped_file = os.path.join(configuration_directory, 'rawEscaped.txt') raw_xml_file = os.path.join(configuration_directory, 'raw.xml') @@ -909,6 +950,14 @@ def compress_data_using_study_form_date(data): logger.debug("Remove duplicate result using key: {}".format(key_debug)) subj.getparent().remove(subj) +# TODO: look at adding a switch to RED-I, that will need to be caught here, that +# will allow another behavior here that will let us keep all results vs +# the current behavior of sorting the events by timestamp and keeping only +# the first one to occur on a given day. Example: when this feature is +# implemented red-i will be able to keep only 1 data point for each day +# for 50 days or keep 50 data points that may occur on the same day and +# map the 50 into 50 event slots in redcap. 
+ filt = dict() # third loop filters out all results in a "bucket" except the first one diff --git a/setup.py b/setup.py index 142b0a9..b07a3e7 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name='redi', - version='0.15.0', + version='0.15.1', author='https://www.ctsi.ufl.edu/research/study-development/informatics-consulting/', author_email='ctsit@ctsi.ufl.edu', packages=find_packages(exclude=['test']), @@ -27,7 +27,7 @@ 'redi': ['utils/*.xsl', 'utils/*.xsd'] }, url='https://github.com/ctsit/redi', - download_url = 'https://github.com/ctsit/redi/releases/tag/0.15.0', + download_url = 'https://github.com/ctsit/redi/releases/tag/0.15.1', keywords = ['EMR', 'EHR', 'REDCap', 'Clinical Data'], license='BSD 3-Clause', description='REDCap Electronic Data Importer', diff --git a/test/TestResume.py b/test/TestResume.py index 27e834b..e80b579 100644 --- a/test/TestResume.py +++ b/test/TestResume.py @@ -42,8 +42,8 @@ class FileDeleted(): with self.assertRaises(FileDeleted): redi_ref._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, - settings=MockSettings(), data_folder=None, - database_path=None, redcap_client=None, + settings=MockSettings(), data_folder=None, + database_path=None, raw_txt_file = None, redcap_client=None, report_courier=None, report_creator=None) def test_no_resume_stores(self): @@ -75,7 +75,7 @@ class FileStored(): with self.assertRaises(FileStored): redi_ref._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, - settings=MockSettings(), data_folder=None, + settings=MockSettings(), data_folder=None, raw_txt_file = None, database_path=None, redcap_client=None, report_courier=None, report_creator=None) @@ -98,7 +98,7 @@ class DataFetched(): with self.assertRaises(DataFetched): redi_ref._run(config_file=None, configuration_directory='', do_keep_gen_files=None, dry_run=True, get_emr_data=False, - settings=MockSettings(), 
data_folder=None, + settings=MockSettings(), data_folder=None, raw_txt_file = None, database_path=None, resume=True, redcap_client=None, report_courier=None, report_creator=None)