diff --git a/.gitignore b/.gitignore index 6aa3c74..e688c88 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,4 @@ redi.pstats callgraph.svg mprofile_* private/ +.ropeproject/ diff --git a/.travis.yml b/.travis.yml index 2488103..7260e92 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,8 +1,11 @@ + +# http://docs.travis-ci.com/user/migrating-from-legacy/ +sudo: false language: python python: - "2.7" before_install: - sudo apt-get install -y python-setuptools libxml2 libxslt1-dev python-dev + echo 'apt-get install -y libxml2 libxslt1-dev' install: - pip install requests - pip install lxml diff --git a/CHANGELOG b/CHANGELOG index b924ae3..8ea8533 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,23 @@ +2015-05-14 v0.14.2 + * Summary: Now RED-I can fetch more files initially and run pre-processors in a specific order. + + * Runs the dictionary of preprocessors in order sorted by their keys (module names). This is necessary when using multiple preprocessors that have ordered rules. Unsorted dictionaries have unpredictable ordering. (Nicholas Rejack) + * make clean: remove `.eggs` folder (Andrei Sura) + * Fixes #168 - add option to display version number (Andrei Sura) + * Moving logic for creating dictionary of files to download to __init__ method. Improved logging. (Nicholas Rejack) + * Updating TestGetEMRData.py to match new variable names in GetEmrData.py. (Nicholas Rejack) + * Updated GetEmrData.py to rename download_file variable to download_list and download_file function to download_files. (Nicholas Rejack) + * Fixing TestGetEMRData.py now that a new variable has been added to the EmrFileAccessDetails. (Nicholas Rejack) + * UpdatingGetEmrData.py with new functionality. Now, in settings.ini a dictionary of files to be downloaded can be specified for emr_data_file. (Nicholas Rejack) + * Changing variables in EmrFileAccessDetails for readability. (Nicholas Rejack) + * Update README.md (Christopher P. Barnes) + * Update README.md (Christopher P. 
Barnes) + * use travis in container (Andrei Sura) + * Add makefile tasks related to pypi (Andrei Sura) + * added RedHat/Fedora installation documentation. (Nicholas Rejack) + * bumping version number on docs/conf.py (Nicholas Rejack) + * repaired some broken text in about.rst in documentation. (Nicholas Rejack) + 2015-05-14 v0.14.2 * Summary: Preprocessing has been improved to use the rewritten raw CSV file after processing it. This relocates some of the XML processing from GetEMRData. diff --git a/Makefile b/Makefile index 1b692b7..f1c057d 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,9 @@ help: @echo "\t coverage - run code coverage analysis" @echo "\t lint - check code for sytax error" @echo "\t clean - remove generated files" - @echo "\t pypi - upload files to https://pypi.python.org/pypi/redi" + @echo "\t pypi_config - prepare the configuation for uploading the PyPi package" + @echo "\t pypi_register - register the package" + @echo "\t pypi_upload - upload the package https://pypi.python.org/pypi/redi" @echo "\t show_pips - show python packages installed globally" @echo "\t venv_help - show commands for installing 'redi' package in the 'venv' virtual environment" @echo "\t venv_show_pips - show python packages installed in the 'venv' virtual environment" @@ -37,7 +39,7 @@ lint: clean: find . 
-type f -name "*.pyc" -print | xargs rm -f - @rm -rf out dist build *.egg-info *.egg + @rm -rf out dist build *.egg-info .eggs @rm -rf nosetests.xml cover .coverage coverage.xml @rm -f pylint.out unittest_pysftp_rsa_key unittest_pysftp_rsa_key.pub destination_file source_file @rm -f formData.xml rawData.xml translationalData.xml rawDataWithFormName.xml rawDataWithFormCompletedField.xml @@ -45,9 +47,33 @@ clean: @rm -f all_form_events.xml person_form_event_tree.xml person_form_event_tree_with_data.xml @rm -f vagrant/redi.db config-example/report.xml redi.pstats mprofile_*.dat -pypi: - #https://pythonhosted.org/Distutils2/distutils/packageindex.html - python setup.py sdist register upload -r pypi +pypi_config: + @test -f ~/.pypirc || echo "Please create the ~/.pypirc file first. Here is a template: \n" + @test -f ~/.pypirc || (cat pypirc && exit 1) + +pypi_register: pypi_config + python setup.py register -r mdat + +pypi_upload: pypi_config + @# use secure submission: https://packaging.python.org/en/latest/distributing.html + which twine || pip install twine + #python setup.py sdist register upload -r redi + python setup.py sdist --formats=zip + twine upload dist/* -r mdat + @echo "Done. To test please execute:" + @echo "virtualenv venv && . venv/bin/activate && pip install redi && redi -h" + + +pypi_internal: + @# Test deployment on a local PyPi server + @# mkdir ~/packages && cd ~/packages + @# htpasswd -sc .htaccess tester + virtualenv venv && . 
venv/bin/activate + which pypi-server || pip install pypiserver + pypi-server -p 8080 -P .htaccess ~/packages & + python setup.py sdist register -r internal + python setup.py sdist upload -r internal + pip install --extra-index-url http://localhost:8080/simple/ redi show_pips: find /Library/Python/2.7/site-packages/ -maxdepth 2 -name __init__.py | cut -d '/' -f 6 diff --git a/docs/about.rst b/docs/about.rst index 6f2e273..f7ec168 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -106,43 +106,156 @@ Installing RED-I on Windows * Open a command prompt by clicking on the Start menu, and typing "cmd" in the Run box. * Install 64-bit Python 2.7.9 by running the following command in the command prompt: -
-msiexec /i https://www.python.org/ftp/python/2.7.9/python-2.7.9.amd64.msi 
-
+ +.. raw:: html + +
+      msiexec /i https://www.python.org/ftp/python/2.7.9/python-2.7.9.amd64.msi 
+   
+ * Next you need to be insure the command interpreter will be able to find the Python modules. Set the paths to the modules by running the following commands in the command prompt: -
-setx path "%path%;c:\python27"
-setx path "%path%;c:\python27\lib\site-packages"
-setx path "%path%;c:\python27\scripts”
-
+ +.. raw:: html + +
+      setx path "%path%;c:\python27"
+      setx path "%path%;c:\python27\lib\site-packages"
+      setx path "%path%;c:\python27\scripts"
+   
+ * Make a new directory for the RED-I files by running the following command in the command prompt: -
-mkdir c:\redi
-
+ +.. raw:: html + +
+      mkdir c:\redi
+   
+ * Download the RED-I source code from: [https://github.com/ctsit/redi/archive/0.14.1.zip] * Copy the contents of the RED-I zip file from c:\Users\%username%\Downloads\redi-0.14.1\redi-0.14.1 to c:\redi * Download the easy_install setup file from: https://bootstrap.pypa.io/ez_setup.py * Run the easy_install setup file with the following command in the command prompt: -
-python c:\Users\%username%\Downloads\ez_setup.py
-
+ +.. raw:: html + +
+      python c:\Users\%username%\Downloads\ez_setup.py
+   
+ Note: you may need to modify the path to the ez_setup.py file if it is downloaded to a different location. * Next, make a binary install of RED-I by running the following commands in the command prompt: -
-cd c:\redi
-python c:\redi\setup.py bdist_egg
-
+ +.. raw:: html + +
+      cd c:\redi
+      python c:\redi\setup.py bdist_egg
+   
+ * You will need to manually install the pycrypto dependency. To avoid having to compile it with VCForPython you can download a pre-compiled binary and install it with the following command: -
-c:\python27\scripts\easy_install http://www.voidspace.org.uk/python/pycrypto-2.6.1/pycrypto-2.6.1.win-amd64-py2.7.exe
-
+ +.. raw:: html + +
+      c:\python27\scripts\easy_install http://www.voidspace.org.uk/python/pycrypto-2.6.1/pycrypto-2.6.1.win-amd64-py2.7.exe
+   
+ * Finally, install your binary of RED-I with the following command: -
-c:\python27\scripts\easy_install.exe c:\redi\dist\redi-0.14.1-py2.7.egg
-
+ +.. raw:: html + +
+      c:\python27\scripts\easy_install.exe c:\redi\dist\redi-0.14.1-py2.7.egg
+   
+ +Installing RED-I on Red Hat and Fedora +-------------------------------------- +Download and install pip. Pip will aid you in installing the redi package. + +.. raw:: html + +
+   curl "https://bootstrap.pypa.io/get-pip.py" -o "get-pip.py" && sudo python get-pip.py
+   
+ +Note that you must have gcc (the Gnu Compiler Collection) to build RED-I. Check that you have gcc installed: + +.. raw:: html + +
+   gcc --version
+   
+ +If gcc is not installed, install it: + +.. raw:: html + +
+   sudo yum install gcc
+   
+ +Install the development libxslt, libxml2, and python-devel libraries. These allow you to build the redi source. + +.. raw:: html + +
+   sudo yum install libxslt-devel libxml2-devel python-devel
+   
+ +Install redi using pip. + +.. raw:: html + +
+   sudo pip install redi
+   
+ +RED-I is now installed. + +If you get an error message while compiling pycrypto, you will need to install pycrypto separately: + +.. raw:: html + +
+   sudo yum install python-crypto
+   
+ +* To use the example config, documentation, and other associated RED-I files, you will need to get files from the GitHub repository. You have two options: + +1. Clone the repo by using Git. + +.. raw:: html + +
+   sudo yum install git
+   
+ +Set up your install of Git to use the key on your GitHub account. Instructions are at: https://help.github.com/articles/generating-ssh-keys/ + +Now, clone the redi git repo: + +.. raw:: html + +
+   git clone git@github.com:ctsit/redi.git
+   
+ +You now have a directory called redi with the source, docs, example configuration and other RED-I files. + +2. Download the zip file + +.. raw:: html + +
+   wget https://github.com/ctsit/redi/archive/master.zip
+   sudo yum install unzip
+   unzip master.zip
+   
+ +* You now have a directory called redi-master with the source, docs, example configuration and other RED-I files. How to Test RED-I with a Sample Project --------------------------------------- diff --git a/docs/conf.py b/docs/conf.py index ff12d83..d166cae 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,9 +61,9 @@ # built documents. # # The short X.Y version. -version = '0.11.3' +version = '0.14.2' # The full version, including alpha/beta/rc tags. -release = '0.11.3' +release = '0.14.2' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/redi_usage.rst b/docs/redi_usage.rst index c3ebeab..7ca86ec 100644 --- a/docs/redi_usage.rst +++ b/docs/redi_usage.rst @@ -10,6 +10,20 @@ Optional command-line arguments: -------------------------------- - -h, --help: show the help message + +- -v, --verbose: increase verbosity of output + + :: + + $ redi -v + +- -V, --version: Show version number + + :: + + $ redi -V + + - -c: Specify the path to the configuration folder. :: @@ -24,7 +38,7 @@ Optional command-line arguments: :: - $ redi -k + $ redi -k When this parameter is provided, the output files are stored in **/out/out\_**. @@ -113,9 +127,6 @@ Optional command-line arguments: directory. Using this switch, one can run multiple instances of redi simultaneously. -- -v, --verbose: increase verbosity of output - - $ redi --verbose - --skip-blanks: skip blank events when sending event data to RedCAP diff --git a/pypirc b/pypirc new file mode 100644 index 0000000..8b319f3 --- /dev/null +++ b/pypirc @@ -0,0 +1,9 @@ +[distutils] + +index-servers = + mdat + +[mdat] +repository: https://pypi.python.org/pypi +username: mdat +password: *** diff --git a/redi/redi.py b/redi/redi.py index d6a9762..018d3dd 100755 --- a/redi/redi.py +++ b/redi/redi.py @@ -14,40 +14,47 @@ # All rights reserved. 
# # Distributed under the BSD 3-Clause License -# For full text of the BSD 3-Clause License see http://opensource.org/licenses/BSD-3-Clause +# For full text of the BSD 3-Clause License see +# http://opensource.org/licenses/BSD-3-Clause """ redi.py - Converter from raw clinical data in XML format to REDCap API data Usage: - redi.py -h | --help - redi.py [-v] [-k] [-e] [-d] [-r] [-c=] [-D=] [-s] [-b] + redi.py -h | --help + redi.py [-v] [-V] [-k] [-e] [-d] [-r] [-c=] [-D=] [-s] [-b] Options: - -h --help show this help message and exit - -v --verbose Increase verbosity of output [default:False] - -k --keep Specify this option to preserve the files generated during execution [default:False] - -e --emrdata Specify this option to get EMR data [default:False] - -d --dryrun To execute redi.py in dry run state. This is to be - able to test each release by doing a dry run, where - the data is fetched and processed but not transferred - to the production REDCap. Email is also not sent. The - processed data is stored as output files under the - "out" folder under project root [default:False]. - -r --resume WARNING!!! Resumes the last run of the program. This - switch is for a specific scenario. 
Check the - documentation before using it [default:False] - -c --config-path= Specify the path to the configuration directory - -D --datadir= Specify the path to the directory containing project - specific input and output data which will help in - running multiple simultaneous instances of redi for - different projects - -s --skip-blanks skip blank events when sending event data to REDCap [default:False] - -b --bulk-send-blanks send blank events in bulk instead of individually [default:False] + -h --help Show this help message and exit + -v --verbose Increase output verbosity [default:False] + -V --version Show version number [default:False] + -k --keep Use this option to preserve the files + generated during execution [default:False] + -e --emrdata Use this option to get EMR data [default:False] + -d --dryrun To execute redi.py in dry run state. This + is to be able to test each release by doing + a dry run, where the data is fetched and + processed but not transferred to the + production REDCap. Email is also not sent. + The processed data is stored as output + files under the "out" folder under project + root [default:False]. + -r --resume WARNING!!! Resumes the last run. This + switch is for a specific case. Check the + documentation before using it. 
[default:False] + -c --config-path= Specify the path to the configuration directory + -D --datadir= Specify the path to the directory containing + project specific input and output data which + will help in running multiple simultaneous + instances of redi for different projects + -s --skip-blanks Skip blank events when sending data to REDCap + [default:False] + -b --bulk-send-blanks Send blank events in bulk instead of + individually [default:False] """ __author__ = "University of Florida CTS-IT Team" -__version__ = "0.14.2" +__version__ = "0.14.3" __email__ = "ctsit@ctsi.ufl.edu" __status__ = "Development" @@ -137,14 +144,23 @@ def main(): data_directory = args['--datadir'] if data_directory is None: data_directory = DEFAULT_DATA_DIRECTORY + configuration_directory = args['--config-path'] if configuration_directory is None: configuration_directory = os.path.join(data_directory, "config") + do_keep_gen_files = args['--keep'] get_emr_data = args['--emrdata'] dry_run = args['--dryrun'] - #configure logger + # display version number and quit + if args['--version']: + print(__version__) + print("redi - REDCap Electronic Data Importer") + print("http://redi.readthedocs.org") + sys.exit() + + # configure logger logger = configure_logging(data_directory, args['--verbose']) # Parsing the config file using a method from module SimpleConfigParser @@ -278,8 +294,9 @@ def connect_to_redcap(email_settings, redcap_settings, dry_run=False): redcap_settings['verify_ssl']) except RequestException as error: logger.exception(error) - logger.info("Sending email to redcap support") + if not dry_run: + logger.info("Sending email to redcap support") redi_email.send_email_redcap_connection_error(email_settings) sys.exit() @@ -295,7 +312,8 @@ def _run(config_file, configuration_directory, do_keep_gen_files, dry_run, # Getting EMR data if get_emr_data: connection_details = EmrFileAccessDetails( - os.path.join(settings.emr_sftp_project_name, settings.emr_data_file), + 
settings.emr_sftp_project_name, + settings.emr_data_file, settings.emr_sftp_server_hostname, settings.emr_sftp_server_username, settings.emr_sftp_server_password, @@ -1977,7 +1995,7 @@ def load_preproc(preprocessors, root='./'): loaded = {} - for (preprocessor, path) in ast.literal_eval(preprocessors).iteritems(): + for (preprocessor, path) in ast.literal_eval(preprocessors.sorted()).iteritems(): module = None if os.path.exists(path): module = imp.load_source(preprocessor, path) diff --git a/redi/utils/GetEmrData.py b/redi/utils/GetEmrData.py index 32f4ef7..5c0866b 100644 --- a/redi/utils/GetEmrData.py +++ b/redi/utils/GetEmrData.py @@ -26,6 +26,8 @@ from csv2xml import openio, Writer from paramiko.ssh_exception import SSHException, BadAuthenticationType import sys +import ast +import copy logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -37,7 +39,8 @@ class EmrFileAccessDetails(object) : @see redi#_run() """ def __init__(self, - emr_download_file, + emr_sftp_project_name, + emr_download_list, emr_host, emr_username, emr_password, @@ -46,7 +49,13 @@ def __init__(self, emr_private_key_pass ): - self.download_file = emr_download_file + self.sftp_project_name = emr_sftp_project_name + try: + self.download_list = ast.literal_eval(emr_download_list) + # maintain backwards compatibility with existing config repos + # if it doesn't automatically evaluate to a dictionary, make a dictionary + except ValueError: + self.download_list = {str(emr_download_list): "raw.txt"} self.host = emr_host self.username = emr_username self.password = emr_password @@ -58,7 +67,7 @@ def __init__(self, # Module level functions #============================ -def download_file(destination, access_details): +def download_files(destination, access_details): """ Download a file from the sftp server :destination the name of the file which will be downloaded @@ -67,15 +76,16 @@ def download_file(destination, access_details): @see get_emr_data() """ connection_info = 
dict(access_details.__dict__) - # delete unnecessary element form the dictionary - del connection_info['download_file'] + # delete unnecessary elements form the dictionary + del connection_info['download_list'] + del connection_info['sftp_project_name'] # check for errors during authentication with EMR server try: with pysftp.Connection(**connection_info) as sftp: logger.info("User %s connected to sftp server %s" % \ (connection_info['username'], connection_info['host'])) - sftp.get(access_details.download_file, destination) + sftp.get(access_details.download_list, destination) except IOError as e: logger.error("Please verify that the private key file mentioned in "\ "settings.ini exists.") @@ -161,7 +171,16 @@ def get_emr_data(conf_dir, connection_details): :conf_dir configuration directory name :connection_details EmrFileAccessDetails object """ - raw_txt_file = os.path.join(conf_dir, 'raw.txt') - - # download csv file - download_file(raw_txt_file, connection_details) + number_of_files = len(connection_details.download_list) + counter = 1 + for key in connection_details.download_list: + logger.info("Now downloading %i of %i file(s)", counter, number_of_files) + # make a copy of the dict + temp_connection_details = copy.deepcopy(connection_details) + # download the next file in the dict + raw_txt_file = os.path.join(conf_dir, connection_details.download_list[key]) + temp_connection_details.download_list = os.path.join(connection_details.sftp_project_name, key) + logger.info("Downloading remote file file: " + temp_connection_details.download_list) + logger.info("Saving to local file name: " + raw_txt_file) + download_files(raw_txt_file, temp_connection_details) + counter += 1 diff --git a/setup.py b/setup.py index c9a3e21..491c756 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ setup( name='redi', - version='0.14.2', + version='0.14.3', author='https://www.ctsi.ufl.edu/research/study-development/informatics-consulting/', author_email='ctsit@ctsi.ufl.edu', 
packages=find_packages(exclude=['test']), diff --git a/test/TestGetEMRData.py b/test/TestGetEMRData.py index 5aa184c..197a3bd 100644 --- a/test/TestGetEMRData.py +++ b/test/TestGetEMRData.py @@ -43,7 +43,7 @@ def _noop(*args, **kwargs): @patch.multiple(pysftp, Connection=_noop) - @patch.multiple(GetEmrData, download_file=_noop) + @patch.multiple(GetEmrData, download_files=_noop) def test_get_emr_data(self): """ This test verifies only that the csv file on the sftp server @@ -63,7 +63,8 @@ def test_get_emr_data(self): f.write(input_string) props = EmrFileAccessDetails( - emr_download_file='raw.csv', + emr_sftp_project_name='/', + emr_download_list='raw.csv', emr_host='localhost', emr_username='admin', emr_password='admin', @@ -202,7 +203,8 @@ def create_sample_file(sample_file): def get_connection_info(private_key): """Return a dictionary of parameters for creating a sftp connection""" access_details = EmrFileAccessDetails( - emr_download_file='raw.csv', + emr_sftp_project_name='/', + emr_download_list='raw.csv', emr_host='localhost', emr_username='admin', emr_password='admin', @@ -212,8 +214,9 @@ def get_connection_info(private_key): ) connection_info = dict(access_details.__dict__) - # delete unnecessary element form the dictionary - del connection_info['download_file'] + # delete unnecessary elements form the dictionary + del connection_info['sftp_project_name'] + del connection_info['download_list'] return connection_info