diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..12d63d205 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,28 @@ +name: Generate Docs + +on: + push: + branches: [ master ] + +jobs: + + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Python + uses: actions/setup-python@v1 + with: + python-version: '3.7' + + - name: Build + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + make docs + - name: Deploy + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{secrets.GITHUB_TOKEN}} + publish_dir: docs/_build/html diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..d90ef4ef8 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,30 @@ +name: Run Tests + +on: + push: + branches: [ '*' ] + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: ['3.5', '3.6', '3.7'] + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + + - name: Test with tox + run: tox diff --git a/.travis.yml b/.travis.yml index ef8e31a6f..643f44d52 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,11 @@ # Config file for automatic testing at travis-ci.org -dist: trusty +dist: bionic language: python python: + - 3.7 - 3.6 - 3.5 -matrix: - include: - - python: 3.7 - dist: xenial - sudo: required - # Command to install dependencies install: pip install -U tox-travis codecov @@ -18,15 +13,3 @@ after_success: codecov # Command to run tests script: tox - -deploy: - - - provider: pages - skip-cleanup: true - github-token: "$GITHUB_TOKEN" - keep-history: true - local-dir: 
docs/_build/html - target-branch: gh-pages - on: - branch: master - python: 3.6 diff --git a/HISTORY.md b/HISTORY.md index 70bea0e99..fa41f42cc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,11 @@ # History +## 0.2.2 - 2020-06-26 + +### Bugs Fixed + +* Escape `column_name` in hypertransformer - Issue [#110](https://github.com/sdv-dev/RDT/issues/110) by @csala + ## 0.2.1 - 2020-01-17 ### Bugs Fixed diff --git a/Makefile b/Makefile index 9a3acc8db..1b084afc0 100644 --- a/Makefile +++ b/Makefile @@ -91,23 +91,31 @@ lint: ## check style with flake8 and isort .PHONY: fix-lint fix-lint: ## fix lint issues using autoflake, autopep8, and isort - find rdt -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive rdt - isort --apply --atomic --recursive rdt - - find tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive tests - isort --apply --atomic --recursive tests + find rdt tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables + autopep8 --in-place --recursive --aggressive rdt tests + isort --apply --atomic --recursive rdt tests # TEST TARGETS +.PHONY: test-unit +test-unit: ## run tests quickly with the default Python + python -m pytest --cov=rdt + +.PHONY: test-readme +test-readme: ## run the readme snippets + rm -rf tests/readme_test && mkdir tests/readme_test + cd tests/readme_test && rundoc run --single-session python3 -t python3 ../../README.md + rm -rf tests/readme_test + .PHONY: test -test: ## run tests quickly with the default Python - python -m pytest --basetemp=${ENVTMPDIR} --cov=rdt +test: test-unit test-readme ## test everything that needs test dependencies + +.PHONY: test-devel +test-devel: lint docs ## test everything that needs development dependencies .PHONY: test-all -test-all: ## run tests on every Python version with tox 
+test-all: ## test using tox tox -r .PHONY: coverage @@ -142,21 +150,35 @@ dist: clean ## builds source and wheel package python setup.py bdist_wheel ls -l dist -.PHONY: test-publish -test-publish: dist ## package and upload a release on TestPyPI +.PHONY: publish-confirm +publish-confirm: + @echo "WARNING: This will irreversibly upload a new version to PyPI!" + @echo -n "Please type 'confirm' to proceed: " \ + && read answer \ + && [ "$${answer}" = "confirm" ] + +.PHONY: publish-test +publish-test: dist publish-confirm ## package and upload a release on TestPyPI twine upload --repository-url https://test.pypi.org/legacy/ dist/* .PHONY: publish -publish: dist ## package and upload a release +publish: dist publish-confirm ## package and upload a release twine upload dist/* .PHONY: bumpversion-release bumpversion-release: ## Merge master to stable and bumpversion release - git checkout stable || (git checkout -b stable && git push --set-upstream origin stable) + git checkout stable || git checkout -b stable git merge --no-ff master -m"make release-tag: Merge branch 'master' into stable" bumpversion release git push --tags origin stable +.PHONY: bumpversion-release-test +bumpversion-release-test: ## Dry run: merge master to stable and bumpversion release without tagging or pushing + git checkout stable || git checkout -b stable + git merge --no-ff master -m"make release-tag: Merge branch 'master' into stable" + bumpversion release --no-tag + @echo git push --tags origin stable + .PHONY: bumpversion-patch bumpversion-patch: ## Merge stable to master and bumpversion patch git checkout master @@ -164,6 +186,10 @@ bumpversion-patch: ## Merge stable to master and bumpversion patch bumpversion --no-tag patch git push +.PHONY: bumpversion-candidate +bumpversion-candidate: ## Bump the version to the next candidate + bumpversion candidate --no-tag + .PHONY: bumpversion-minor bumpversion-minor: ## Bump the version the next minor skipping the release bumpversion --no-tag minor @@ -172,13 +198,21 @@ 
bumpversion-minor: ## Bump the version the next minor skipping the release bumpversion-major: ## Bump the version the next major skipping the release bumpversion --no-tag major -.PHONY: bumpversion-candidate -bumpversion-candidate: ## Bump the version to the next candidate - bumpversion candidate --no-tag +.PHONY: bumpversion-revert +bumpversion-revert: ## Undo a previous bumpversion-release + git checkout master + git branch -D stable +CLEAN_DIR := $(shell git status --short | grep -v '??') CURRENT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) CHANGELOG_LINES := $(shell git diff HEAD..origin/stable HISTORY.md 2>&1 | wc -l) +.PHONY: check-clean +check-clean: ## Check if the directory has uncommitted changes +ifneq ($(CLEAN_DIR),) + $(error There are uncommitted changes) +endif + .PHONY: check-master check-master: ## Check if we are in master branch ifneq ($(CURRENT_BRANCH),master) @@ -192,14 +226,21 @@ ifeq ($(CHANGELOG_LINES),0) endif .PHONY: check-release -check-release: check-master check-history ## Check if the release can be made +check-release: check-clean check-master check-history ## Check if the release can be made + @echo "A new release can be made" .PHONY: release release: check-release bumpversion-release publish bumpversion-patch +.PHONY: release-test +release-test: check-release bumpversion-release-test publish-test bumpversion-revert + .PHONY: release-candidate release-candidate: check-master publish bumpversion-candidate +.PHONY: release-candidate-test +release-candidate-test: check-clean check-master publish-test + .PHONY: release-minor release-minor: check-release bumpversion-minor release diff --git a/README.md b/README.md index 7283e106e..baee72d72 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ An open source project from Data to AI Lab at MIT.

+[![Development Status](https://img.shields.io/badge/Development%20Status-2%20--%20Pre--Alpha-yellow)](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha) [![PyPi Shield](https://img.shields.io/pypi/v/RDT.svg)](https://pypi.python.org/pypi/RDT) [![Travis CI Shield](https://travis-ci.org/sdv-dev/RDT.svg?branch=master)](https://travis-ci.org/sdv-dev/RDT) [![Coverage Status](https://codecov.io/gh/sdv-dev/RDT/branch/master/graph/badge.svg)](https://codecov.io/gh/sdv-dev/RDT) @@ -11,6 +12,7 @@ # RDT: Reversible Data Transforms * License: [MIT](https://github.com/sdv-dev/RDT/blob/master/LICENSE) +* Development Status: [Pre-Alpha](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha) * Documentation: https://sdv-dev.github.io/RDT * Homepage: https://github.com/sdv-dev/RDT @@ -23,31 +25,16 @@ the transformations in order to revert them as needed. ## Requirements -**RDT** has been developed and tested on [Python 3.5, 3.6 and 3.7](https://www.python.org/downloads) +**RDT** has been developed and tested on [Python 3.5, 3.6 and 3.7](https://www.python.org/downloads/) -Also, although it is not strictly required, the usage of a -[virtualenv](https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid +Also, although it is not strictly required, the usage of a [virtualenv]( +https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering with other software installed in the system where **RDT** is run. -These are the minimum commands needed to create a virtualenv using python3.6 for **RDT**: - -```bash -pip install virtualenv -virtualenv -p $(which python3.6) rdt-venv -``` - -Afterwards, you have to execute this command to have the virtualenv activated: - -```bash -source rdt-venv/bin/activate -``` - -Remember about executing it every time you start a new console to work on **RDT**! 
- ## Install with pip -After creating the virtualenv and activating it, we recommend using -[pip](https://pip.pypa.io/en/stable/) in order to install **RDT**: +The easiest and recommended way to install **RDT** is using [pip]( +https://pip.pypa.io/en/stable/): ```bash pip install rdt @@ -55,25 +42,9 @@ pip install rdt This will pull and install the latest stable release from [PyPi](https://pypi.org/). -## Install from source +If you want to install from source or contribute to the project please read the +[Contributing Guide](https://sdv-dev.github.io/RDT/contributing.html#get-started). -With your virtualenv activated, you can clone the repository and install it from -source by running `make install` on the `stable` branch: - -```bash -git clone git@github.com:sdv-dev/RDT.git -cd RDT -git checkout stable -make install -``` - -## Install for Development - -If you want to contribute to the project, a few more steps are required to make the project ready -for development. - -Please head to the [Contributing Guide](https://sdv-dev.github.io/RDT/contributing.html#get-started) -for more details about this process. # Quickstart @@ -90,7 +61,7 @@ a single column loaded as a `pandas.DataFrame` object. You can load some demo data using the `rdt.get_demo` function, which will return some random data for you to play with. -```python +```python3 from rdt import get_demo data = get_demo() @@ -119,8 +90,9 @@ RDT introduced some null values randomly. In this example we will use the datetime column, so let's load a `DatetimeTransformer`. -```python +```python3 from rdt.transformers import DatetimeTransformer + transformer = DatetimeTransformer() ``` @@ -130,7 +102,7 @@ Before being able to transform the data, we need the transformer to learn from i We will do this by calling its `fit` method passing the column that we want to transform. 
-```python +```python3 transformer.fit(data['3_datetime']) ``` @@ -139,7 +111,7 @@ transformer.fit(data['3_datetime']) Once the transformer is fitted, we can pass the data again to its `transform` method in order to get the transformed version of the data. -```python +```python3 transformed = transformer.transform(data['3_datetime']) ``` @@ -165,7 +137,7 @@ array([[1.61299380e+18, 0.00000000e+00], In order to revert the previous transformation, the transformed data can be passed to the `reverse_transform` method of the transformer: -```python +```python3 reversed_data = transformer.reverse_transform(transformed) ``` @@ -195,8 +167,9 @@ a table with multiple columns. In order to manuipulate a complete table we will need to load a `rdt.HyperTransformer`. -```python +```python3 from rdt import HyperTransformer + ht = HyperTransformer() ``` @@ -207,7 +180,7 @@ data. This is done by calling its `fit` method passing the `data` DataFrame. -```python +```python3 ht.fit(data) ``` @@ -216,7 +189,7 @@ ht.fit(data) Once the HyperTransformer is fitted, we can pass the data again to its `transform` method in order to get the transformed version of the data. -```python +```python3 transformed = ht.transform(data) ``` @@ -243,7 +216,7 @@ In order to revert the transformation and recover the original data from the tra we need to call `reverse_transform` method of the `HyperTransformer` instance passing it the transformed data. 
-```python +```python3 reversed_data = ht.reverse_transform(transformed) ``` diff --git a/rdt/__init__.py b/rdt/__init__.py index f0eba93a2..f7121cea1 100644 --- a/rdt/__init__.py +++ b/rdt/__init__.py @@ -5,7 +5,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.1' +__version__ = '0.2.2.dev1' import numpy as np import pandas as pd diff --git a/rdt/hyper_transformer.py b/rdt/hyper_transformer.py index 3b0ffeb09..9572c9396 100644 --- a/rdt/hyper_transformer.py +++ b/rdt/hyper_transformer.py @@ -1,3 +1,5 @@ +import re + import numpy as np from rdt.transformers import ( @@ -191,7 +193,7 @@ def _get_columns(data, column_name): ValueError: if no columns match. """ - regex = r'{}(#[0-9]+)?$'.format(column_name) + regex = r'{}(#[0-9]+)?$'.format(re.escape(column_name)) columns = data.columns[data.columns.str.match(regex)] if columns.empty: raise ValueError('No columns match_ {}'.format(column_name)) diff --git a/setup.cfg b/setup.cfg index 62aaaf23b..5a940a9fd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.1 +current_version = 0.2.2.dev1 commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))? 
diff --git a/setup.py b/setup.py index 0bfcc5666..d1dc372ed 100644 --- a/setup.py +++ b/setup.py @@ -1,19 +1,21 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- +"""The setup script.""" + from setuptools import setup, find_packages -with open('README.md') as readme_file: +with open('README.md', encoding='utf-8') as readme_file: readme = readme_file.read() -with open('HISTORY.md') as history_file: +with open('HISTORY.md', encoding='utf-8') as history_file: history = history_file.read() install_requires = [ - 'numpy>=1.15.4', - 'pandas>=0.23.4', - 'scipy>=1.1.0', - 'Faker>=1.0.1', + 'numpy>=1.15.4,<2', + 'pandas>=0.23.4,<1', + 'scipy>=1.1.0,<2', + 'Faker>=1.0.1,<2', ] setup_requires = [ @@ -23,39 +25,41 @@ tests_require = [ 'pytest>=3.4.2', 'pytest-cov>=2.6.0', + 'jupyter>=1.0.0,<2', + 'rundoc>=0.4.3,<0.5', ] development_requires = [ # general - 'bumpversion>=0.5.3', + 'bumpversion>=0.5.3,<0.6', 'pip>=9.0.1', - 'watchdog>=0.8.3', + 'watchdog>=0.8.3,<0.11', # docs - 'm2r>=0.2.0', - 'Sphinx>=1.7.1', - 'sphinx_rtd_theme>=0.2.4', + 'm2r>=0.2.0,<0.3', + 'Sphinx>=1.7.1,<3', + 'sphinx_rtd_theme>=0.2.4,<0.5', 'autodocsumm>=0.1.10', # style check - 'flake8>=3.7.7', - 'isort>=4.3.4', + 'flake8>=3.7.7,<4', + 'isort>=4.3.4,<5', # fix style issues - 'autoflake>=1.2', - 'autopep8>=1.4.3', + 'autoflake>=1.1,<2', + 'autopep8>=1.4.3,<2', # distribute on PyPI - 'twine>=1.10.0', + 'twine>=1.10.0,<4', 'wheel>=0.30.0', # Advanced testing - 'coverage>=4.5.1', - 'tox>=2.9.1', + 'coverage>=4.5.1,<6', + 'tox>=2.9.1,<4', ] setup( - author="MIT Data To AI Lab", + author='MIT Data To AI Lab', author_email='dailabmit@gmail.com', classifiers=[ 'Development Status :: 2 - Pre-Alpha', @@ -67,7 +71,7 @@ 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', ], - description="Reversible Data Transformsi", + description='Reversible Data Transforms', extras_require={ 'test': tests_require, 'dev': development_requires + tests_require, @@ -75,16 +79,16 @@ 
include_package_data=True, install_requires=install_requires, keywords='rdt', - license="MIT license", + license='MIT license', long_description=readme + '\n\n' + history, long_description_content_type='text/markdown', name='rdt', packages=find_packages(include=['rdt', 'rdt.*']), - python_requires='>=3.5', + python_requires='>=3.5,<3.8', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, url='https://github.com/sdv-dev/RDT', - version='0.2.1', + version='0.2.2.dev1', zip_safe=False, ) diff --git a/tests/test_hyper_transformer.py b/tests/test_hyper_transformer.py index fe1f455ee..6e8e834fd 100644 --- a/tests/test_hyper_transformer.py +++ b/tests/test_hyper_transformer.py @@ -236,3 +236,18 @@ def test__get_columns_error(self): with pytest.raises(ValueError): HyperTransformer._get_columns(data, 'a') + + def test__get_columns_regex(self): + data = pd.DataFrame({ + 'a(b)': [4, 5, 6], + 'a(b)#1': [7, 8, 9], + }) + + returned = HyperTransformer._get_columns(data, 'a(b)') + + expected = np.array([ + [4, 7], + [5, 8], + [6, 9] + ]) + np.testing.assert_equal(returned, expected) diff --git a/tox.ini b/tox.ini index c2de38eaa..31724c5dd 100644 --- a/tox.ini +++ b/tox.ini @@ -1,32 +1,29 @@ [tox] -envlist = py35, py36, py37, lint, docs - +envlist = py{35,36,37}, test-devel [travis] python = - 3.7: py37 - 3.6: py36, lint, docs + 3.7: py37, test-devel + 3.6: py36 3.5: py35 +[gh-actions] +python = + 3.7: py37, test-devel + 3.6: py36 + 3.5: py35 [testenv] passenv = CI TRAVIS TRAVIS_* -setenv = - PYTHONPATH = {toxinidir} -extras = test -commands = - /usr/bin/env make test - - -[testenv:lint] skipsdist = true -extras = dev +skip_install = true +commands_pre = + /usr/bin/env pip install .[test] commands = - /usr/bin/env make lint - + /usr/bin/env make test -[testenv:docs] -skipsdist = true -extras = dev +[testenv:test-devel] +commands_pre = + /usr/bin/env pip install .[dev] commands = - /usr/bin/env make docs + /usr/bin/env make test-devel