diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 000000000..12d63d205 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,28 @@ +name: Generate Docs + +on: + push: + branches: [ master ] + +jobs: + + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Python + uses: actions/setup-python@v1 + with: + python-version: '3.7' + + - name: Build + run: | + python -m pip install --upgrade pip + pip install -e .[dev] + make docs + - name: Deploy + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{secrets.GITHUB_TOKEN}} + publish_dir: docs/_build/html diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 000000000..d90ef4ef8 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,30 @@ +name: Run Tests + +on: + push: + branches: [ '*' ] + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.5, 3.6, 3.7] + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + + - name: Test with tox + run: tox diff --git a/.travis.yml b/.travis.yml index ef8e31a6f..643f44d52 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,11 @@ # Config file for automatic testing at travis-ci.org -dist: trusty +dist: bionic language: python python: + - 3.7 - 3.6 - 3.5 -matrix: - include: - - python: 3.7 - dist: xenial - sudo: required - # Command to install dependencies install: pip install -U tox-travis codecov @@ -18,15 +13,3 @@ after_success: codecov # Command to run tests script: tox - -deploy: - - - provider: pages - skip-cleanup: true - github-token: "$GITHUB_TOKEN" - keep-history: true - local-dir: 
docs/_build/html - target-branch: gh-pages - on: - branch: master - python: 3.6 diff --git a/HISTORY.md b/HISTORY.md index 70bea0e99..fa41f42cc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,11 @@ # History +## 0.2.2 - 2020-06-26 + +### Bugs Fixed + +* Escape `column_name` in hypertransformer - Issue [#110](https://github.com/sdv-dev/RDT/issues/110) by @csala + ## 0.2.1 - 2020-01-17 ### Bugs Fixed diff --git a/Makefile b/Makefile index 9a3acc8db..1b084afc0 100644 --- a/Makefile +++ b/Makefile @@ -91,23 +91,31 @@ lint: ## check style with flake8 and isort .PHONY: fix-lint fix-lint: ## fix lint issues using autoflake, autopep8, and isort - find rdt -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive rdt - isort --apply --atomic --recursive rdt - - find tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive tests - isort --apply --atomic --recursive tests + find rdt tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables + autopep8 --in-place --recursive --aggressive rdt tests + isort --apply --atomic --recursive rdt tests # TEST TARGETS +.PHONY: test-unit +test-unit: ## run tests quickly with the default Python + python -m pytest --cov=rdt + +.PHONY: test-readme +test-readme: ## run the readme snippets + rm -rf tests/readme_test && mkdir tests/readme_test + cd tests/readme_test && rundoc run --single-session python3 -t python3 ../../README.md + rm -rf tests/readme_test + .PHONY: test -test: ## run tests quickly with the default Python - python -m pytest --basetemp=${ENVTMPDIR} --cov=rdt +test: test-unit test-readme ## test everything that needs test dependencies + +.PHONY: test-devel +test-devel: lint docs ## test everything that needs development dependencies .PHONY: test-all -test-all: ## run tests on every Python version with tox 
+test-all: ## test using tox tox -r .PHONY: coverage @@ -142,21 +150,35 @@ dist: clean ## builds source and wheel package python setup.py bdist_wheel ls -l dist -.PHONY: test-publish -test-publish: dist ## package and upload a release on TestPyPI +.PHONY: publish-confirm +publish-confirm: + @echo "WARNING: This will irreversibly upload a new version to PyPI!" + @printf "Please type 'confirm' to proceed: " \ + && read answer \ + && [ "$${answer}" = "confirm" ] + +.PHONY: publish-test +publish-test: dist publish-confirm ## package and upload a release on TestPyPI twine upload --repository-url https://test.pypi.org/legacy/ dist/* .PHONY: publish -publish: dist ## package and upload a release +publish: dist publish-confirm ## package and upload a release twine upload dist/* .PHONY: bumpversion-release bumpversion-release: ## Merge master to stable and bumpversion release - git checkout stable || (git checkout -b stable && git push --set-upstream origin stable) + git checkout stable || git checkout -b stable git merge --no-ff master -m"make release-tag: Merge branch 'master' into stable" bumpversion release git push --tags origin stable +.PHONY: bumpversion-release-test +bumpversion-release-test: ## Dry run of bumpversion-release: merge and bump without tagging or pushing + git checkout stable || git checkout -b stable + git merge --no-ff master -m"make release-tag: Merge branch 'master' into stable" + bumpversion release --no-tag + @echo git push --tags origin stable + .PHONY: bumpversion-patch bumpversion-patch: ## Merge stable to master and bumpversion patch git checkout master @@ -164,6 +186,10 @@ bumpversion-patch: ## Merge stable to master and bumpversion patch bumpversion --no-tag patch git push +.PHONY: bumpversion-candidate +bumpversion-candidate: ## Bump the version to the next candidate + bumpversion candidate --no-tag + .PHONY: bumpversion-minor bumpversion-minor: ## Bump the version the next minor skipping the release bumpversion --no-tag minor @@ -172,13 +198,21 @@
bumpversion-minor: ## Bump the version the next minor skipping the release bumpversion-major: ## Bump the version the next major skipping the release bumpversion --no-tag major -.PHONY: bumpversion-candidate -bumpversion-candidate: ## Bump the version to the next candidate - bumpversion candidate --no-tag +.PHONY: bumpversion-revert +bumpversion-revert: ## Undo a previous bumpversion-release + git checkout master + git branch -D stable +CLEAN_DIR := $(shell git status --short | grep -v '??') CURRENT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) CHANGELOG_LINES := $(shell git diff HEAD..origin/stable HISTORY.md 2>&1 | wc -l) +.PHONY: check-clean +check-clean: ## Check if the directory has uncommitted changes +ifneq ($(CLEAN_DIR),) + $(error There are uncommitted changes) +endif + .PHONY: check-master check-master: ## Check if we are in master branch ifneq ($(CURRENT_BRANCH),master) @@ -192,14 +226,21 @@ ifeq ($(CHANGELOG_LINES),0) endif .PHONY: check-release -check-release: check-master check-history ## Check if the release can be made +check-release: check-clean check-master check-history ## Check if the release can be made + @echo "A new release can be made" .PHONY: release release: check-release bumpversion-release publish bumpversion-patch +.PHONY: release-test +release-test: check-release bumpversion-release-test publish-test bumpversion-revert + .PHONY: release-candidate release-candidate: check-master publish bumpversion-candidate +.PHONY: release-candidate-test +release-candidate-test: check-clean check-master publish-test + .PHONY: release-minor release-minor: check-release bumpversion-minor release diff --git a/README.md b/README.md index 7283e106e..baee72d72 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,7 @@ An open source project from Data to AI Lab at MIT.
+[![Development Status](https://img.shields.io/badge/Development%20Status-2%20--%20Pre--Alpha-yellow)](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha) [![PyPi Shield](https://img.shields.io/pypi/v/RDT.svg)](https://pypi.python.org/pypi/RDT) [![Travis CI Shield](https://travis-ci.org/sdv-dev/RDT.svg?branch=master)](https://travis-ci.org/sdv-dev/RDT) [![Coverage Status](https://codecov.io/gh/sdv-dev/RDT/branch/master/graph/badge.svg)](https://codecov.io/gh/sdv-dev/RDT) @@ -11,6 +12,7 @@ # RDT: Reversible Data Transforms * License: [MIT](https://github.com/sdv-dev/RDT/blob/master/LICENSE) +* Development Status: [Pre-Alpha](https://pypi.org/search/?c=Development+Status+%3A%3A+2+-+Pre-Alpha) * Documentation: https://sdv-dev.github.io/RDT * Homepage: https://github.com/sdv-dev/RDT @@ -23,31 +25,16 @@ the transformations in order to revert them as needed. ## Requirements -**RDT** has been developed and tested on [Python 3.5, 3.6 and 3.7](https://www.python.org/downloads) +**RDT** has been developed and tested on [Python 3.5, 3.6 and 3.7](https://www.python.org/downloads/) -Also, although it is not strictly required, the usage of a -[virtualenv](https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid +Also, although it is not strictly required, the usage of a [virtualenv]( +https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering with other software installed in the system where **RDT** is run. -These are the minimum commands needed to create a virtualenv using python3.6 for **RDT**: - -```bash -pip install virtualenv -virtualenv -p $(which python3.6) rdt-venv -``` - -Afterwards, you have to execute this command to have the virtualenv activated: - -```bash -source rdt-venv/bin/activate -``` - -Remember about executing it every time you start a new console to work on **RDT**!
- ## Install with pip -After creating the virtualenv and activating it, we recommend using -[pip](https://pip.pypa.io/en/stable/) in order to install **RDT**: +The easiest and recommended way to install **RDT** is using [pip]( +https://pip.pypa.io/en/stable/): ```bash pip install rdt @@ -55,25 +42,9 @@ pip install rdt This will pull and install the latest stable release from [PyPi](https://pypi.org/). -## Install from source +If you want to install from source or contribute to the project please read the +[Contributing Guide](https://sdv-dev.github.io/RDT/contributing.html#get-started). -With your virtualenv activated, you can clone the repository and install it from -source by running `make install` on the `stable` branch: - -```bash -git clone git@github.com:sdv-dev/RDT.git -cd RDT -git checkout stable -make install -``` - -## Install for Development - -If you want to contribute to the project, a few more steps are required to make the project ready -for development. - -Please head to the [Contributing Guide](https://sdv-dev.github.io/RDT/contributing.html#get-started) -for more details about this process. # Quickstart @@ -90,7 +61,7 @@ a single column loaded as a `pandas.DataFrame` object. You can load some demo data using the `rdt.get_demo` function, which will return some random data for you to play with. -```python +```python3 from rdt import get_demo data = get_demo() @@ -119,8 +90,9 @@ RDT introduced some null values randomly. In this example we will use the datetime column, so let's load a `DatetimeTransformer`. -```python +```python3 from rdt.transformers import DatetimeTransformer + transformer = DatetimeTransformer() ``` @@ -130,7 +102,7 @@ Before being able to transform the data, we need the transformer to learn from i We will do this by calling its `fit` method passing the column that we want to transform. 
-```python +```python3 transformer.fit(data['3_datetime']) ``` @@ -139,7 +111,7 @@ transformer.fit(data['3_datetime']) Once the transformer is fitted, we can pass the data again to its `transform` method in order to get the transformed version of the data. -```python +```python3 transformed = transformer.transform(data['3_datetime']) ``` @@ -165,7 +137,7 @@ array([[1.61299380e+18, 0.00000000e+00], In order to revert the previous transformation, the transformed data can be passed to the `reverse_transform` method of the transformer: -```python +```python3 reversed_data = transformer.reverse_transform(transformed) ``` @@ -195,8 +167,9 @@ a table with multiple columns. In order to manipulate a complete table we will need to load a `rdt.HyperTransformer`. -```python +```python3 from rdt import HyperTransformer + ht = HyperTransformer() ``` @@ -207,7 +180,7 @@ data. This is done by calling its `fit` method passing the `data` DataFrame. -```python +```python3 ht.fit(data) ``` @@ -216,7 +189,7 @@ ht.fit(data) Once the HyperTransformer is fitted, we can pass the data again to its `transform` method in order to get the transformed version of the data. -```python +```python3 transformed = ht.transform(data) ``` @@ -243,7 +216,7 @@ In order to revert the transformation and recover the original data from the tra we need to call `reverse_transform` method of the `HyperTransformer` instance passing it the transformed data.
-```python +```python3 reversed_data = ht.reverse_transform(transformed) ``` diff --git a/rdt/__init__.py b/rdt/__init__.py index f0eba93a2..f7121cea1 100644 --- a/rdt/__init__.py +++ b/rdt/__init__.py @@ -5,7 +5,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.1' +__version__ = '0.2.2.dev1' import numpy as np import pandas as pd diff --git a/rdt/hyper_transformer.py b/rdt/hyper_transformer.py index 3b0ffeb09..9572c9396 100644 --- a/rdt/hyper_transformer.py +++ b/rdt/hyper_transformer.py @@ -1,3 +1,5 @@ +import re + import numpy as np from rdt.transformers import ( @@ -191,7 +193,7 @@ def _get_columns(data, column_name): ValueError: if no columns match. """ - regex = r'{}(#[0-9]+)?$'.format(column_name) + regex = r'{}(#[0-9]+)?$'.format(re.escape(column_name)) columns = data.columns[data.columns.str.match(regex)] if columns.empty: raise ValueError('No columns match_ {}'.format(column_name)) diff --git a/setup.cfg b/setup.cfg index 62aaaf23b..5a940a9fd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.1 +current_version = 0.2.2.dev1 commit = True tag = True parse = (?P