diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..12301490 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 57671348..2ee35ea3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -7,12 +7,7 @@ jobs: strategy: matrix: os: [macos-latest, windows-latest, ubuntu-latest] - python-version: [3.7, 3.8, 3.9, '3.10', '3.11', pypy-3.7] - exclude: - - os: windows-latest - python-version: 3.7 - - os: windows-latest - python-version: pypy-3.7 + python-version: [3.8, 3.9, '3.10', '3.11', '3.12', pypy-3.9] steps: - if: matrix.os == 'ubuntu-latest' name: Install UTF-8 locales and lxml requirements @@ -22,8 +17,8 @@ jobs: sudo locale-gen en_US.UTF-8 sudo locale-gen ko_KR.UTF-8 sudo update-locale - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} cache: pip diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 28c49bb4..e4a09a27 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,14 +1,12 @@ name: Lint on: [push, pull_request] -env: - BASEDIR: https://raw.githubusercontent.com/open-contracting/standard-maintenance-scripts/main jobs: build: if: github.event_name == 'push' || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: '3.10' cache: pip diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 2654ebbc..be8e5468 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -3,9 +3,11 @@ on: push jobs: build: runs-on: 
ubuntu-latest + permissions: + id-token: write steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: '3.10' - run: pip install --upgrade build @@ -13,11 +15,8 @@ jobs: - name: Publish to TestPyPI uses: pypa/gh-action-pypi-publish@release/v1 with: - password: ${{ secrets.TEST_PYPI_API_TOKEN }} repository-url: https://test.pypi.org/legacy/ skip-existing: true - name: Publish to PyPI if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/AUTHORS.rst b/AUTHORS.rst index a6f367e4..ca5e1b32 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -46,3 +46,5 @@ agate is made by a community. The following individuals have contributed code, d * `mathdesc `_ * `Tim Gates `_ * `castorf `_ +* `Julien Enselme `__ + diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 1afef6ad..ea4ecb3b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,22 +1,38 @@ -Unreleased ----------- +1.11.0 - April 27, 2024 +----------------------- + +* fix: Fix :meth:`.TableSet.print_structure` for nested tablesets. (#765) + +1.9.1 - December 21, 2023 +------------------------- + +* Add Babel 2.14 support. + +1.9.0 - October 17, 2023 +------------------------ + +* feat: Add a ``text_truncation_chars`` configuration for values that exceed ``max_column_width`` in :meth:`.Table.print_table` and :meth:`.Table.print_html`. +* feat: Add a ``number_truncation_chars`` configuration for values that exceed ``max_precision`` in :meth:`.Table.print_table` and :meth:`.Table.print_html`. + +1.8.0 - October 10, 2023 +------------------------ * feat: Lowercase the ``null_values`` provided to individual data types, since all comparisons to ``null_values`` are case-insensitive. (#770) * feat: :class:`.Mean` works with :class:`.TimeDelta`. (#761) -* fix: Fix :meth:`.TableSet.print_structure` for nested tablesets. 
(#765) +* Switch from ``pytz`` to ``ZoneInfo``. +* Add Python 3.12 support. +* Drop Python 3.7 support (end-of-life was June 27, 2023). -1.7.1 - Jan 4, 2023 -------------------- +1.7.1 - January 4, 2023 +----------------------- * Allow parsedatetime 2.6. -1.7.0 - Jan 3, 2023 -------------------- +1.7.0 - January 3, 2023 +----------------------- -* Add Python 3.11 support. -* Add Python 3.10 support. -* Drop Python 3.6 support (end-of-life was December 23, 2021). -* Drop Python 2.7 support (end-of-life was January 1, 2020). +* Add Python 3.10 and 3.11 support. +* Drop support for Python 2.7 (EOL 2020-01-01), 3.6 (2021-12-23). 1.6.3 - July 15, 2021 --------------------- @@ -37,8 +53,7 @@ Unreleased * fix: Aggregations return ``None`` if all values are ``None``, instead of raising an error. Note that ``Sum``, ``MaxLength`` and ``MaxPrecision`` continue to return ``0`` if all values are ``None``. (#706) * fix: Ensure files are closed when errors occur. (#734) * build: Make PyICU an optional dependency. -* Drop Python 3.5 support (end-of-life was September 13, 2020). -* Drop Python 3.4 support (end-of-life was March 18, 2019). +* Drop support for Python 3.4 (2019-03-18), 3.5 (2020-09-13). 
1.6.2 - March 10, 2021 ---------------------- diff --git a/agate/__init__.py b/agate/__init__.py index 339fe694..77a01443 100644 --- a/agate/__init__.py +++ b/agate/__init__.py @@ -1,4 +1,3 @@ - import agate.csv_py3 as csv from agate.aggregations import * from agate.columns import Column diff --git a/agate/aggregations/base.py b/agate/aggregations/base.py index aa7b48d8..307920f3 100644 --- a/agate/aggregations/base.py +++ b/agate/aggregations/base.py @@ -1,4 +1,3 @@ - from agate.exceptions import UnsupportedAggregationError diff --git a/agate/computations/base.py b/agate/computations/base.py index 69577aed..59b27dca 100644 --- a/agate/computations/base.py +++ b/agate/computations/base.py @@ -1,4 +1,3 @@ - class Computation: # pragma: no cover """ Computations produce a new column by performing a calculation on each row. diff --git a/agate/computations/percent.py b/agate/computations/percent.py index da3a20ff..3fb440e0 100644 --- a/agate/computations/percent.py +++ b/agate/computations/percent.py @@ -1,4 +1,3 @@ - from agate.aggregations.has_nulls import HasNulls from agate.aggregations.sum import Sum from agate.computations.base import Computation diff --git a/agate/config.py b/agate/config.py index b20b275e..f79ee875 100644 --- a/agate/config.py +++ b/agate/config.py @@ -26,6 +26,10 @@ +-------------------------+------------------------------------------+-----------------------------------------+ | ellipsis_chars | Characters to render for ellipsis | '...' | +-------------------------+------------------------------------------+-----------------------------------------+ +| text_truncation_chars | Characters for truncated text values | '...' 
| ++-------------------------+------------------------------------------+-----------------------------------------+ +| number_truncation_chars | Characters for truncated number values | '…' | ++-------------------------+------------------------------------------+-----------------------------------------+ """ @@ -50,6 +54,10 @@ 'tick_char': '+', #: Characters to render for ellipsis 'ellipsis_chars': '...', + #: Characters for truncated text values + 'text_truncation_chars': '...', + #: Characters for truncated number values + 'number_truncation_chars': '…', } diff --git a/agate/data_types/base.py b/agate/data_types/base.py index aaa4b5e2..951ab072 100644 --- a/agate/data_types/base.py +++ b/agate/data_types/base.py @@ -1,4 +1,3 @@ - from agate.exceptions import CastError #: Default values which will be automatically cast to :code:`None` diff --git a/agate/data_types/date_time.py b/agate/data_types/date_time.py index 21fb21eb..71fad4fd 100644 --- a/agate/data_types/date_time.py +++ b/agate/data_types/date_time.py @@ -16,8 +16,7 @@ class DateTime(DataType): A formatting string for :meth:`datetime.datetime.strptime` to use instead of using regex-based parsing. :param timezone: - A `pytz `_ timezone to apply to each - parsed date. + A ``ZoneInfo`` timezone to apply to each parsed date. :param locale: A locale specification such as :code:`en_US` or :code:`de_DE` to use for parsing formatted datetimes. diff --git a/agate/data_types/number.py b/agate/data_types/number.py index bc29fda6..b6dbee99 100644 --- a/agate/data_types/number.py +++ b/agate/data_types/number.py @@ -43,8 +43,11 @@ def __init__(self, locale='en_US', group_symbol=None, decimal_symbol=None, with warnings.catch_warnings(): warnings.simplefilter("ignore") - self.group_symbol = group_symbol or self.locale.number_symbols.get('group', ',') - self.decimal_symbol = decimal_symbol or self.locale.number_symbols.get('decimal', '.') + # Babel 2.14 support. 
+ # https://babel.pocoo.org/en/latest/changelog.html#possibly-backwards-incompatible-changes + number_symbols = self.locale.number_symbols.get('latn', self.locale.number_symbols) + self.group_symbol = group_symbol or number_symbols.get('group', ',') + self.decimal_symbol = decimal_symbol or number_symbols.get('decimal', '.') def cast(self, d): """ diff --git a/agate/data_types/text.py b/agate/data_types/text.py index 178e43cc..263d2157 100644 --- a/agate/data_types/text.py +++ b/agate/data_types/text.py @@ -1,4 +1,3 @@ - from agate.data_types.base import DataType diff --git a/agate/table/find.py b/agate/table/find.py index 99dd373a..d13d7cdd 100644 --- a/agate/table/find.py +++ b/agate/table/find.py @@ -1,4 +1,3 @@ - def find(self, test): """ Find the first row that passes a test. diff --git a/agate/table/from_object.py b/agate/table/from_object.py index f5357f05..f114e918 100644 --- a/agate/table/from_object.py +++ b/agate/table/from_object.py @@ -40,6 +40,25 @@ def from_object(cls, obj, row_names=None, column_types=None): Not all rows are required to have the same keys. Missing elements will be filled in with null values. + Keys containing a slash (``/``) can collide with other keys. For example: + + .. code-block:: python + + { + 'a/b': 2, + 'a': { + 'b': False + } + } + + Would generate: + + .. code-block:: python + + { + 'a/b': False + } + :param obj: Filepath or file-like object from which to read JSON data. :param row_names: diff --git a/agate/table/limit.py b/agate/table/limit.py index d164c183..701e6693 100644 --- a/agate/table/limit.py +++ b/agate/table/limit.py @@ -1,4 +1,3 @@ - def limit(self, start_or_stop=None, stop=None, step=None): """ Create a new table with fewer rows. 
diff --git a/agate/table/pivot.py b/agate/table/pivot.py index 126805da..f74d848c 100644 --- a/agate/table/pivot.py +++ b/agate/table/pivot.py @@ -1,4 +1,3 @@ - from agate import utils from agate.aggregations import Count diff --git a/agate/table/print_html.py b/agate/table/print_html.py index 610516a2..41c0837b 100644 --- a/agate/table/print_html.py +++ b/agate/table/print_html.py @@ -43,6 +43,8 @@ def print_html(self, max_rows=20, max_columns=6, output=sys.stdout, max_column_w max_precision = float('inf') ellipsis = config.get_option('ellipsis_chars') + truncation = config.get_option('text_truncation_chars') + len_truncation = len(truncation) locale = locale or config.get_option('default_locale') rows_truncated = max_rows < len(self._rows) @@ -93,7 +95,7 @@ def print_html(self, max_rows=20, max_columns=6, output=sys.stdout, max_column_w v = str(v) if max_column_width is not None and len(v) > max_column_width: - v = '%s...' % v[:max_column_width - 3] + v = '%s%s' % (v[:max_column_width - len_truncation], truncation) formatted_row.append(v) diff --git a/agate/table/print_table.py b/agate/table/print_table.py index 3490b1e0..d066488a 100644 --- a/agate/table/print_table.py +++ b/agate/table/print_table.py @@ -45,6 +45,8 @@ def print_table(self, max_rows=20, max_columns=6, output=sys.stdout, max_column_ max_precision = float('inf') ellipsis = config.get_option('ellipsis_chars') + truncation = config.get_option('text_truncation_chars') + len_truncation = len(truncation) h_line = config.get_option('horizontal_line_char') v_line = config.get_option('vertical_line_char') locale = locale or config.get_option('default_locale') @@ -54,7 +56,7 @@ def print_table(self, max_rows=20, max_columns=6, output=sys.stdout, max_column_ column_names = [] for column_name in self.column_names[:max_columns]: if max_column_width is not None and len(column_name) > max_column_width: - column_names.append('%s...' 
% column_name[:max_column_width - 3]) + column_names.append('%s%s' % (column_name[:max_column_width - len_truncation], truncation)) else: column_names.append(column_name) @@ -102,7 +104,7 @@ def print_table(self, max_rows=20, max_columns=6, output=sys.stdout, max_column_ v = str(v) if max_column_width is not None and len(v) > max_column_width: - v = '%s...' % v[:max_column_width - 3] + v = '%s%s' % (v[:max_column_width - len_truncation], truncation) if len(v) > widths[j]: widths[j] = len(v) diff --git a/agate/table/to_csv.py b/agate/table/to_csv.py index e667ec48..4be7a96c 100644 --- a/agate/table/to_csv.py +++ b/agate/table/to_csv.py @@ -6,7 +6,9 @@ def to_csv(self, path, **kwargs): Write this table to a CSV. This method uses agate's builtin CSV writer, which supports unicode on both Python 2 and Python 3. - `kwargs` will be passed through to the CSV writer. + ``kwargs`` will be passed through to the CSV writer. + + The ``lineterminator`` defaults to the newline character (LF, ``\\n``). :param path: Filepath or file-like object to write to. diff --git a/agate/table/where.py b/agate/table/where.py index 7127cb97..90259771 100644 --- a/agate/table/where.py +++ b/agate/table/where.py @@ -1,4 +1,3 @@ - def where(self, test): """ Create a new :class:`.Table` with only those rows that pass a test. diff --git a/agate/tableset/having.py b/agate/tableset/having.py index 6f6b466c..bb0f46ac 100644 --- a/agate/tableset/having.py +++ b/agate/tableset/having.py @@ -1,4 +1,3 @@ - def having(self, aggregations, test): """ Create a new :class:`.TableSet` with only those tables that pass a test. diff --git a/agate/tableset/proxy_methods.py b/agate/tableset/proxy_methods.py index 4b613c50..e7ac2b75 100644 --- a/agate/tableset/proxy_methods.py +++ b/agate/tableset/proxy_methods.py @@ -1,4 +1,3 @@ - def bins(self, *args, **kwargs): """ Calls :meth:`.Table.bins` on each table in the TableSet. 
diff --git a/agate/utils.py b/agate/utils.py index fc290c18..69bb01eb 100644 --- a/agate/utils.py +++ b/agate/utils.py @@ -12,6 +12,7 @@ from slugify import slugify as pslugify +from agate import config from agate.warns import warn_duplicate_column, warn_unnamed_column #: Sentinal for use when `None` is an valid argument value @@ -161,7 +162,7 @@ def make_number_formatter(decimal_places, add_ellipsis=False): Optionally add an ellipsis symbol at the end of a number """ fraction = '0' * decimal_places - ellipsis = '…' if add_ellipsis else '' + ellipsis = config.get_option('number_truncation_chars') if add_ellipsis else '' return ''.join(['#,##0.', fraction, ellipsis, ';-#,##0.', fraction, ellipsis]) diff --git a/docs/conf.py b/docs/conf.py index 4c341464..4b9a07bc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -12,7 +12,7 @@ project = 'agate' copyright = '2017, Christopher Groskopf' -version = '1.7.1' +version = '1.9.1' release = version # -- General configuration --------------------------------------------------- diff --git a/docs/cookbook/datetime.rst b/docs/cookbook/datetime.rst index 78ffcf0c..5f9f781a 100644 --- a/docs/cookbook/datetime.rst +++ b/docs/cookbook/datetime.rst @@ -34,9 +34,13 @@ The second way is to specify a timezone as an argument to the type constructor: .. code-block:: python - import pytz + try: + from zoneinfo import ZoneInfo + except ImportError: + # Fallback for Python < 3.9 + from backports.zoneinfo import ZoneInfo - eastern = pytz.timezone('US/Eastern') + eastern = ZoneInfo('US/Eastern') datetime_type = agate.DateTime(timezone=eastern) In this case all timezones that are processed will be set to have the Eastern timezone. Note, the timezone will be **set**, not converted. You cannot use this method to convert your timezones from UTC to another timezone. To do that see :ref:`convert_timezones`. @@ -60,9 +64,13 @@ If you load data from a spreadsheet in one timezone and you need to convert it t .. 
code-block:: python - import pytz + try: + from zoneinfo import ZoneInfo + except ImportError: + # Fallback for Python < 3.9 + from backports.zoneinfo import ZoneInfo - us_eastern = pytz.timezone('US/Eastern') + us_eastern = ZoneInfo('US/Eastern') datetime_type = agate.DateTime(timezone=us_eastern) column_names = ['what', 'when'] @@ -70,7 +78,7 @@ If you load data from a spreadsheet in one timezone and you need to convert it t table = agate.Table.from_csv('events.csv', columns) - rome = timezone('Europe/Rome') + rome = ZoneInfo('Europe/Rome') timezone_shifter = agate.Formula(lambda r: r['when'].astimezone(rome)) table = agate.Table.compute([ diff --git a/examples/test_from_json_ambiguous.json b/examples/test_from_json_ambiguous.json new file mode 100644 index 00000000..5435946e --- /dev/null +++ b/examples/test_from_json_ambiguous.json @@ -0,0 +1,8 @@ +[ + { + "a/b": 2, + "a": { + "b": false + } + } +] diff --git a/setup.py b/setup.py index 3aa57f42..49851d19 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name='agate', - version='1.7.1', + version='1.9.1', description='A data analysis library that is optimized for humans instead of machines.', long_description=long_description, long_description_content_type='text/x-rst', @@ -25,11 +25,11 @@ 'Natural Language :: English', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Scientific/Engineering :: Information Analysis', @@ -44,6 +44,7 @@ 'parsedatetime>=2.1,!=2.5', 'python-slugify>=1.2.1', 'pytimeparse>=1.1.5', + 'tzdata>=2023.3;platform_system=="Windows"', ], extras_require={ 'test': [ @@ -54,7 +55,7 @@ 
'PyICU>=2.4.2;sys_platform=="linux"', 'pytest', 'pytest-cov', - 'pytz>=2015.4', + 'backports.zoneinfo;python_version<"3.9"', ], } ) diff --git a/tests/test_data_types.py b/tests/test_data_types.py index 842b9188..4f2bac53 100644 --- a/tests/test_data_types.py +++ b/tests/test_data_types.py @@ -4,7 +4,12 @@ from decimal import Decimal import parsedatetime -import pytz + +try: + from zoneinfo import ZoneInfo +except ImportError: + # Fallback for Python < 3.9 + from backports.zoneinfo import ZoneInfo from agate.data_types import Boolean, Date, DateTime, Number, Text, TimeDelta from agate.exceptions import CastError @@ -352,16 +357,16 @@ def test_cast_parser(self): )) def test_cast_parser_timezone(self): - tzinfo = pytz.timezone('US/Pacific') + tzinfo = ZoneInfo('US/Pacific') datetime_type = DateTime(timezone=tzinfo) values = ('3/1/1994 12:30 PM', '2/17/2011 06:30', None, 'January 5th, 1984 22:37', 'n/a') casted = tuple(datetime_type.cast(v) for v in values) self.assertSequenceEqual(casted, ( - tzinfo.localize(datetime.datetime(1994, 3, 1, 12, 30, 0, 0)), - tzinfo.localize(datetime.datetime(2011, 2, 17, 6, 30, 0, 0)), + datetime.datetime(1994, 3, 1, 12, 30, 0, 0, tzinfo=tzinfo), + datetime.datetime(2011, 2, 17, 6, 30, 0, 0, tzinfo=tzinfo), None, - tzinfo.localize(datetime.datetime(1984, 1, 5, 22, 37, 0, 0)), + datetime.datetime(1984, 1, 5, 22, 37, 0, 0, tzinfo=tzinfo), None )) diff --git a/tests/test_from_json.py b/tests/test_from_json.py index 2dcec013..63ba0602 100644 --- a/tests/test_from_json.py +++ b/tests/test_from_json.py @@ -1,6 +1,6 @@ - from agate import Table from agate.data_types import Boolean, Date, DateTime, Number, Text, TimeDelta +from agate.rows import Row from agate.testcase import AgateTestCase from agate.type_tester import TypeTester @@ -86,3 +86,10 @@ def test_from_json_no_type_tester(self): def test_from_json_error_newline_key(self): with self.assertRaises(ValueError): Table.from_json('examples/test.json', newline=True, key='test') + + def 
test_from_json_ambiguous(self): + table = Table.from_json('examples/test_from_json_ambiguous.json') + + self.assertColumnNames(table, ('a/b',)) + self.assertColumnTypes(table, [Boolean]) + self.assertRows(table, [Row([False])]) diff --git a/tests/test_py3.py b/tests/test_py3.py index de24373c..e087cbc8 100644 --- a/tests/test_py3.py +++ b/tests/test_py3.py @@ -1,5 +1,7 @@ import csv import os +import platform +import sys import unittest from io import StringIO @@ -231,6 +233,10 @@ def test_writerows(self): class TestSniffer(unittest.TestCase): + @unittest.skipIf( + platform.system() == 'Darwin' and sys.version_info[:2] == (3, 10), + reason='The (macos-latest, 3.10) job fails on GitHub Actions' + ) def test_sniffer(self): with open('examples/test.csv', encoding='utf-8') as f: contents = f.read() diff --git a/tests/test_table/test_compute.py b/tests/test_table/test_compute.py index 530dc12b..976ee2dd 100644 --- a/tests/test_table/test_compute.py +++ b/tests/test_table/test_compute.py @@ -1,4 +1,3 @@ - from agate import Table from agate.computations import Formula from agate.data_types import Number, Text