Skip to content

Commit

Permalink
Increase timeouts to up to 30 sec; drop Py 3.6 (#43)
Browse files Browse the repository at this point in the history
* Simplify event loop handling, bump Python requirement to 3.7

* Increase timeouts to up to 30 sec; drop Py 3.6

* Handle gateway timeout when fetching metadata

* Ramp up timeout for GraphQL endpoint

* Fix test

* Bump timeout in another place too

* Bump timeout to 60 sec

* Retry on RemoteProtocolError

* Make use of GH concorrency limit, remove push trigger

* Remove push trigger for linter

* Cancel in-progress linting

* concurrency logic

* Do not cancel

* Better logging

* Limit tests to Py 3.7 for now to avoid hitting OpenNeuro rate limit
  • Loading branch information
hoechenberger authored May 18, 2021
1 parent c6a6701 commit 805f79c
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 49 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/run-linter.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
name: Linter

on:
push:
branches: ['**']
# push:
# branches: ['**']
pull_request:
branches: ['**']
create:
Expand All @@ -15,7 +15,10 @@ on:
- cron: "0 4 * * *"

jobs:
build:
lint:

concurrency: lint_job
# cancel-in-progress: true

runs-on: ubuntu-latest
strategy:
Expand Down
12 changes: 8 additions & 4 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
name: Unit tests

on:
push:
branches: ['**']
# push:
# branches: ['**']
pull_request:
branches: ['**']
create:
Expand All @@ -15,13 +15,17 @@ on:
- cron: "0 4 * * *"

jobs:
build:
test:

concurrency: test_job
# cancel-in-progress: true

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: [3.6, 3.7, 3.8, 3.9]
# python-version: [3.7, 3.8, 3.9]
python-version: [3.7]

steps:
- uses: actions/checkout@v2
Expand Down
6 changes: 6 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## 2021.5

- Ramp up timeouts from 5 to 30 seconds for downloads from `openneuro.org`.
- Drop support for Python 3.6. `openneuro-py` now requires Python 3.7 or newer.
This change makes development easier.

## 2021.4

- Avoid timeouts that would occur in certain situations.
Expand Down
61 changes: 38 additions & 23 deletions openneuro/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,16 +25,21 @@
sys.stdout.reconfigure(encoding='utf-8')
stdout_unicode = True
except AttributeError:
# Python 3.6
stdout_unicode = False


# HTTP server responses that indicate hopefully intermittent errors that
# warrant a retry.
allowed_retry_codes = (408, 500, 502, 503, 504, 522, 524)
allowed_retry_exceptions = (httpx.ConnectTimeout, httpx.ReadTimeout,
requests.exceptions.ConnectTimeout,
requests.exceptions.ReadTimeout)
allowed_retry_exceptions = (
httpx.ConnectTimeout, httpx.ReadTimeout,
requests.exceptions.ConnectTimeout,
requests.exceptions.ReadTimeout,

# "peer closed connection without sending complete message body
# (incomplete chunked read)"
httpx.RemoteProtocolError
)

# GraphQL endpoint and queries.

Expand Down Expand Up @@ -133,7 +138,8 @@ def _get_download_metadata(*,
tag=tag)

with requests.Session() as session:
gql_endpoint = RequestsEndpoint(url=gql_url, session=session)
gql_endpoint = RequestsEndpoint(url=gql_url, session=session,
timeout=60)

try:
response_json = gql_endpoint(query=query)
Expand All @@ -142,6 +148,12 @@ def _get_download_metadata(*,
response_json = None
request_timed_out = True

# Sometimes we do get a response, but it contains a gateway timeout error
# messsage (504 status code)
if (response_json is not None and 'errors' in response_json and
response_json['errors'][0]['message'].startswith('504')):
request_timed_out = True

if request_timed_out and max_retries > 0:
tqdm.write('Request timed out while fetching metadata, retrying …')
asyncio.sleep(retry_backoff)
Expand Down Expand Up @@ -181,11 +193,18 @@ async def _download_file(*,
else:
local_file_size = 0

# The OpenNeuro servers are sometimes very slow to respond, so use a
# gigantic timeout for those.
if url.startswith('https://openneuro.org/crn/'):
timeout = 60
else:
timeout = 5

# Check if we need to resume a download
# The file sizes provided via the API often do not match the sizes reported
# by the HTTP server. Rely on the sizes reported by the HTTP server.
async with semaphore:
async with httpx.AsyncClient() as client:
async with httpx.AsyncClient(timeout=timeout) as client:
try:
response = await client.head(url)
headers = response.headers
Expand Down Expand Up @@ -279,7 +298,7 @@ async def _download_file(*,
mode = 'wb'

async with semaphore:
async with httpx.AsyncClient() as client:
async with httpx.AsyncClient(timeout=timeout) as client:
try:
async with (
client.stream('GET', url=url, headers=headers)
Expand Down Expand Up @@ -378,8 +397,8 @@ async def _retrieve_and_write_to_disk(
total=remote_file_size, unit='B',
unit_scale=True, unit_divisor=1024,
leave=False) as progress:

num_bytes_downloaded = response.num_bytes_downloaded

# TODO Add timeout handling here, too.
async for chunk in response.aiter_bytes():
await f.write(chunk)
Expand All @@ -399,8 +418,9 @@ async def _retrieve_and_write_to_disk(
local_file_size = outfile.stat().st_size
if not local_file_size == remote_file_size:
raise RuntimeError(
f'Server claimed file size would be {remote_file_size} '
f'bytes, but downloaded {local_file_size} byes.')
f'Server claimed size of {outfile }would be '
f'{remote_file_size} bytes, but downloaded '
f'{local_file_size} byes.')


async def _download_files(*,
Expand Down Expand Up @@ -556,19 +576,14 @@ def download(*,
msg = f'👉 {msg}'
tqdm.write(msg)

# Pre-Python-3.7 compat. Once we drop support for Python 3.6, simply
# replace this with: asyncio.run(_download_files(...))
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(
[_download_files(
target_dir=target_dir,
files=files,
verify_hash=verify_hash,
verify_size=verify_size,
max_retries=max_retries,
retry_backoff=retry_backoff,
max_concurrent_downloads=max_concurrent_downloads)]
))
kwargs = dict(target_dir=target_dir,
files=files,
verify_hash=verify_hash,
verify_size=verify_size,
max_retries=max_retries,
retry_backoff=retry_backoff,
max_concurrent_downloads=max_concurrent_downloads)
asyncio.run(_download_files(**kwargs))

msg_finished = f'Finished downloading {dataset}.'
if stdout_unicode:
Expand Down
32 changes: 15 additions & 17 deletions openneuro/tests/test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,30 @@
from openneuro import download


@pytest.fixture
def dataset_id():
return 'ds000246'
dataset_id_aws = 'ds000246'
tag_aws = '1.0.0'
include_aws = 'sub-0001/anat'

dataset_id_on = 'ds000117'
include_on = 'sub-16/ses-meg'

@pytest.fixture
def tag():
return '1.0.0'


@pytest.fixture
def invalid_tag():
return 'abcdefg'


@pytest.fixture
def include():
return 'sub-0001/anat'
invalid_tag = 'abcdefg'


@pytest.mark.parametrize(
('dataset_id', 'tag', 'include'),
[
(dataset_id_aws, tag_aws, include_aws),
(dataset_id_on, None, include_on)
]
)
def test_download(tmp_path, dataset_id, tag, include):
"""Test downloading some files."""
download(dataset=dataset_id, tag=tag, target_dir=tmp_path, include=include)


def test_download_invalid_tag(tmp_path, dataset_id, invalid_tag):
def test_download_invalid_tag(tmp_path, dataset_id=dataset_id_aws,
invalid_tag=invalid_tag):
"""Test handling of a non-existent tag."""
with pytest.raises(RuntimeError, match='snapshot.*does not exist'):
download(dataset=dataset_id, tag=invalid_tag, target_dir=tmp_path)
10 changes: 8 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = openneuro-py
version = 2021.4
version = 2021.5
author = Richard Höchenberger <[email protected]>
author_email = [email protected]
url = https://github.com/hoechenberger/openneuro-py
Expand All @@ -16,9 +16,12 @@ classifiers =
Intended Audience :: Science/Research
Programming Language :: Python
Programming Language :: Python :: 3
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9

[options]
python_requires = >=3.6
python_requires = >=3.7
install_requires =
httpx >=0.15
requests
Expand All @@ -32,3 +35,6 @@ install_requires =
[options.entry_points]
console_scripts =
openneuro-py = openneuro.openneuro:cli

[tool:pytest]
addopts = -s

0 comments on commit 805f79c

Please sign in to comment.