Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Install pangolin-assignment from UCSC download server instead of github #430

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions pangolin/utils/data_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,6 @@ def get_assignment_cache(cache_file, config):
'pangolin-assignment repository (that will make future data updates slower).\n'))
sys.exit(-1)

# Check versions of pangolin-data and pangolin-assignment to make sure they are consistent.
if pangolin_assignment.__version__.lstrip('v') != config[KEY_PANGOLIN_DATA_VERSION].lstrip('v'):
print(cyan(f'Error: pangolin_assignment cache version {pangolin_assignment.__version__} '
f'does not match pangolin_data version {config[KEY_PANGOLIN_DATA_VERSION]}. '
'Run "pangolin --update-data" to fetch latest versions of both.'))
sys.exit(-1)

try:
with gzip.open(cache, 'rt') as f:
line = f.readline()
Expand Down
71 changes: 51 additions & 20 deletions pangolin/utils/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import sys
import json
import re
import shutil
import tarfile
import subprocess
Expand All @@ -14,8 +15,10 @@

# Names of the dependencies that can be version-checked and updated; presumably these are
# the only keys update() accepts in its version dictionary -- confirm against update().
version_dict_keys = ['pangolin', 'scorpio', 'pangolin-data', 'constellations', 'pangolin-assignment']

# Dependencies whose releases are found by listing a web directory of versioned tarballs
# (see get_latest_release) instead of querying github, mapped to that directory's URL.
dependency_web_dir = { 'pangolin-assignment': 'https://hgdownload.gi.ucsc.edu/goldenPath/wuhCor1/pangolin-assignment' }

def get_latest_release(dependency):

def get_latest_cov_lineages(dependency):
"""
Using the github releases API check for the latest release of dependency and its tarball
"""
Expand All @@ -29,8 +32,8 @@ def get_latest_release(dependency):
# so if this is thrown and there is definitely connectivity then
# double check the version labels
except Exception as e:
sys.stderr.write(cyan("Unable to connect to reach github API "
"--update/--data_update requires internet "
sys.stderr.write(cyan("Unable to connect to reach github API. "
"--update/--update-data requires internet "
"connectivity so may not work on certain "
"systems or if your IP has exceeded the "
f"5,000 request per hour limit\n{e}\n"))
Expand All @@ -49,32 +52,58 @@ def get_latest_release(dependency):
return latest_release, latest_release_tarball


def git_lfs_install():
def get_latest_web_dir(dependency, web_dir):
    """
    Find the tarball url with the latest release from a web directory with versioned tarballs
    instead of github.  An HTTP GET of the web directory must return some text that contains
    names of files in that directory, some of which are {dependency}-{version}.tar.gz.

    Returns a (latest_release, latest_release_tarball) tuple of strings: the highest version
    found in the listing, as ordered by LooseVersion, and the URL of its tarball.

    Writes a message to stderr and exits with status -1 if the listing cannot be fetched or
    decoded, or if it names no matching tarball.
    """
    try:
        listing = request.urlopen(web_dir).read().decode('utf-8')
    except Exception as e:
        # Catch Exception rather than everything so that Ctrl-C and SystemExit still
        # propagate, and report the underlying cause to the user.
        sys.stderr.write(cyan(f"Unable to read {web_dir}\n{e}\n"))
        sys.exit(-1)
    # Escape the dependency name and the literal dots of the suffix so that regex
    # metacharacters match only themselves; require a non-empty version string.
    tar_re = re.compile(rf"{re.escape(dependency)}-(.+?)\.tar\.gz")
    # A listing usually names each file more than once (link target and link text),
    # so collapse duplicates before comparing versions.
    matches = set(tar_re.findall(listing))
    if not matches:
        sys.stderr.write(cyan(f"Can't find {dependency}-<version>.tar.gz files in listing of {web_dir}\n"))
        sys.exit(-1)
    latest_release = str(max(LooseVersion(v) for v in matches))
    # Tolerate a trailing slash on web_dir so the URL never contains '//'.
    latest_release_tarball = f"{web_dir.rstrip('/')}/{dependency}-{latest_release}.tar.gz"
    return latest_release, latest_release_tarball


def get_latest_release(dependency):
    """
    Return the latest release and its tarball url for dependency.

    Dependencies listed in dependency_web_dir are looked up in their web directory;
    every other dependency is looked up through the github API for the cov-lineages repo.
    """
    # Guard clause: anything without a registered web directory comes from github.
    if dependency not in dependency_web_dir:
        return get_latest_cov_lineages(dependency)
    return get_latest_web_dir(dependency, dependency_web_dir[dependency])

def pip_install_dep(dependency, release):

def pip_install_url(url):
    """
    Use pip install to install a package from a url.

    url is handed unchanged to 'pip install --upgrade', run with the same interpreter that is
    running this code (sys.executable).  pip's stdout and stderr are discarded.

    Raises subprocess.CalledProcessError if pip exits with a non-zero status.
    """
    pip_command = [sys.executable, '-m', 'pip', 'install', '--upgrade', url]
    subprocess.run(pip_command,
                   check=True,
                   stdout=subprocess.DEVNULL,
                   stderr=subprocess.DEVNULL)


def pip_install_cov_lineages(dependency, release):
    """
    Install the given release of a cov-lineages github repository with pip.

    Builds a git+https url that points at the release tag of the dependency's repository
    and delegates the installation to pip_install_url.
    """
    pip_install_url(f"git+https://github.com/cov-lineages/{dependency}.git@{release}")


def install_pangolin_assignment():
"""
If the pangolin-assignment repo has not been installed already then install the latest release.
Expand All @@ -84,9 +113,8 @@ def install_pangolin_assignment():
print(f"pangolin-assignment already installed with version {pangolin_assignment.__version__}; use --update or --update-data if you wish to update it.", file=sys.stderr)

except:
git_lfs_install()
latest_release, tarball = get_latest_release('pangolin-assignment')
pip_install_dep('pangolin-assignment', latest_release)
pip_install_url(tarball)
print(f"pangolin-assignment installed with latest release ({latest_release})")


Expand All @@ -106,7 +134,7 @@ def update(version_dictionary, data_dir=None):
Using the github releases API check for the latest current release
of the set of dependencies provided e.g., pangolin, scorpio, pangolin-data and
constellations for complete --update and just pangolearn and constellations
for --update_data. If pangolin-assignment has been added to the installation
for --update-data. If pangolin-assignment has been added to version_dictionary
then it will be included in both --update and --update-data.

Dictionary keys must be one of pangolin, scorpio, pangolin-data, constellations
Expand Down Expand Up @@ -170,7 +198,10 @@ def update(version_dictionary, data_dir=None):
shutil.rmtree(destination_directory)
shutil.move(os.path.join(tempdir, extracted_dir, dependency_package), destination_directory)
else:
pip_install_dep(dependency, latest_release)
if dependency in dependency_web_dir:
pip_install_url(latest_release_tarball)
else:
pip_install_cov_lineages(dependency, latest_release)
print(f"{dependency} updated to {latest_release}", file=sys.stderr)
elif version > latest_release_tidied:
print(f"{dependency} ({version}) is newer than latest stable "
Expand Down