diff --git a/pangolin/utils/data_checks.py b/pangolin/utils/data_checks.py index 05278b5..80b1e6a 100644 --- a/pangolin/utils/data_checks.py +++ b/pangolin/utils/data_checks.py @@ -95,13 +95,6 @@ def get_assignment_cache(cache_file, config): 'pangolin-assignment repository (that will make future data updates slower).\n')) sys.exit(-1) - # Check versions of pangolin-data and pangolin-assignment to make sure they are consistent. - if pangolin_assignment.__version__.lstrip('v') != config[KEY_PANGOLIN_DATA_VERSION].lstrip('v'): - print(cyan(f'Error: pangolin_assignment cache version {pangolin_assignment.__version__} ' - f'does not match pangolin_data version {config[KEY_PANGOLIN_DATA_VERSION]}. ' - 'Run "pangolin --update-data" to fetch latest versions of both.')) - sys.exit(-1) - try: with gzip.open(cache, 'rt') as f: line = f.readline() diff --git a/pangolin/utils/update.py b/pangolin/utils/update.py index 721a9eb..758103f 100644 --- a/pangolin/utils/update.py +++ b/pangolin/utils/update.py @@ -3,6 +3,7 @@ import os import sys import json +import re import shutil import tarfile import subprocess @@ -14,8 +15,10 @@ version_dict_keys = ['pangolin', 'scorpio', 'pangolin-data', 'constellations', 'pangolin-assignment'] +dependency_web_dir = { 'pangolin-assignment': 'https://hgdownload.gi.ucsc.edu/goldenPath/wuhCor1/pangolin-assignment' } -def get_latest_release(dependency): + +def get_latest_cov_lineages(dependency): """ Using the github releases API check for the latest release of dependency and its tarball """ @@ -29,8 +32,8 @@ def get_latest_release(dependency): # so if this is thrown and there is definitely connectivity then # double check the version labels except Exception as e: - sys.stderr.write(cyan("Unable to connect to reach github API " - "--update/--data_update requires internet " + sys.stderr.write(cyan("Unable to connect to reach github API. " + "--update/--update-data requires internet " "connectivity so may not work on certain " "systems or if your IP has exceeded the " f"5,000 request per hour limit\n{e}\n")) @@ -49,32 +52,58 @@ def get_latest_release(dependency): return latest_release, latest_release_tarball -def git_lfs_install(): +def get_latest_web_dir(dependency, web_dir): """ - 'git-lfs install' must be run after installing git-lfs and before cloning a repo - that uses Git LFS. + Find the tarball url with the latest release from a web directory with versioned tarballs + instead of github. An HTTP GET of the web directory must return some text that contains + names of files in that directory, some of which are {dependency}-{version}.tar.gz. """ try: - subprocess.run(['git-lfs', 'install'], - check=True, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL) - except subprocess.CalledProcessError as e: - stderr = e.stderr.decode('utf-8') - sys.stderr.write(cyan(f"Error: {e}:\n{stderr}\n")) + listing = request.urlopen(web_dir).read().decode('utf-8') + except: + sys.stderr.write(cyan(f"Unable to read {web_dir}")) + sys.exit(-1) + tarRe = re.compile(f"{dependency}-(.*?).tar.gz") + matches = list(set(tarRe.findall(listing))) + if not matches: + sys.stderr.write(cyan(f"Can't find {dependency}-.tar.gz files in listing of {web_dir}")) sys.exit(-1) + versions = [LooseVersion(v) for v in matches] + versions.sort() + latest_release = str(versions[-1]) + latest_release_tarball = f"{web_dir}/{dependency}-{latest_release}.tar.gz" + return latest_release, latest_release_tarball + + +def get_latest_release(dependency): + """ + If dependency comes from a web directory then find latest release and tarball there, otherwise + query github API for cov-lineages repo + """ + if dependency in dependency_web_dir: + return get_latest_web_dir(dependency, dependency_web_dir[dependency]) + else: + return get_latest_cov_lineages(dependency) -def pip_install_dep(dependency, release): + +def pip_install_url(url): """ - Use pip install to install a cov-lineages repository with the specificed release + Use pip install to install a package from a url. """ - url = f"git+https://github.com/cov-lineages/{dependency}.git@{release}" subprocess.run([sys.executable, '-m', 'pip', 'install', '--upgrade', url], check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) +def pip_install_cov_lineages(dependency, release): + """ + Use pip install to install a cov-lineages repository with the specified release + """ + url = f"git+https://github.com/cov-lineages/{dependency}.git@{release}" + pip_install_url(url) + + def install_pangolin_assignment(): """ If the pangolin-assignment repo has not been installed already then install the latest release. @@ -84,9 +113,8 @@ def install_pangolin_assignment(): print(f"pangolin-assignment already installed with version {pangolin_assignment.__version__}; use --update or --update-data if you wish to update it.", file=sys.stderr) except: - git_lfs_install() latest_release, tarball = get_latest_release('pangolin-assignment') - pip_install_dep('pangolin-assignment', latest_release) + pip_install_url(tarball) print(f"pangolin-assignment installed with latest release ({latest_release})") @@ -106,7 +134,7 @@ def update(version_dictionary, data_dir=None): Using the github releases API check for the latest current release of the set of dependencies provided e.g., pangolin, scorpio, pangolin-data and constellations for complete --update and just pangolearn and constellations - for --update_data. If pangolin-assignment has been added to the installation + for --update_data. If pangolin-assignment has been added to version_dictionary then it will be included in both --update and --update-data. Dictionary keys must be one of pangolin, scorpio, pangolin-data, constellations @@ -170,7 +198,10 @@ def update(version_dictionary, data_dir=None): shutil.rmtree(destination_directory) shutil.move(os.path.join(tempdir, extracted_dir, dependency_package), destination_directory) else: - pip_install_dep(dependency, latest_release) + if dependency in dependency_web_dir: + pip_install_url(latest_release_tarball) + else: + pip_install_cov_lineages(dependency, latest_release) print(f"{dependency} updated to {latest_release}", file=sys.stderr) elif version > latest_release_tidied: print(f"{dependency} ({version}) is newer than latest stable "