diff --git a/pex/crawler.py b/pex/crawler.py index 84854f14f..19ce52b2e 100644 --- a/pex/crawler.py +++ b/pex/crawler.py @@ -101,7 +101,7 @@ def crawl(self, link_or_links, follow_links=False): def execute(): while not converged.is_set(): try: - link = queue.get(timeout=0.1) + link = queue.get(timeout=0.01) except Empty: continue if link not in seen: @@ -134,7 +134,6 @@ def execute(): queue.join() converged.set() - for worker in workers: - worker.join() - + # We deliberately do not join back the worker threads, since they are no longer of + # any use to us. return links diff --git a/pex/link.py b/pex/link.py index a0a82ba52..9feee4e53 100644 --- a/pex/link.py +++ b/pex/link.py @@ -6,6 +6,7 @@ from .compatibility import string as compatible_string from .compatibility import PY3 +from .util import Memoizer if PY3: import urllib.parse as urlparse @@ -50,10 +51,17 @@ def wrap_iterable(cls, url_or_urls): def _normalize(cls, filename): return 'file://' + os.path.realpath(os.path.expanduser(filename)) + # A cache for the result of from_filename + _FROM_FILENAME_CACHE = Memoizer() + @classmethod def from_filename(cls, filename): """Return a :class:`Link` wrapping the local filename.""" - return cls(cls._normalize(filename)) + result = cls._FROM_FILENAME_CACHE.get(filename) + if result is None: + result = cls(cls._normalize(filename)) + cls._FROM_FILENAME_CACHE.store(filename, result) + return result def __init__(self, url): """Construct a :class:`Link` from a url. diff --git a/pex/package.py b/pex/package.py index c813bf321..f50f2c71a 100644 --- a/pex/package.py +++ b/pex/package.py @@ -11,6 +11,7 @@ from .link import Link from .pep425 import PEP425, PEP425Extras from .platforms import Platform +from .util import Memoizer class Package(Link): @@ -22,6 +23,9 @@ class InvalidPackage(Error): pass # The registry of concrete implementations _REGISTRY = set() + # The cache of packages that we have already constructed. 
+ _HREF_TO_PACKAGE_CACHE = Memoizer() + @classmethod def register(cls, package_type): """Register a concrete implementation of a Package to be recognized by pex.""" @@ -37,12 +41,19 @@ def from_href(cls, href, **kw): :type href: string :returns: A Package object if a valid concrete implementation exists, otherwise None. """ - href = Link.wrap(href) + package = cls._HREF_TO_PACKAGE_CACHE.get(href) + if package is not None: + return package + link_href = Link.wrap(href) for package_type in cls._REGISTRY: try: - return package_type(href.url, **kw) + package = package_type(link_href.url, **kw) + break except package_type.InvalidPackage: continue + if package is not None: + cls._HREF_TO_PACKAGE_CACHE.store(href, package) + return package @property def name(self): diff --git a/pex/util.py b/pex/util.py index 5d9b30c4d..9dba244e1 100644 --- a/pex/util.py +++ b/pex/util.py @@ -9,6 +9,7 @@ import shutil import uuid from hashlib import sha1 +from threading import Lock from pkg_resources import find_distributions @@ -145,3 +146,19 @@ def cache_distribution(cls, zf, source, target_dir): dist = DistributionHelper.distribution_from_path(target_dir) assert dist is not None, 'Failed to cache distribution %s' % source return dist + + +class Memoizer(object): + """A thread safe class for memoizing the results of a computation.""" + + def __init__(self): + self._data = {} + self._lock = Lock() + + def get(self, key, default=None): + with self._lock: + return self._data.get(key, default) + + def store(self, key, value): + with self._lock: + self._data[key] = value