From f411226d77093dd818501e3bc09e7c81d43eff36 Mon Sep 17 00:00:00 2001
From: Mike Stucka
Date: Sun, 1 Oct 2023 15:59:50 -0400
Subject: [PATCH] Patch TX

---
 warn/scrapers/tx.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/warn/scrapers/tx.py b/warn/scrapers/tx.py
index 9bf7a65f..b586b20f 100644
--- a/warn/scrapers/tx.py
+++ b/warn/scrapers/tx.py
@@ -12,7 +12,7 @@
 __tags__ = ["html", "excel", "historical"]
 __source__ = {
     "name": "Texas Workforce Commission",
-    "url": "https://www.twc.texas.gov/businesses/worker-adjustment-and-retraining-notification-warn-notices#warnNotices",
+    "url": "https://www.twc.texas.gov/data-reports/warn-notice",
 }
 
 logger = logging.getLogger(__name__)
@@ -35,7 +35,7 @@ def scrape(
     cache = Cache(cache_dir)
 
     # Get the root URL
-    url = "https://www.twc.texas.gov/businesses/worker-adjustment-and-retraining-notification-warn-notices#warnNotices"
+    url = "https://www.twc.texas.gov/data-reports/warn-notice"
     page = utils.get_url(url)
     html = page.text
 
@@ -44,7 +44,8 @@ def scrape(
     # Get all the Excel links
     soup = BeautifulSoup(page.text, "html5lib")
 
-    link_list = soup.find_all("a", href=re.compile("^/files/news/warn-act-listings-"))
+    link_list = soup.find_all("a", href=re.compile("^/sites/default/files/oei/docs/warn-act-listings-"))
+    logger.debug(f"{len(link_list):,} spreadsheet links found")
 
     # Clean up the links and filter 'em down
     href_list = []