Skip to content

Commit

Permalink
Merge pull request #579 from stucka/tx-newsite
Browse files: browse the repository at this point in the history
Patch TX
  • Loading branch information
stucka authored Oct 1, 2023
2 parents d0d7b94 + f411226 commit 26356c5
Showing 1 changed file with 4 additions and 3 deletions.
7 changes: 4 additions & 3 deletions warn/scrapers/tx.py
Original file line number Diff line number Diff line change
@@ -12,7 +12,7 @@
__tags__ = ["html", "excel", "historical"]
__source__ = {
"name": "Texas Workforce Commission",
-"url": "https://www.twc.texas.gov/businesses/worker-adjustment-and-retraining-notification-warn-notices#warnNotices",
+"url": "https://www.twc.texas.gov/data-reports/warn-notice",
}

logger = logging.getLogger(__name__)
@@ -35,7 +35,7 @@ def scrape(
cache = Cache(cache_dir)

# Get the root URL
-url = "https://www.twc.texas.gov/businesses/worker-adjustment-and-retraining-notification-warn-notices#warnNotices"
+url = "https://www.twc.texas.gov/data-reports/warn-notice"
page = utils.get_url(url)
html = page.text

@@ -44,7 +44,8 @@ def scrape(

# Get all the Excel links
soup = BeautifulSoup(page.text, "html5lib")
-link_list = soup.find_all("a", href=re.compile("^/files/news/warn-act-listings-"))
+link_list = soup.find_all("a", href=re.compile("^/sites/default/files/oei/docs/warn-act-listings-"))
+logger.debug(f"{len(link_list):,} spreadsheet links found")

# Clean up the links and filter 'em down
href_list = []
Expand Down

0 comments on commit 26356c5

Please sign in to comment.