From 81e0225f492940faf4f86c6912649497e47cf56d Mon Sep 17 00:00:00 2001
From: Mike Stucka
Date: Fri, 19 Jan 2024 13:17:57 -0500
Subject: [PATCH] Back out #600 for now

Scraper will break, transformer will come back.
---
 warn/scrapers/hi.py | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/warn/scrapers/hi.py b/warn/scrapers/hi.py
index 1052bf3..78421dd 100644
--- a/warn/scrapers/hi.py
+++ b/warn/scrapers/hi.py
@@ -1,14 +1,13 @@
 import datetime
 import logging
 from pathlib import Path
-from urllib.parse import quote
 
 from bs4 import BeautifulSoup
 
 from .. import utils
 
 __authors__ = ["Ash1R", "stucka"]
-__tags__ = ["html", "pdf"]
+__tags__ = ["html"]
 __source__ = {
     "name": "Workforce Development Hawaii",
     "url": "https://labor.hawaii.gov/wdc/real-time-warn-updates/",
@@ -29,17 +28,15 @@ def scrape(
     cache_dir -- the Path where results can be cached (default WARN_CACHE_DIR)
 
     Returns: the Path where the file is written
     """
-    cacheprefix = "https://webcache.googleusercontent.com/search?q=cache%3A"  # Use Google Cache, per #600
-
-    firstpage = utils.get_url(cacheprefix + quote("https://labor.hawaii.gov/wdc/real-time-warn-updates/"))
+    firstpage = utils.get_url("https://labor.hawaii.gov/wdc/real-time-warn-updates/")
     soup = BeautifulSoup(firstpage.text, features="html5lib")
     pagesection = soup.select("div.primary-content")[0]
     subpageurls = []
     for atag in pagesection.find_all("a"):
         href = atag["href"]
         if href.endswith("/"):
-            href = href  # [:-1]
-        subpageurls.append(cacheprefix + quote(href))
+            href = href[:-1]
+        subpageurls.append(href)
     headers = ["Company", "Date", "PDF url", "location", "jobs"]
     data = [headers]
@@ -88,8 +85,8 @@ def scrape(
             row.append(dates[i])
             row.append(url)
-            row.append(None) # location
-            row.append(None) # jobs
+            row.append(None)  # location
+            row.append(None)  # jobs
             data.append(row)
 
     output_csv = data_dir / "hi.csv"