From b25a779d98e4ed674496e6184b424e0a594f2364 Mon Sep 17 00:00:00 2001 From: Pavel Korytov Date: Thu, 8 Aug 2024 01:27:33 +0300 Subject: [PATCH] [TldrTechBridge] Fix bridge (#4187) * [TldrTechBridge] Fix bridge * yup --------- Co-authored-by: Dag --- bridges/TldrTechBridge.php | 64 ++++++++++++++++++++++---------------- 1 file changed, 38 insertions(+), 26 deletions(-) diff --git a/bridges/TldrTechBridge.php b/bridges/TldrTechBridge.php index d29553479ba..6c96dff7dd3 100644 --- a/bridges/TldrTechBridge.php +++ b/bridges/TldrTechBridge.php @@ -1,12 +1,12 @@ getInput('topic'); $limit = $this->getInput('limit'); - $url = self::URI . $topic . '/archives'; - $html = getSimpleHTMLDOM($url); - $entries_root = $html->find('div.content-center.mt-5', 0); - $added = 0; + + $url = self::URI . 'api/latest/' . $topic; + $response = getContents($url, [], [], true); + $location = $response->getHeader('Location'); + $locationUrl = Url::fromString($location); + + $this->extractItem($locationUrl); + + $archives_url = self::URI . $topic . '/archives'; + $archives_html = getSimpleHTMLDOM($archives_url); + $entries_root = $archives_html->find('div.content-center.mt-5', 0); foreach ($entries_root->children() as $child) { if ($child->tag != 'a') { continue; } - // Convert //2023-01-01 to unix timestamp - $date_items = explode('/', $child->href); - $date = strtotime(end($date_items)); - $item_url = self::URI . ltrim($child->href, '/'); - try { - $this->items[] = [ - 'uri' => self::URI . $child->href, - 'title' => $child->plaintext, - 'timestamp' => $date, - 'content' => $this->extractContent($item_url), - ]; - } catch (HttpException $e) { - continue; - } - $added++; - if ($added >= $limit) { + $this->extractItem(Url::fromString(self::URI . $child->href)); + if (count($this->items) >= $limit) { break; } } } + private function extractItem(Url $url) + { + $pathParts = explode('/', $url->getPath()); + $date = strtotime(end($pathParts)); + try { + [$content, $title] = $this->extractContent($url); + + $this->items[] = [ + 'uri' => (string) $url, + 'title' => $title, + 'timestamp' => $date, + 'content' => $content, + ]; + } catch (HttpException $e) { + // archive occasionally returns broken URLs + return; + } + } + private function extractContent($url) { - $html = getSimpleHTMLDOM($url); + $html = getSimpleHTMLDOMCached($url); $content = $html->find('div.content-center.mt-5', 0); if (!$content) { - throw new HttpException('Could not find content', 500); + throw new \Exception('Could not find content'); } $subscribe_form = $content->find('div.mt-5 > div > form', 0); if ($subscribe_form) { @@ -112,7 +124,7 @@ private function extractContent($url) } } } - - return $content->innertext; + $title = $content->find('h2', 0); + return [$content->innertext, $title->plaintext]; } }