Skip to content

Commit

Permalink
[TldrTechBridge] Fix bridge (#4187)
Browse files Browse the repository at this point in the history
* [TldrTechBridge] Fix bridge

* yup

---------

Co-authored-by: Dag <[email protected]>
  • Loading branch information
SqrtMinusOne and dvikan authored Aug 7, 2024
1 parent ee54cf4 commit b25a779
Showing 1 changed file with 38 additions and 26 deletions.
64 changes: 38 additions & 26 deletions bridges/TldrTechBridge.php
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
<?php

declare(strict_types=1);

class TldrTechBridge extends BridgeAbstract
{
const MAINTAINER = 'sqrtminusone';
const NAME = 'TLDR Tech Newsletter Bridge';
const URI = 'https://tldr.tech/';

const CACHE_TIMEOUT = 3600; // 1 hour
const DESCRIPTION = 'Return newsletter articles from TLDR Tech';

const PARAMETERS = [
Expand Down Expand Up @@ -41,41 +41,53 @@ public function collectData()
{
$topic = $this->getInput('topic');
$limit = $this->getInput('limit');
$url = self::URI . $topic . '/archives';
$html = getSimpleHTMLDOM($url);
$entries_root = $html->find('div.content-center.mt-5', 0);
$added = 0;

$url = self::URI . 'api/latest/' . $topic;
$response = getContents($url, [], [], true);
$location = $response->getHeader('Location');
$locationUrl = Url::fromString($location);

$this->extractItem($locationUrl);

$archives_url = self::URI . $topic . '/archives';
$archives_html = getSimpleHTMLDOM($archives_url);
$entries_root = $archives_html->find('div.content-center.mt-5', 0);
foreach ($entries_root->children() as $child) {
if ($child->tag != 'a') {
continue;
}
// Convert /<topic>/2023-01-01 to unix timestamp
$date_items = explode('/', $child->href);
$date = strtotime(end($date_items));
$item_url = self::URI . ltrim($child->href, '/');
try {
$this->items[] = [
'uri' => self::URI . $child->href,
'title' => $child->plaintext,
'timestamp' => $date,
'content' => $this->extractContent($item_url),
];
} catch (HttpException $e) {
continue;
}
$added++;
if ($added >= $limit) {
$this->extractItem(Url::fromString(self::URI . $child->href));
if (count($this->items) >= $limit) {
break;
}
}
}

/**
 * Fetch a single newsletter issue and append it to the feed items.
 *
 * The last segment of the URL path is parsed as the issue date
 * (paths look like /<topic>/2023-01-01). The page content and title
 * come from extractContent().
 *
 * An HttpException raised while fetching the page is swallowed and the
 * issue is skipped; any other exception propagates to the caller.
 *
 * @param Url $url Address of the newsletter issue page.
 */
private function extractItem(Url $url): void
{
    // Strip a trailing slash first, otherwise the last segment is empty
    // and the date can never be parsed.
    $pathParts = explode('/', rtrim($url->getPath(), '/'));
    $date = strtotime(end($pathParts));

    try {
        [$content, $title] = $this->extractContent($url);
    } catch (HttpException $e) {
        // archive occasionally returns broken URLs
        return;
    }

    $item = [
        'uri' => (string) $url,
        'title' => $title,
        'timestamp' => $date,
        'content' => $content,
    ];
    // strtotime() returns false when the segment is not a date; leave the
    // timestamp out instead of storing a boolean in the item.
    if ($date === false) {
        unset($item['timestamp']);
    }

    $this->items[] = $item;
}

private function extractContent($url)
{
$html = getSimpleHTMLDOM($url);
$html = getSimpleHTMLDOMCached($url);
$content = $html->find('div.content-center.mt-5', 0);
if (!$content) {
throw new HttpException('Could not find content', 500);
throw new \Exception('Could not find content');
}
$subscribe_form = $content->find('div.mt-5 > div > form', 0);
if ($subscribe_form) {
Expand Down Expand Up @@ -112,7 +124,7 @@ private function extractContent($url)
}
}
}

return $content->innertext;
$title = $content->find('h2', 0);
return [$content->innertext, $title->plaintext];
}
}

0 comments on commit b25a779

Please sign in to comment.