-
Notifications
You must be signed in to change notification settings - Fork 1k
/
UsenixBridge.php
69 lines (59 loc) · 2.15 KB
/
UsenixBridge.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
<?php
declare(strict_types=1);
/**
 * Bridge that collects articles from the USENIX ";login:" online listing.
 *
 * The only supported context is "USENIX ;login:". For that context the listing
 * page is fetched, one feed item is built per listed article, and each article
 * page is fetched in turn to obtain its content and tags.
 */
final class UsenixBridge extends BridgeAbstract
{
    const NAME = 'USENIX';
    const URI = 'https://www.usenix.org/publications';
    const DESCRIPTION = 'Digital publications from USENIX (usenix.org)';
    const MAINTAINER = 'dvikan';
    const PARAMETERS = [
        'USENIX ;login:' => [
        ],
    ];

    /**
     * Entry point: dispatches on the queried context and fills $this->items.
     *
     * Any context other than "USENIX ;login:" is reported through
     * returnClientError().
     */
    public function collectData()
    {
        if ($this->queriedContext === 'USENIX ;login:') {
            $this->collectLoginOnlineItems();
            return;
        }
        returnClientError('Illegal Context');
    }

    /**
     * Scrapes the ;login: online listing and appends one item per article row.
     *
     * Rows that lack a title or a usable link are skipped, because no
     * meaningful feed item (nor article fetch) can be derived from them.
     * Author and publication date are optional and simply omitted when the
     * corresponding element is absent.
     */
    private function collectLoginOnlineItems(): void
    {
        $url = 'https://www.usenix.org/publications/loginonline';
        $dom = getSimpleHTMLDOMCached($url);

        foreach ($dom->find('div.view-content > div') as $row) {
            $title = $row->find('.views-field-title > span', 0);
            $link = $row->find('.views-field-nothing-1 > span > a', 0);
            if ($title === null || $link === null) {
                // Not a complete article row (e.g. layout wrapper); skip it
                // instead of failing the whole feed on a null dereference.
                continue;
            }

            // A missing href attribute does not come back as a string, so
            // guard before handing it to the strictly typed helper.
            $href = $link->href;
            if (!is_string($href) || $href === '') {
                continue;
            }
            $uri = $this->resolveUri($href);

            $entry = ['title' => $title->innertext];

            $author = $row->find('.views-field-pseudo-author-list > span.field-content', 0);
            if ($author !== null) {
                // Keep only the first name of a comma separated author list.
                $entry['author'] = strstr($author->plaintext, ',', true) ?: $author->plaintext;
            }

            $entry['uri'] = $uri;

            // Date text looks like: June 2, 2022
            $createdAt = $row->find('div.views-field-field-lv2-publication-date > div > span', 0);
            if ($createdAt !== null) {
                $entry['timestamp'] = $createdAt->innertext;
            }

            $this->items[] = array_merge($entry, $this->getItemContent($uri));
        }
    }

    /**
     * Turns an href taken from the listing into an absolute URL.
     *
     * Root-relative paths (the form the listing is expected to use) are
     * prefixed with the USENIX origin exactly as before; absolute and
     * protocol-relative URLs are passed through instead of being mangled by
     * blind prefixing.
     *
     * @param string $href Raw href attribute value.
     *
     * @return string Absolute URL of the article.
     */
    private function resolveUri(string $href): string
    {
        if (preg_match('#^https?://#i', $href) === 1) {
            return $href;
        }
        if (strncmp($href, '//', 2) === 0) {
            return 'https:' . $href;
        }
        return 'https://www.usenix.org/' . ltrim($href, '/');
    }

    /**
     * Fetches a single article page and extracts its content and tags.
     *
     * The content is the inner HTML of the first ".paragraphs-items-full"
     * element followed by the inner HTML of the first <fieldset>, each only
     * when present. A page with neither yields an empty content string.
     *
     * @param string $uri Absolute URL of the article page.
     *
     * @return array Array with the keys 'content' (string) and 'categories'
     *               (list of tag strings).
     */
    private function getItemContent(string $uri): array
    {
        $html = getSimpleHTMLDOMCached($uri);

        $content = '';
        $body = $html->find('.paragraphs-items-full', 0);
        if ($body !== null) {
            $content .= $body->innertext;
        }
        $extra = $html->find('fieldset', 0);
        if ($extra !== null) {
            $content .= $extra->innertext;
        }

        $tags = [];
        foreach ($html->find('.field-name-field-lv2-tags div.field-item') as $tag) {
            $tags[] = $tag->plaintext;
        }

        return [
            'content' => $content,
            'categories' => $tags,
        ];
    }
}