-
Notifications
You must be signed in to change notification settings - Fork 1k
/
BundestagParteispendenBridge.php
94 lines (76 loc) · 3.25 KB
/
BundestagParteispendenBridge.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
<?php
class BundestagParteispendenBridge extends BridgeAbstract
{
const MAINTAINER = 'mibe';
const NAME = 'Deutscher Bundestag - Parteispenden';
const URI = 'https://www.bundestag.de/parlament/praesidium/parteienfinanzierung/fundstellen50000';
const CACHE_TIMEOUT = 86400; // 24h
const DESCRIPTION = 'Returns the latest "soft money" donations to parties represented in the German Bundestag.';
const CONTENT_TEMPLATE = <<<TMPL
<p><b>Partei:</b><br>%s</p>
<p><b>Spendenbetrag:</b><br>%s</p>
<p><b>Spender:</b><br>%s</p>
<p><b>Eingang der Spende:</b><br>%s</p>
TMPL;
public function getIcon()
{
return 'https://www.bundestag.de/static/appdata/includes/images/layout/favicon.ico';
}
public function collectData()
{
$ajaxUri = <<<URI
https://www.bundestag.de/ajax/filterlist/de/parlament/praesidium/parteienfinanzierung/fundstellen50000/462002-462002
URI;
// Get the main page
$html = getSimpleHTMLDOMCached($ajaxUri, self::CACHE_TIMEOUT)
or returnServerError('Could not request AJAX list.');
// Build the URL from the first anchor element. The list is sorted by year, descending, so the first element is the current year.
$firstAnchor = $html->find('a', 0)
or returnServerError('Could not find the proper HTML element.');
$url = 'https://www.bundestag.de' . $firstAnchor->href;
// Get the actual page with the soft money donations
$html = getSimpleHTMLDOMCached($url, self::CACHE_TIMEOUT)
or returnServerError('Could not request ' . $url);
$rows = $html->find('table.table > tbody > tr')
or returnServerError('Could not find the proper HTML elements.');
foreach ($rows as $row) {
$item = $this->generateItemFromRow($row);
if (is_array($item)) {
$item['uri'] = $url;
$this->items[] = $item;
}
}
}
private function generateItemFromRow(simple_html_dom_node $row)
{
// The row must have 5 columns. There are monthly header rows, which are ignored here.
if (count($row->children) != 5) {
return null;
}
$item = [];
// | column | paragraph inside column
$party = $row->children[0]->children[0]->innertext;
$amount = $row->children[1]->children[0]->innertext . ' €';
$donor = $row->children[2]->children[0]->innertext;
$date = $row->children[3]->children[0]->innertext;
$dip = $row->children[4]->children[0]->find('a.dipLink', 0);
// Strip whitespace from date string.
$date = str_replace(' ', '', $date);
$content = sprintf(self::CONTENT_TEMPLATE, $party, $amount, $donor, $date);
$item = [
'title' => $party . ': ' . $amount,
'content' => $content,
'uid' => sha1($content),
];
// Try to get the link to the official document
if ($dip != null) {
$item['enclosures'] = [$dip->href];
}
// Try to parse the date
$dateTime = DateTime::createFromFormat('d.m.Y', $date);
if ($dateTime !== false) {
$item['timestamp'] = $dateTime->getTimestamp();
}
return $item;
}
}