Skip to content

Commit

Permalink
use BridgeAbstract::file_get_html in all bridges
Browse files Browse the repository at this point in the history
instead of simple_html_dom function file_get_html

Signed-off-by: Pierre Mazière <[email protected]>
  • Loading branch information
Pierre Mazière committed Jun 25, 2016
1 parent 36d39d3 commit 955eecc
Show file tree
Hide file tree
Showing 123 changed files with 187 additions and 187 deletions.
2 changes: 1 addition & 1 deletion bridges/ABCTabsBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public function loadMetadatas() {

public function collectData(array $param){
$html = '';
$html = file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnError('No results for this query.', 404);
$html = $this->file_get_html('http://www.abc-tabs.com/tablatures/nouveautes.html') or $this->returnError('No results for this query.', 404);
$table = $html->find('table#myTable', 0)->children(1);

foreach ($table->find('tr') as $tab)
Expand Down
2 changes: 1 addition & 1 deletion bridges/AcrimedBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ protected function parseRSSItem($newsItem) {
$item->title = trim($newsItem->title);
$item->timestamp = strtotime($dc->date);

$articlePage = file_get_html($newsItem->link);
$articlePage = $this->file_get_html($newsItem->link);
$article = $hs->sanitize($articlePage->find('article.article1', 0)->innertext);
$article = HTMLSanitizer::defaultImageSrcTo($article, "http://www.acrimed.org/");

Expand Down
2 changes: 1 addition & 1 deletion bridges/AllocineFRBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public function loadMetadatas() {
}

public function collectData(array $param){
$html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
$html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);

foreach($html->find('figure.media-meta-fig') as $element)
{
Expand Down
2 changes: 1 addition & 1 deletion bridges/AllocineT5Bridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public function loadMetadatas() {
}

public function collectData(array $param){
$html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
$html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);

foreach($html->find('figure.media-meta-fig') as $element)
{
Expand Down
2 changes: 1 addition & 1 deletion bridges/AllocineTueursEnSerieBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public function loadMetadatas() {
}

public function collectData(array $param){
$html = file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);
$html = $this->file_get_html($this->_URL) or $this->returnError('Could not request Allo cine.', 404);

foreach($html->find('figure.media-meta-fig') as $element)
{
Expand Down
2 changes: 1 addition & 1 deletion bridges/AnimeUltimeBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public function collectData(array $param) {
//Retrieve page contents
$website = 'http://www.anime-ultime.net/';
$url = $website.'history-0-1/'.$requestFilter;
$html = file_get_html($url) or $this->returnError('Could not request Anime-Ultime: '.$url, 500);
$html = $this->file_get_html($url) or $this->returnError('Could not request Anime-Ultime: '.$url, 500);

//Releases are sorted by day : process each day individually
foreach ($html->find('div.history', 0)->find('h3') as $daySection) {
Expand Down
2 changes: 1 addition & 1 deletion bridges/BandcampBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public function collectData(array $param){
$html = '';
if (isset($param['tag'])) {
$this->request = $param['tag'];
$html = file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnError('No results for this query.', 404);
$html = $this->file_get_html('http://bandcamp.com/tag/'.urlencode($this->request).'?sort_field=date') or $this->returnError('No results for this query.', 404);
}
else {
$this->returnError('You must specify tag (/tag/...)', 400);
Expand Down
4 changes: 2 additions & 2 deletions bridges/BastaBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ public function collectData(array $param){


function BastaExtractContent($url) {
$html2 = file_get_html($url);
$html2 = $this->file_get_html($url);
$text = $html2->find('div.texte', 0)->innertext;
return $text;
}
$html = file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnError('Could not request Bastamag.', 404);
$html = $this->file_get_html('http://www.bastamag.net/spip.php?page=backend') or $this->returnError('Could not request Bastamag.', 404);
$limit = 0;

foreach($html->find('item') as $element) {
Expand Down
2 changes: 1 addition & 1 deletion bridges/BlaguesDeMerdeBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public function loadMetadatas() {
}

public function collectData(array $param){
$html = file_get_html('http://www.blaguesdemerde.fr/') or $this->returnError('Could not request BDM.', 404);
$html = $this->file_get_html('http://www.blaguesdemerde.fr/') or $this->returnError('Could not request BDM.', 404);

foreach($html->find('article.joke_contener') as $element) {
$item = new Item();
Expand Down
2 changes: 1 addition & 1 deletion bridges/BooruprojectBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public function collectData(array $param){
if (empty($param['i'])) {
$this->returnError('Please enter a ***.booru.org instance.', 404);
}
$html = file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnError('Could not request Booruproject.', 404);
$html = $this->file_get_html("http://".$param['i'].".booru.org/index.php?page=post&s=list&pid=".$page.$tags) or $this->returnError('Could not request Booruproject.', 404);


foreach($html->find('div[class=content] span') as $element) {
Expand Down
4 changes: 2 additions & 2 deletions bridges/CADBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function CADUrl($string) {
}

function CADExtractContent($url) {
$html3 = file_get_html($url);
$html3 = $this->file_get_html($url);
$htmlpart = explode("/", $url);
if ($htmlpart[3] == 'cad')
preg_match_all("/http:\/\/cdn2\.cad-comic\.com\/comics\/cad-\S*png/", $html3, $url2);
Expand All @@ -36,7 +36,7 @@ function CADExtractContent($url) {
return '<img src="'.$img.'"/>';
}

$html = file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnError('Could not request CAD.', 404);
$html = $this->file_get_html('http://cdn2.cad-comic.com/rss.xml') or $this->returnError('Could not request CAD.', 404);
$limit = 0;
foreach($html->find('item') as $element) {
if($limit < 5) {
Expand Down
4 changes: 2 additions & 2 deletions bridges/CNETBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ function CleanArticle($article_html) {
$this->topicName = $param['topic'];

$pageUrl = 'http://www.cnet.com/'.(empty($this->topicName) ? '' : 'topics/'.$this->topicName.'/');
$html = file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 500);
$html = $this->file_get_html($pageUrl) or $this->returnError('Could not request CNET: '.$pageUrl, 500);
$limit = 0;

foreach($html->find('div.assetBody') as $element) {
Expand All @@ -65,7 +65,7 @@ function CleanArticle($article_html) {

if (!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) {

$article_html = file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 500);
$article_html = $this->file_get_html($article_uri) or $this->returnError('Could not request CNET: '.$article_uri, 500);

if (is_null($article_thumbnail))
$article_thumbnail = $article_html->find('div.originalImage', 0);
Expand Down
4 changes: 2 additions & 2 deletions bridges/CoinDeskBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ function CoinDeskStripCDATA($string) {
return $string;
}
function CoinDeskExtractContent($url) {
$html2 = file_get_html($url);
$html2 = $this->file_get_html($url);
$text = $html2->find('div.single-content', 0)->innertext;
$text = strip_tags($text, '<p><a><img>');
return $text;
}
$html = file_get_html('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404);
$html = $this->file_get_html('http://www.coindesk.com/feed/atom/') or $this->returnError('Could not request CoinDesk.', 404);
$limit = 0;

foreach($html->find('entry') as $element) {
Expand Down
2 changes: 1 addition & 1 deletion bridges/CollegeDeFranceBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ public function collectData(array $param) {
* </a>
* </li>
*/
$html = file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404);
$html = $this->file_get_html('http://www.college-de-france.fr/components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') or $this->returnError('Could not request CollegeDeFrance.', 404);
foreach($html->find('a[data-target]') as $element) {
$item = new \Item();
$item->title = $element->find('.title', 0)->plaintext;
Expand Down
4 changes: 2 additions & 2 deletions bridges/CommonDreamsBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ function CommonDreamsUrl($string) {
}

function CommonDreamsExtractContent($url) {
$html3 = file_get_html($url);
$html3 = $this->file_get_html($url);
$text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext;
$html3->clear();
unset ($html3);
return $text;
}

$html = file_get_html('http://www.commondreams.org/rss.xml') or $this->returnError('Could not request CommonDreams.', 404);
$html = $this->file_get_html('http://www.commondreams.org/rss.xml') or $this->returnError('Could not request CommonDreams.', 404);
$limit = 0;
foreach($html->find('item') as $element) {
if($limit < 4) {
Expand Down
2 changes: 1 addition & 1 deletion bridges/CopieDoubleBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function loadMetadatas() {


public function collectData(array $param){
$html = file_get_html('http://www.copie-double.com/') or $this->returnError('Could not request CopieDouble.', 404);
$html = $this->file_get_html('http://www.copie-double.com/') or $this->returnError('Could not request CopieDouble.', 404);
$table = $html->find('table table', 2);

foreach($table->find('tr') as $element)
Expand Down
4 changes: 2 additions & 2 deletions bridges/CourrierInternationalBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public function collectData(array $param){

$html = '';

$html = file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500);
$html = $this->file_get_html('http://www.courrierinternational.com/') or $this->returnError('Error.', 500);



Expand All @@ -33,7 +33,7 @@ public function collectData(array $param){
$item->uri = "http://courrierinternational.fr/".$item->uri;
}

$page = file_get_html($item->uri);
$page = $this->file_get_html($item->uri);

$cleaner = new HTMLSanitizer();

Expand Down
2 changes: 1 addition & 1 deletion bridges/CpasbienBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ public function collectData(array $param){
$html = '';
if (isset($param['q'])) { /* keyword search mode */
$this->request = str_replace(" ","-",trim($param['q']));
$html = file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnError('No results for this query.', 404);
$html = $this->file_get_html($this->uri.'/recherche/'.urlencode($this->request).'.html') or $this->returnError('No results for this query.', 404);
}
else {
$this->returnError('You must specify a keyword (?q=...).', 400);
Expand Down
2 changes: 1 addition & 1 deletion bridges/CryptomeBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public function collectData(array $param){
// If you want HTTPS access instead, uncomment the following line:
//$link = 'https://secure.netsolhost.com/cryptome.org/';

$html = file_get_html($link) or $this->returnError('Could not request Cryptome.', 404);
$html = $this->file_get_html($link) or $this->returnError('Could not request Cryptome.', 404);
if (!empty($param['n'])) { /* number of documents */
$num = min(max(1, $param['n']+0), $num);
}
Expand Down
8 changes: 4 additions & 4 deletions bridges/DailymotionBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public function collectData(array $param){

function getMetadata($id) {
$metadata=array();
$html2 = file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnError('Could not request Dailymotion.', 404);
$html2 = $this->file_get_html('http://www.dailymotion.com/video/'.$id) or $this->returnError('Could not request Dailymotion.', 404);
$metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content');
$metadata['timestamp'] = strtotime($html2->find('meta[property=video:release_date]', 0)->getAttribute('content') );
$metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content');
Expand All @@ -63,15 +63,15 @@ function getMetadata($id) {

if (isset($param['u'])) { // user timeline mode
$this->request = $param['u'];
$html = file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnError('Could not request Dailymotion.', 404);
$html = $this->file_get_html('http://www.dailymotion.com/user/'.urlencode($this->request).'/1') or $this->returnError('Could not request Dailymotion.', 404);
}
else if (isset($param['p'])) { // playlist mode
$this->request = strtok($param['p'], '_');
$html = file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnError('Could not request Dailymotion.', 404);
$html = $this->file_get_html('http://www.dailymotion.com/playlist/'.urlencode($this->request).'') or $this->returnError('Could not request Dailymotion.', 404);
}
else if (isset($param['s'])) { // search mode
$this->request = $param['s']; $page = 1; if (isset($param['pa'])) $page = (int)preg_replace("/[^0-9]/",'', $param['pa']);
$html = file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnError('Could not request Dailymotion.', 404);
$html = $this->file_get_html('http://www.dailymotion.com/search/'.urlencode($this->request).'/'.$page.'') or $this->returnError('Could not request Dailymotion.', 404);
}
else {
$this->returnError('You must either specify a Dailymotion username (?u=...) or a playlist id (?p=...) or search (?s=...)', 400);
Expand Down
2 changes: 1 addition & 1 deletion bridges/DanbooruBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public function collectData(array $param){
if (isset($param['t'])) {
$tags = urlencode($param['t']);
}
$html = file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnError('Could not request Danbooru.', 404);
$html = $this->file_get_html("http://donmai.us/posts?&page=$page&tags=$tags") or $this->returnError('Could not request Danbooru.', 404);
foreach($html->find('div[id=posts] article') as $element) {
$item = new \Item();
$item->uri = 'http://donmai.us'.$element->find('a', 0)->href;
Expand Down
2 changes: 1 addition & 1 deletion bridges/DansTonChatBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ public function collectData(array $param){
$html = '';
$link = 'http://danstonchat.com/latest.html';

$html = file_get_html($link) or $this->returnError('Could not request DansTonChat.', 404);
$html = $this->file_get_html($link) or $this->returnError('Could not request DansTonChat.', 404);

foreach($html->find('div.item') as $element) {
$item = new \Item();
Expand Down
6 changes: 3 additions & 3 deletions bridges/DauphineLibereBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,17 @@ public function collectData(array $param){


function ExtractContent($url) {
$html2 = file_get_html($url);
$html2 = $this->file_get_html($url);
$text = $html2->find('div.column', 0)->innertext;
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
return $text;
}
if (isset($param['u'])) { /* user timeline mode */
$this->request = $param['u'];
$html = file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404);
$html = $this->file_get_html('http://www.ledauphine.com/'.$this->request.'/rss') or $this->returnError('Could not request DauphineLibere.', 404);
}
else {
$html = file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404);
$html = $this->file_get_html('http://www.ledauphine.com/rss') or $this->returnError('Could not request DauphineLibere.', 404);
}
$limit = 0;

Expand Down
4 changes: 2 additions & 2 deletions bridges/DeveloppezDotComBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@ function convert_smart_quotes($string)//F***ing quotes from Microsoft Word badly
}

function DeveloppezDotComExtractContent($url) {
$articleHTMLContent = file_get_html($url);
$articleHTMLContent = $this->file_get_html($url);
$text = convert_smart_quotes($articleHTMLContent->find('div.content', 0)->innertext);
$text = utf8_encode($text);
return trim($text);
}

$rssFeed = file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404);
$rssFeed = $this->file_get_html('http://www.developpez.com/index/rss') or $this->returnError('Could not request http://www.developpez.com/index/rss', 404);
$limit = 0;

foreach($rssFeed->find('item') as $element) {
Expand Down
2 changes: 1 addition & 1 deletion bridges/DilbertBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function loadMetadatas() {

public function collectData(array $param) {

$html = file_get_html($this->getURI()) or $this->returnError('Could not request Dilbert: '.$this->getURI(), 500);
$html = $this->file_get_html($this->getURI()) or $this->returnError('Could not request Dilbert: '.$this->getURI(), 500);

foreach ($html->find('section.comic-item') as $element) {

Expand Down
2 changes: 1 addition & 1 deletion bridges/DollbooruBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public function collectData(array $param){
if (isset($param['t'])) {
$tags = urlencode($param['t']);
}
$html = file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnError('Could not request Dollbooru.', 404);
$html = $this->file_get_html("http://dollbooru.org/post/list/$tags/$page") or $this->returnError('Could not request Dollbooru.', 404);


foreach($html->find('div[class=shm-image-list] a') as $element) {
Expand Down
2 changes: 1 addition & 1 deletion bridges/DuckDuckGoBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public function collectData(array $param){
$html = '';
$link = 'http://duckduckgo.com/html/?q='.$param[u].'+sort:date';

$html = file_get_html($link) or $this->returnError('Could not request DuckDuckGo.', 404);
$html = $this->file_get_html($link) or $this->returnError('Could not request DuckDuckGo.', 404);

foreach($html->find('div.results_links') as $element) {
$item = new \Item();
Expand Down
2 changes: 1 addition & 1 deletion bridges/EZTVBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ function makeTimestamp($relativeReleaseTime){
foreach($showList as $showID){

// Get show page
$html = file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnError('Could not request EZTV for id "'.$showID.'"', 404);
$html = $this->file_get_html('https://eztv.ch/shows/'.rawurlencode($showID).'/') or $this->returnError('Could not request EZTV for id "'.$showID.'"', 404);

// Loop on each element that look like an episode entry...
foreach($html->find('.forum_header_border') as $element) {
Expand Down
2 changes: 1 addition & 1 deletion bridges/EliteDangerousGalnetBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ public function loadMetadatas()

public function collectData(array $param)
{
$html = file_get_html('https://community.elitedangerous.com/galnet') or $this->returnError('Error while downloading the website content', 404);
$html = $this->file_get_html('https://community.elitedangerous.com/galnet') or $this->returnError('Error while downloading the website content', 404);
foreach($html->find('div.article') as $element) {
$item = new Item();

Expand Down
4 changes: 2 additions & 2 deletions bridges/FSBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ function FS_StripCDATA($string) {
return $string;
}
function FS_ExtractContent($url) {
$html2 = file_get_html($url);
$html2 = $this->file_get_html($url);
$text = $html2->find('div.fiche-actualite', 0)->innertext;
$text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
return $text;
}
$html = file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404);
$html = $this->file_get_html('http://www.futura-sciences.com/rss/actualites.xml') or $this->returnError('Could not request Futura Sciences.', 404);
$limit = 0;

foreach($html->find('item') as $element) {
Expand Down
4 changes: 2 additions & 2 deletions bridges/FacebookBridge.php
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,9 @@ function ExtractFromDelimiters($string, $start, $end) {
if (is_null($html)) {
if (isset($param['u'])) {
if (!strpos($param['u'], "/")) {
$html = file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
$html = $this->file_get_html('https://www.facebook.com/'.urlencode($param['u']).'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
} else {
$html = file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
$html = $this->file_get_html('https://www.facebook.com/pages/'.$param['u'].'?_fb_noscript=1') or $this->returnError('No results for this query.', 404);
}
} else {
$this->returnError('You must specify a Facebook username.', 400);
Expand Down
Loading

0 comments on commit 955eecc

Please sign in to comment.