-
Notifications
You must be signed in to change notification settings - Fork 0
/
crawler_test.php
84 lines (72 loc) · 1.85 KB
/
crawler_test.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
<?php
require('simple_html_dom.php');
// Download the two NMC (nmc.gov.cn, 中央气象台) forecast pages for one city.
// $prv and $city are the two path components the URLs below are built from.
$prv = 'GD';
$city = 'shenzhen';
// $add is the main forecast page; $ifadd is the companion "_iframe" page.
$add = "http://www.nmc.gov.cn/publish/forecast/A$prv/$city.html";
$ifadd = "http://www.nmc.gov.cn/publish/forecast/A$prv/{$city}_iframe.html";
echo $add."\t".$ifadd."\n";
echo "获取中央气象台数据ing\n";
$mainhtml = file_get_html($add);
$ifm = file_get_html($ifadd);
// file_get_html() may hand back false instead of a DOM object when a page
// cannot be loaded. Stop here with a clear message rather than crashing later
// with "Call to a member function find() on bool".
if (!is_object($mainhtml) || !is_object($ifm)) {
    echo "failed to fetch forecast pages\n";
    exit(1);
}
echo "获取完毕\n";
// Extract the real-time observation data from the iframe page ($ifm).

// Concatenate the bodies of all non-empty <script> elements; the update
// timestamp is searched for in that combined text.
$rjs = '';
foreach ($ifm->find('script') as $s) {
    if (trim($s->innertext) !== '') {
        $rjs .= $s->innertext;
    }
}

// The first "YYYY-MM-DD HH:MM" occurrence is taken as the update time.
// The preg_match() result is checked (and no longer echoed) so that a missing
// timestamp yields an empty value instead of an undefined-offset notice and a
// stray "0"/"1" in the output.
$updateTime = preg_match('/\d{4}-\d{2}-\d{2} \d{2}:\d{2}/', $rjs, $matches) === 1
    ? $matches[0]
    : '';
echo "update time: ".$updateTime."\n";

// Print every non-blank text node that is a direct child of a
// "div.city_wind div.temp_pic" element, converted from GBK to UTF-8.
foreach ($ifm->find('div.city_wind div.temp_pic') as $cur) {
    foreach ($cur->find('text') as $text) {
        // Identity check: a loose == on DOM nodes compares their properties
        // recursively (nodes reference their parents and children), whereas
        // what is wanted is "is this the very same node".
        if ($text->parent() !== $cur) {
            continue;
        }
        // iconv() returns false when the input cannot be converted; skip those.
        $tmp = iconv("gbk", "UTF-8", (string) $text);
        if ($tmp === false) {
            continue;
        }
        $tmp = trim($tmp);
        if ($tmp !== '') {
            echo $tmp."\n";
        }
    }
}
// Extract the 6-hourly detailed forecast from the main page ($mainhtml).
$foretable = $mainhtml->find('table#snwfd tr');

// The first row is searched for the table's "YYYY-MM-DD HH:MM" update time.
// Note the property access uses "->": the previous "$foretable[0].innertext"
// was a string concatenation with an undefined constant, which is a fatal
// Error on PHP 8. An empty table or a missing timestamp yields ''.
$headerHtml = isset($foretable[0]) ? $foretable[0]->innertext : '';
$updateTime = preg_match('/\d{4}-\d{2}-\d{2} \d{2}:\d{2}/', $headerHtml, $matches) === 1
    ? $matches[0]
    : '';
echo "update time: $updateTime\n";

foreach ($foretable as $t) {
    // Skip the header row.
    if ($t->id === 'snwfd_head') {
        continue;
    }

    // Each data row is read as 7 cells: time range, status, max, min and
    // three further columns ($wd, $ws, $wt). Rows with fewer cells cannot be
    // printed in this format and are skipped instead of dereferencing null.
    $tds = $t->find('td');
    if (count($tds) < 7) {
        continue;
    }

    // "HH:MM-HH:MM" range from the first cell; '' when the cell has none.
    $timerange = preg_match('/\d{2}:\d{2}-\d{2}:\d{2}/', $tds[0]->innertext, $matches) === 1
        ? $matches[0]
        : '';
    // First text node of the status cell (null when absent, printed as '').
    $status = $tds[1]->find('text', 0);
    $max = $tds[2]->innertext;
    $min = $tds[3]->innertext;
    $wd = $tds[4]->innertext;
    $ws = $tds[5]->innertext;
    $wt = $tds[6]->innertext;

    // Convert the assembled line from GB2312 to UTF-8 in one go. iconv()
    // returns false on unconvertible input, in which case nothing is printed.
    $line = iconv("gb2312", "UTF-8", "($timerange): $status, $min~$max, $wd, $ws, $wt\n");
    if ($line !== false) {
        echo $line;
    }
}
?>