forked from cehkunal/pentest-tools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gg-extract-links.php
61 lines (47 loc) · 1.26 KB
/
gg-extract-links.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
<?php
/**
 * Print the command-line usage line, plus an optional error message, and
 * terminate the script.
 *
 * Every call site in this script is a failure path (wrong argument count or
 * unusable source file), so the text is written to STDERR and the process
 * ends with a non-zero status. This keeps the diagnostic out of a redirected
 * link list and lets calling shells/scripts detect the failure.
 *
 * @param string|null $err Optional error description; when truthy it is
 *                         printed on its own "Error: ..." line.
 * @return void Never actually returns: the script is always terminated.
 */
function usage( $err=null ) {
    $message = 'Usage: '.$_SERVER['argv'][0]." <source file>\n";
    if( $err ) {
        $message .= 'Error: '.$err."\n";
    }

    fwrite( STDERR, $message );

    // Non-zero status: the script was invoked incorrectly or could not run.
    exit( 1 );
}
// ---------------------------------------------------------------------------
// Script body: read the HTML file given as the single command-line argument,
// collect the href of every <a> element nested in an <h3>, strip the
// "/url?q=" redirect prefix, and print one link per line.
// ---------------------------------------------------------------------------

// Exactly one argument (the source file) is expected after the script name.
if( $_SERVER['argc'] != 2 ) {
    usage();
}

$src = $_SERVER['argv'][1];
if( !is_file($src) ) {
    usage( 'cannot find source file !' );
}

// file_get_contents() returns false on failure (e.g. unreadable file); do not
// let that value flow into the decoding/parsing steps below.
$content = file_get_contents( $src );
if( $content === false ) {
    usage( 'cannot read source file !' );
}

// Decode HTML entities and percent-encoding across the whole document before
// parsing, so the extracted links come out in decoded form.
$content = urldecode( html_entity_decode($content) );

$t_links = [];

// DOMDocument::loadHTML() refuses an empty string (a ValueError on PHP 8 that
// the "@" operator cannot silence), so only parse when there is some content.
if( $content !== '' ) {
    // Collect libxml parse warnings internally instead of hiding them with
    // "@"; real-world HTML is rarely well-formed and the warnings are noise.
    $previous_setting = libxml_use_internal_errors( true );

    $doc = new DOMDocument();
    $doc->preserveWhiteSpace = false;
    $loaded = $doc->loadHTML( $content );

    libxml_clear_errors();
    libxml_use_internal_errors( $previous_setting );

    if( $loaded ) {
        $xpath = new DOMXPath( $doc );
        $t_result = $xpath->query( "//h3//a[@href]" );

        foreach( $t_result as $r ) {
            // Read the attribute straight from the DOM node. Serialising the
            // node with saveHTML() and re-extracting href with a regex
            // returns the *escaped* form (e.g. "&" becomes "&amp;"), which
            // corrupts URLs that carry a query string.
            $href = trim( $r->getAttribute('href') );

            // Drop the "/url?q=" redirect wrapper when present.
            $t_links[] = str_ireplace( '/url?q=', '', $href );
        }
    }
}

echo implode( "\n", $t_links )."\n";
exit();
?>