Skip to content

Commit

Permalink
End anchor fix
Browse files Browse the repository at this point in the history
  • Loading branch information
JanPetterMG committed Apr 24, 2016
1 parent e06ed9e commit e8e631b
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 54 deletions.
4 changes: 3 additions & 1 deletion src/RobotsTxtParser/Parser/Directives/DisAllow.php
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,11 @@ public function check($url)
*/
protected function getPath($url)
{
// Encode
$url = $this->urlEncode($url);
if (mb_stripos($url, '/') === 0) {
// URL is already a path
// Strip fragments
$url = mb_split('#', $url)[0];
return $url;
}
if (!$this->urlValidate($url)) {
Expand Down
24 changes: 13 additions & 11 deletions src/RobotsTxtParser/Parser/Toolbox.php
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,21 @@ protected function checkPath($path, $paths)
* @link https://github.com/hafriedlander/php-peg
*/
try {
$rule = str_replace('#', '\#', $rule);
if (preg_match('#' . $rule . '#', $path)) {
if (mb_stripos($rule, '$') !== false) {
/**
* Bug when not exact match
* @link https://github.com/t1gor/Robots.txt-Parser-Class/issues/63
*/
if (mb_strlen($rule) - 1 >= mb_strlen($path)) {
return true;
}
} else {
if (!preg_match('#' . $rule . '#', $path)) {
// Rule does not match
continue;
} else if (mb_stripos($rule, '$') === false) {
// No special parsing required
return true;
} else if (($wildcardPos = mb_strrpos($rule, '*')) !== false) {
// Rule contains both an end anchor ($) and wildcard (*)
$afterWildcard = mb_substr($rule, $wildcardPos + 1, mb_strlen($rule) - $wildcardPos - 2);
if ($afterWildcard == mb_substr($path, -mb_strlen($afterWildcard))) {
return true;
}
} else if (mb_substr($rule, 0, -1) == $path) {
// Rule contains an end anchor ($) but no wildcard (*)
return true;
}
} catch (\Exception $e) {
// A preg_match error has occurred
Expand Down
8 changes: 8 additions & 0 deletions tests/EndAnchorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public function testEndAnchor($robotsTxtContent)

$this->assertTrue($parser->userAgent()->isDisallowed('/asd/'));
$this->assertFalse($parser->userAgent()->isAllowed('/asd/'));

$this->assertTrue($parser->userAgent('DenyMe')->isDisallowed('http://example.com/deny_all/'));
$this->assertFalse($parser->userAgent('DenyMe')->isAllowed('http://example.com/deny_all/'));
}

/**
Expand All @@ -42,6 +45,11 @@ public function generateDataForTest()
User-Agent: *
Disallow: /*
Allow: /$
User-Agent: DenyMe
Disallow: /deny_all/$
Disallow: *deny_all/$
Disallow: deny_all/$
ROBOTS
]
];
Expand Down
42 changes: 0 additions & 42 deletions tests/EndAnchorWildcardTest.php

This file was deleted.

3 changes: 3 additions & 0 deletions tests/EscapingTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ public function testEscaping($robotsTxtContent)
$this->assertTrue($parser->userAgent()->isAllowed("/%5C."));
$this->assertFalse($parser->userAgent()->isDisallowed("/%5C."));

/**
* Additional tests to enable in the future, currently disabled due to bugs
*/
//$this->assertTrue($parser->userAgent()->isDisallowed("/("));
//$this->assertFalse($parser->userAgent()->isAllowed("/("));
}
Expand Down

0 comments on commit e8e631b

Please sign in to comment.