robotsTxt : update Tests
git-svn-id: https://svn.fournier38.fr/svn/ProgSVN/trunk@5343 bf3deb0d-5f1a-0410-827f-c0cc1f45334c
This commit is contained in:
File diff suppressed because it is too large
Load Diff
@@ -16,10 +16,6 @@ class robotstxt
|
||||
{
|
||||
// PROPERTIES
|
||||
// {{{
|
||||
/** The crawler name wanted in robots.txt
|
||||
*/
|
||||
private $crawlerName = "DomSearch";
|
||||
|
||||
/** The allowed urls
|
||||
*/
|
||||
private $allow = array ();
|
||||
@@ -40,7 +36,7 @@ class robotstxt
|
||||
*/
|
||||
private $host = null;
|
||||
|
||||
/** The rule matchine the URLAllow rule
|
||||
/** The rule number matching the URLAllow rule
|
||||
*/
|
||||
private $matchRule = null;
|
||||
// }}}
|
||||
@@ -48,9 +44,10 @@ class robotstxt
|
||||
// METHODS
|
||||
/** Get the robots.txt file content and analyze it
|
||||
* @param string $content The robots.txt file content to analyze
|
||||
* @param string $crawlerName The crawler name to use for the analysis
|
||||
* @return $this
|
||||
*/
|
||||
public function __construct ($content)
|
||||
public function analyze ($content, $crawlerName)
|
||||
// {{{
|
||||
{
|
||||
if (trim ($content) === "")
|
||||
@@ -62,7 +59,7 @@ class robotstxt
|
||||
// If not, check if the User-agent exists for *
|
||||
// If not accept all
|
||||
$content = preg_split('/\r\n|\r|\n/', $content);
|
||||
$keys = preg_grep ("~User-agent:\s*$this->crawlerName\s*#?~i", $content);
|
||||
$keys = preg_grep ("~User-agent:\s*$crawlerName\s*#?~i", $content);
|
||||
if (empty ($keys))
|
||||
$keys = preg_grep ("~User-agent:\s*\\*\s*#?~i", $content);
|
||||
if (empty ($keys))
|
||||
@@ -133,6 +130,9 @@ class robotstxt
|
||||
{
|
||||
$parse = parse_url ($url);
|
||||
$path = (isset ($parse["path"])) ? $parse["path"] : "/";
|
||||
// Robots.txt files are always allowed
|
||||
if ($path === "/robots.txt")
|
||||
return true;
|
||||
$bestDisallow = -1;
|
||||
$bestAllow = -1;
|
||||
$allowRule = "";
|
||||
@@ -179,13 +179,13 @@ class robotstxt
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($bestAllow < $bestDisallow)
|
||||
if ($bestAllow > $bestDisallow)
|
||||
{
|
||||
$this->matchRule = $disallowRule;
|
||||
return false;
|
||||
$this->matchRule = $allowRule;
|
||||
return true;
|
||||
}
|
||||
$this->matchRule = $allowRule;
|
||||
return true;
|
||||
$this->matchRule = $disallowRule;
|
||||
return false;
|
||||
}
|
||||
// }}}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user