RobotsTXT : Manage the Sitemaps correctly
@@ -74,6 +74,20 @@ class robotstxt
       $this->allow = array ("/");
       return $this;
     }
+
+    // The sitemaps are not restricted to the user-agent
+    foreach (preg_grep ("~Sitemap:\s+~i", $content) as $nb => $line)
+    {
+      $url = $this->getValueFromLine ($line);
+      if (!! filter_var ($url, FILTER_VALIDATE_URL) &&
+          (substr ($url, 0, 7) === "http://" ||
+           substr ($url, 0, 8) === "https://"))
+        $this->sitemaps[] = $url;
+      else
+        $this->errors[$nb] = dgettext ("domframework",
+                                       "Sitemap : Invalid URL provided");
+    }
+
     // Get the Allow and Disallow lines. The stop will arrive on first
     // User-Agent line arriving after an Allow/Disallow.
     // Comments and empty lines are removed
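For context, here is a minimal standalone sketch of the Sitemap extraction added in the hunk above. It is an assumption-laden illustration, not domframework code: parseSitemaps is a hypothetical helper name, and the inline value extraction mimics what getValueFromLine presumably does on the real class.

<?php
// Hypothetical standalone sketch of the Sitemap extraction shown above.
// parseSitemaps() and the inline value extraction are assumptions; the real
// class uses $this->getValueFromLine() and stores into $this->sitemaps/errors.
function parseSitemaps (array $content, array &$errors = array ())
{
  $sitemaps = array ();
  // Sitemap lines apply to every user-agent, so they are collected globally
  foreach (preg_grep ("~Sitemap:\s+~i", $content) as $nb => $line)
  {
    // Take everything after the first ":" and trim it
    $url = trim (substr ($line, strpos ($line, ":") + 1));
    if (!! filter_var ($url, FILTER_VALIDATE_URL) &&
        (substr ($url, 0, 7) === "http://" ||
         substr ($url, 0, 8) === "https://"))
      $sitemaps[] = $url;
    else
      $errors[$nb] = "Sitemap : Invalid URL provided";
  }
  return $sitemaps;
}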
@@ -82,14 +96,9 @@ class robotstxt
       $line = trim ($content[$i]);
       if (stripos ($line, "Sitemap:") === 0)
       {
-        $url = $this->getValueFromLine ($line);
-        if (!! filter_var ($url, FILTER_VALIDATE_URL) &&
-            (substr ($url, 0, 7) === "http://" ||
-             substr ($url, 0, 8) === "https://"))
-          $this->sitemaps[] = $url;
-        else
-          $this->errors[$i] = dgettext ("domframework",
-                                        "Sitemap : Invalid URL provided");
+        // Already managed in the general parser. Not needed in the specific
+        // user-agent parser. Must at least be caught to not generate an
+        // error
       }
       elseif (stripos ($line, "Host:") === 0)
       {
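A short usage sketch of the hypothetical parseSitemaps() above, showing the behaviour this commit aims for: Sitemap lines are honoured even when they appear inside a User-agent block, and invalid URLs are reported as errors instead of being stored.

<?php
// Usage sketch (assumes the parseSitemaps() sketch above is loaded)
$content = array (
  "User-agent: *",
  "Disallow: /private/",
  "Sitemap: https://www.example.com/sitemap.xml",
  "Sitemap: not-an-url",
);
$errors = array ();
$sitemaps = parseSitemaps ($content, $errors);
// $sitemaps  => array ("https://www.example.com/sitemap.xml")
// $errors[3] => "Sitemap : Invalid URL provided"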