RobotsTXT : Manage the Sitemaps correctly
This commit is contained in:
@@ -74,6 +74,20 @@ class robotstxt
|
|||||||
$this->allow = array ("/");
|
$this->allow = array ("/");
|
||||||
return $this;
|
return $this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The sitemaps are not restricted to the user-agent
|
||||||
|
foreach (preg_grep ("~Sitemap:\s+~i", $content) as $nb => $line)
|
||||||
|
{
|
||||||
|
$url = $this->getValueFromLine ($line);
|
||||||
|
if (!! filter_var ($url, FILTER_VALIDATE_URL) &&
|
||||||
|
(substr ($url, 0, 7) === "http://" ||
|
||||||
|
substr ($url, 0, 8) === "https://"))
|
||||||
|
$this->sitemaps[] = $url;
|
||||||
|
else
|
||||||
|
$this->errors[$nb] = dgettext ("domframework",
|
||||||
|
"Sitemap : Invalid URL provided");
|
||||||
|
}
|
||||||
|
|
||||||
// Get the Allow and Disallow lines. The stop will arrive on first
|
// Get the Allow and Disallow lines. The stop will arrive on first
|
||||||
// User-Agent line arriving after an Allow/Disallow.
|
// User-Agent line arriving after an Allow/Disallow.
|
||||||
// Comments and empty lines are removed
|
// Comments and empty lines are removed
|
||||||
@@ -82,14 +96,9 @@ class robotstxt
|
|||||||
$line = trim ($content[$i]);
|
$line = trim ($content[$i]);
|
||||||
if (stripos ($line, "Sitemap:") === 0)
|
if (stripos ($line, "Sitemap:") === 0)
|
||||||
{
|
{
|
||||||
$url = $this->getValueFromLine ($line);
|
// Already managed in the general parser. Not needed in the specific
|
||||||
if (!! filter_var ($url, FILTER_VALIDATE_URL) &&
|
// user-agent parser. Must at least be caught to not generate an
|
||||||
(substr ($url, 0, 7) === "http://" ||
|
// error
|
||||||
substr ($url, 0, 8) === "https://"))
|
|
||||||
$this->sitemaps[] = $url;
|
|
||||||
else
|
|
||||||
$this->errors[$i] = dgettext ("domframework",
|
|
||||||
"Sitemap : Invalid URL provided");
|
|
||||||
}
|
}
|
||||||
elseif (stripos ($line, "Host:") === 0)
|
elseif (stripos ($line, "Host:") === 0)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user