diff --git a/robotstxt.php b/robotstxt.php
index 7a789a1..a4838f6 100644
--- a/robotstxt.php
+++ b/robotstxt.php
@@ -74,6 +74,20 @@ class robotstxt
         $this->allow = array ("/");
         return $this;
       }
+
+    // The sitemaps are not restricted to the user-agent
+    foreach (preg_grep ("~Sitemap:\s+~i", $content) as $nb => $line)
+      {
+        $url = $this->getValueFromLine ($line);
+        if (!! filter_var ($url, FILTER_VALIDATE_URL) &&
+            (substr ($url, 0, 7) === "http://" ||
+             substr ($url, 0, 8) === "https://"))
+          $this->sitemaps[] = $url;
+        else
+          $this->errors[$nb] = dgettext ("domframework",
+                               "Sitemap : Invalid URL provided");
+      }
+
     // Get the Allow and Disallow lines. The stop will arrive on first
     // User-Agent line arriving after a Allow/Disallow.
     // Comments and empty lines are removed
@@ -82,14 +96,9 @@
       $line = trim ($content[$i]);
       if (stripos ($line, "Sitemap:") === 0)
         {
-          $url = $this->getValueFromLine ($line);
-          if (!! filter_var ($url, FILTER_VALIDATE_URL) &&
-              (substr ($url, 0, 7) === "http://" ||
-               substr ($url, 0, 8) === "https://"))
-            $this->sitemaps[] = $url;
-          else
-            $this->errors[$i] = dgettext ("domframework",
-                                "Sitemap : Invalid URL provided");
+          // Already managed in the general parser. Not needed in the specific
+          // user-agent parser. Must at least be caught to not generate an
+          // error
         }
       elseif (stripos ($line, "Host:") === 0)
         {