Robots: values for Allow and Disallow must be empty or start with a slash
This commit is contained in:
@@ -15,25 +15,25 @@ use Domframework\Robotstxt;
|
||||
class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
{
|
||||
// Empty Robots
|
||||
public function test_Construct_1()
|
||||
public function testConstruct1()
|
||||
{
|
||||
$robotstxt = new Robotstxt("", "domsearch");
|
||||
$res = $robotstxt->allow();
|
||||
$this->assertSame($res, ["/"]);
|
||||
}
|
||||
public function test_Construct_2()
|
||||
public function testConstruct2()
|
||||
{
|
||||
$robotstxt = new Robotstxt("", "domsearch");
|
||||
$res = $robotstxt->disallow();
|
||||
$this->assertSame($res, array ());
|
||||
}
|
||||
public function test_Construct_3()
|
||||
public function testConstruct3()
|
||||
{
|
||||
$robotstxt = new Robotstxt("", "domsearch");
|
||||
$res = $robotstxt->sitemaps();
|
||||
$this->assertSame($res, array ());
|
||||
}
|
||||
public function test_Construct_4()
|
||||
public function testConstruct4()
|
||||
{
|
||||
$robotstxt = new Robotstxt("", "domsearch");
|
||||
$res = $robotstxt->crawldelay();
|
||||
@@ -41,7 +41,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
// Allow
|
||||
public function test_allow_1()
|
||||
public function testAllow1()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow:\n",
|
||||
@@ -50,7 +50,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->allow();
|
||||
$this->assertSame($res, ["/"]);
|
||||
}
|
||||
public function test_allow_2()
|
||||
public function testAllow2()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
|
||||
@@ -59,7 +59,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->allow();
|
||||
$this->assertSame($res, ["/"]);
|
||||
}
|
||||
public function test_allow_3()
|
||||
public function testAllow3()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
|
||||
@@ -68,7 +68,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->allow();
|
||||
$this->assertSame($res, ["/"]);
|
||||
}
|
||||
public function test_allow_4()
|
||||
public function testAllow4()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: DomSearch\n" .
|
||||
@@ -84,7 +84,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
// Disallow
|
||||
public function test_disallow_1()
|
||||
public function testDisallow1()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\n",
|
||||
@@ -93,7 +93,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->disallow();
|
||||
$this->assertSame($res, ["/"]);
|
||||
}
|
||||
public function test_disallow_2()
|
||||
public function testDisallow2()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
|
||||
@@ -102,7 +102,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->disallow();
|
||||
$this->assertSame($res, ["/"]);
|
||||
}
|
||||
public function test_disallow_3()
|
||||
public function testDisallow3()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
|
||||
@@ -113,7 +113,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
// Sitemaps
|
||||
public function test_sitemaps_1()
|
||||
public function testSitemaps1()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
|
||||
@@ -122,7 +122,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->sitemaps();
|
||||
$this->assertSame($res, []);
|
||||
}
|
||||
public function test_sitemaps_2()
|
||||
public function testSitemaps2()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
|
||||
@@ -131,7 +131,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->sitemaps();
|
||||
$this->assertSame($res, ["http://example.com/sitemap.xml"]);
|
||||
}
|
||||
public function test_sitemaps_3()
|
||||
public function testSitemaps3()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\n" .
|
||||
@@ -146,7 +146,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
);
|
||||
}
|
||||
|
||||
public function test_sitemaps_error_1()
|
||||
public function testSitemapsError1()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\nSitemap: URL",
|
||||
@@ -157,7 +157,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
// Host
|
||||
public function test_host_1()
|
||||
public function testHost1()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\n",
|
||||
@@ -166,7 +166,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->host();
|
||||
$this->assertSame($res, null);
|
||||
}
|
||||
public function test_host_2()
|
||||
public function testHost2()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\n\nHost: localhost",
|
||||
@@ -175,7 +175,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->host();
|
||||
$this->assertSame($res, "localhost");
|
||||
}
|
||||
public function test_host_error_1()
|
||||
public function testHostError1()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
|
||||
@@ -184,7 +184,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->host();
|
||||
$this->assertSame($res, "localhost");
|
||||
}
|
||||
public function test_host_error_2()
|
||||
public function testHostError2()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
|
||||
@@ -195,13 +195,13 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
// URLAllow
|
||||
public function test_urlallow_1()
|
||||
public function testUrlallow1()
|
||||
{
|
||||
$robotstxt = new Robotstxt("", "domsearch");
|
||||
$res = $robotstxt->URLAllow("/");
|
||||
$this->assertSame($res, true);
|
||||
}
|
||||
public function test_urlallow_2()
|
||||
public function testUrlallow2()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /",
|
||||
@@ -210,7 +210,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->URLAllow("/");
|
||||
$this->assertSame($res, false);
|
||||
}
|
||||
public function test_urlallow_3()
|
||||
public function testUrlallow3()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\nAllow: /allow/",
|
||||
@@ -219,7 +219,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->URLAllow("/");
|
||||
$this->assertSame($res, false);
|
||||
}
|
||||
public function test_urlallow_4()
|
||||
public function testUrlallow4()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\nAllow: /allow/",
|
||||
@@ -228,7 +228,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->URLAllow("/allow/file");
|
||||
$this->assertSame($res, true);
|
||||
}
|
||||
public function test_urlallow_5()
|
||||
public function testUrlallow5()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
|
||||
@@ -237,7 +237,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->URLAllow("/allow/file.gif");
|
||||
$this->assertSame($res, true);
|
||||
}
|
||||
public function test_urlallow_6()
|
||||
public function testUrlallow6()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
|
||||
@@ -246,7 +246,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
$res = $robotstxt->URLAllow("/allow/.gif");
|
||||
$this->assertSame($res, false);
|
||||
}
|
||||
public function test_urlallow_7()
|
||||
public function testUrlallow7()
|
||||
{
|
||||
$robotstxt = new Robotstxt(
|
||||
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
|
||||
@@ -257,7 +257,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
}
|
||||
|
||||
// Tests like http://www.robotstxt.org/norobots-rfc.txt
|
||||
public function test_rfc_unhipbot_1()
|
||||
public function testRfcUnhipbot1()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -285,7 +285,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_2()
|
||||
public function testRfcUnhipbot2()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -313,7 +313,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_3()
|
||||
public function testRfcUnhipbot3()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -341,7 +341,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_4()
|
||||
public function testRfcUnhipbot4()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -369,7 +369,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_5()
|
||||
public function testRfcUnhipbot5()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -397,7 +397,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_6()
|
||||
public function testRfcUnhipbot6()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -425,7 +425,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_7()
|
||||
public function testRfcUnhipbot7()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -453,7 +453,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_8()
|
||||
public function testRfcUnhipbot8()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -481,7 +481,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_9()
|
||||
public function testRfcUnhipbot9()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -509,7 +509,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_10()
|
||||
public function testRfcUnhipbot10()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -537,7 +537,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_unhipbot_11()
|
||||
public function testRfcUnhipbot11()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -565,7 +565,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_1()
|
||||
public function testRfcWebcrawler1()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -593,7 +593,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_2()
|
||||
public function testRfcWebcrawler2()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -621,7 +621,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_3()
|
||||
public function testRfcWebcrawler3()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -649,7 +649,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_4()
|
||||
public function testRfcWebcrawler4()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -677,7 +677,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_5()
|
||||
public function testRfcWebcrawler5()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -705,7 +705,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_6()
|
||||
public function testRfcWebcrawler6()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -733,7 +733,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_7()
|
||||
public function testRfcWebcrawler7()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -761,7 +761,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_8()
|
||||
public function testRfcWebcrawler8()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -789,7 +789,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_9()
|
||||
public function testRfcWebcrawler9()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -817,7 +817,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_10()
|
||||
public function testRfcWebcrawler10()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -845,7 +845,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_webcrawler_11()
|
||||
public function testRfcWebcrawler11()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -873,7 +873,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_1()
|
||||
public function testRfcExcite1()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -901,7 +901,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_2()
|
||||
public function testRfcExcite2()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -929,7 +929,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_3()
|
||||
public function testRfcExcite3()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -957,7 +957,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_4()
|
||||
public function testRfcExcite4()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -985,7 +985,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_5()
|
||||
public function testRfcExcite5()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1013,7 +1013,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_6()
|
||||
public function testRfcExcite6()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1041,7 +1041,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_7()
|
||||
public function testRfcExcite7()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1069,7 +1069,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_8()
|
||||
public function testRfcExcite8()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1097,7 +1097,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_9()
|
||||
public function testRfcExcite9()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1125,7 +1125,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_10()
|
||||
public function testRfcExcite10()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1153,7 +1153,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_excite_11()
|
||||
public function testRfcExcite11()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1181,7 +1181,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_1()
|
||||
public function testRfcOther1()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1206,7 +1206,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
// }}}
|
||||
$this->assertSame($robotstxt->URLAllow("http://www.fict.org/"), false);
|
||||
}
|
||||
public function test_rfc_other_2()
|
||||
public function testRfcOther2()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1234,7 +1234,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_3()
|
||||
public function testRfcOther3()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1262,7 +1262,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_4()
|
||||
public function testRfcOther4()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1290,7 +1290,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_5()
|
||||
public function testRfcOther5()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1318,7 +1318,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_6()
|
||||
public function testRfcOther6()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1346,7 +1346,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_7()
|
||||
public function testRfcOther7()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1374,7 +1374,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_8()
|
||||
public function testRfcOther8()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1402,7 +1402,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_9()
|
||||
public function testRfcOther9()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1430,7 +1430,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_10()
|
||||
public function testRfcOther10()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1458,7 +1458,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
false
|
||||
);
|
||||
}
|
||||
public function test_rfc_other_11()
|
||||
public function testRfcOther11()
|
||||
{
|
||||
// {{{
|
||||
$robotstxt = new Robotstxt(
|
||||
@@ -1486,4 +1486,49 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
|
||||
true
|
||||
);
|
||||
}
|
||||
|
||||
// Allow/Disallow values must be empty or start with a slash
|
||||
/**
 * A Disallow value that is neither empty nor starts with a slash is
 * rejected: errors() is expected to report it under key 1.
 */
public function testAllowDisallowSlash1()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Disallow: INVALID\n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // assertSame() takes the expected value first, then the actual one, so
    // that a failure message labels both sides correctly.
    $this->assertSame([1 => "Disallow : Line must start by slash"], $res);
}
|
||||
|
||||
/**
 * An empty Disallow value is valid: errors() is expected to return an
 * empty array.
 */
public function testAllowDisallowSlash2()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Disallow: \n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // assertSame() takes the expected value first, then the actual one, so
    // that a failure message labels both sides correctly.
    $this->assertSame([], $res);
}
|
||||
|
||||
/**
 * An Allow value that is neither empty nor starts with a slash is
 * rejected: errors() is expected to report it under key 1.
 */
public function testAllowDisallowSlash3()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Allow: INVALID\n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // assertSame() takes the expected value first, then the actual one, so
    // that a failure message labels both sides correctly.
    $this->assertSame([1 => "Allow : Line must start by slash"], $res);
}
|
||||
|
||||
/**
 * An empty Allow value is valid: errors() is expected to return an empty
 * array.
 */
public function testAllowDisallowSlash4()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Allow: \n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // assertSame() takes the expected value first, then the actual one, so
    // that a failure message labels both sides correctly.
    $this->assertSame([], $res);
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,7 @@ namespace Domframework;
|
||||
* It allows examining a URL against the robots.txt file and returns whether the URL
|
||||
* is allowed to be used or not
|
||||
* The definition of the format of robots.txt file is available here :
|
||||
* https://www.rfc-editor.org/rfc/rfc9309.txt
|
||||
* http://www.robotstxt.org/norobots-rfc.txt
|
||||
* https://en.wikipedia.org/wiki/Robots_exclusion_standard
|
||||
*/
|
||||
@@ -94,15 +95,15 @@ class Robotstxt
|
||||
// Get the Allow and Disallow lines. The stop will arrive on first
|
||||
// User-Agent line arriving after a Allow/Disallow.
|
||||
// Comments and empty lines are removed
|
||||
for ($i = key($keys); $i < count($content); $i++) {
|
||||
$line = trim($content[$i]);
|
||||
for ($nb = key($keys); $nb < count($content); $nb++) {
|
||||
$line = trim($content[$nb]);
|
||||
if (stripos($line, "Sitemap:") === 0) {
|
||||
// Already managed in the general parser. Not needed in the specific
|
||||
// user-agent parser. Must at least be caught so that it does not generate an
|
||||
// error
|
||||
} elseif (stripos($line, "Host:") === 0) {
|
||||
if ($this->host !== null) {
|
||||
$this->errors[$i] = dgettext(
|
||||
$this->errors[$nb] = dgettext(
|
||||
"domframework",
|
||||
"Multiple Hosts set"
|
||||
);
|
||||
@@ -112,15 +113,31 @@ class Robotstxt
|
||||
} elseif ($line === "" || $line[0] === "#") {
|
||||
// Comment, empty line : SKIP
|
||||
} elseif (stripos($line, "allow:") === 0) {
|
||||
$this->allow[] = $this->getValueFromLine($line);
|
||||
$allow = $this->getValueFromLine($line);
|
||||
if (trim($allow) === "" || $allow[0] === "/") {
|
||||
$this->allow[] = $allow;
|
||||
} else {
|
||||
$this->errors[$nb] = dgettext(
|
||||
"domframework",
|
||||
"Allow : Line must start by slash"
|
||||
);
|
||||
}
|
||||
} elseif (stripos($line, "disallow:") === 0) {
|
||||
$this->disallow[] = $this->getValueFromLine($line);
|
||||
$disallow = $this->getValueFromLine($line);
|
||||
if (trim($disallow) === "" || $disallow[0] === "/") {
|
||||
$this->disallow[] = $disallow;
|
||||
} else {
|
||||
$this->errors[$nb] = dgettext(
|
||||
"domframework",
|
||||
"Disallow : Line must start by slash"
|
||||
);
|
||||
}
|
||||
} elseif (stripos($line, "crawl-delay:") === 0) {
|
||||
$val = $this->getValueFromLine($line);
|
||||
if ($val > 1 && $val < 60 && $this->crawldelay === null) {
|
||||
$this->crawldelay = intval($val);
|
||||
} else {
|
||||
$this->errors[$i] = dgettext(
|
||||
$this->errors[$nb] = dgettext(
|
||||
"domframework",
|
||||
"Crawldelay : value out of range (1-60)"
|
||||
);
|
||||
@@ -134,7 +151,7 @@ class Robotstxt
|
||||
}
|
||||
} else {
|
||||
// Not managed line : error
|
||||
$this->errors[$i] = sprintf(dgettext(
|
||||
$this->errors[$nb] = sprintf(dgettext(
|
||||
"domframework",
|
||||
"Invalid line : unknown command : '%s'"
|
||||
), $line);
|
||||
|
||||
Reference in New Issue
Block a user