Robots: values for Allow and Disallow must be empty or start with a slash

This commit is contained in:
2023-01-03 13:20:21 +01:00
parent a470048b4f
commit 535525db1d
2 changed files with 139 additions and 77 deletions

View File

@@ -15,25 +15,25 @@ use Domframework\Robotstxt;
class RobotstxtTest extends \PHPUnit_Framework_TestCase
{
// Empty Robots
public function test_Construct_1()
public function testConstruct1()
{
    // With an empty robots.txt body, allow() is expected to return only
    // the root path "/".
    $robotstxt = new Robotstxt("", "domsearch");
    // assertSame() takes ($expected, $actual): the expected value goes
    // first so that a failure is reported the right way round.
    $this->assertSame(["/"], $robotstxt->allow());
}
public function test_Construct_2()
public function testConstruct2()
{
    // With an empty robots.txt body, disallow() is expected to return an
    // empty list.
    $robotstxt = new Robotstxt("", "domsearch");
    // assertSame() takes ($expected, $actual): the expected value goes
    // first so that a failure is reported the right way round.
    $this->assertSame([], $robotstxt->disallow());
}
public function test_Construct_3()
public function testConstruct3()
{
    // With an empty robots.txt body, sitemaps() is expected to return an
    // empty list.
    $robotstxt = new Robotstxt("", "domsearch");
    // assertSame() takes ($expected, $actual): the expected value goes
    // first so that a failure is reported the right way round.
    $this->assertSame([], $robotstxt->sitemaps());
}
public function test_Construct_4()
public function testConstruct4()
{
$robotstxt = new Robotstxt("", "domsearch");
$res = $robotstxt->crawldelay();
@@ -41,7 +41,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
}
// Allow
public function test_allow_1()
public function testAllow1()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow:\n",
@@ -50,7 +50,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->allow();
$this->assertSame($res, ["/"]);
}
public function test_allow_2()
public function testAllow2()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
@@ -59,7 +59,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->allow();
$this->assertSame($res, ["/"]);
}
public function test_allow_3()
public function testAllow3()
{
$robotstxt = new Robotstxt(
"User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
@@ -68,7 +68,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->allow();
$this->assertSame($res, ["/"]);
}
public function test_allow_4()
public function testAllow4()
{
$robotstxt = new Robotstxt(
"User-Agent: DomSearch\n" .
@@ -84,7 +84,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
}
// Disallow
public function test_disallow_1()
public function testDisallow1()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n",
@@ -93,7 +93,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->disallow();
$this->assertSame($res, ["/"]);
}
public function test_disallow_2()
public function testDisallow2()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
@@ -102,7 +102,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->disallow();
$this->assertSame($res, ["/"]);
}
public function test_disallow_3()
public function testDisallow3()
{
$robotstxt = new Robotstxt(
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
@@ -113,7 +113,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
}
// Sitemaps
public function test_sitemaps_1()
public function testSitemaps1()
{
$robotstxt = new Robotstxt(
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
@@ -122,7 +122,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->sitemaps();
$this->assertSame($res, []);
}
public function test_sitemaps_2()
public function testSitemaps2()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
@@ -131,7 +131,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->sitemaps();
$this->assertSame($res, ["http://example.com/sitemap.xml"]);
}
public function test_sitemaps_3()
public function testSitemaps3()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n" .
@@ -146,7 +146,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
);
}
public function test_sitemaps_error_1()
public function testSitemapsError1()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nSitemap: URL",
@@ -157,7 +157,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
}
// Host
public function test_host_1()
public function testHost1()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n",
@@ -166,7 +166,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->host();
$this->assertSame($res, null);
}
public function test_host_2()
public function testHost2()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nHost: localhost",
@@ -175,7 +175,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->host();
$this->assertSame($res, "localhost");
}
public function test_host_error_1()
public function testHostError1()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
@@ -184,7 +184,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->host();
$this->assertSame($res, "localhost");
}
public function test_host_error_2()
public function testHostError2()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
@@ -195,13 +195,13 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
}
// URLAllow
public function test_urlallow_1()
public function testUrlallow1()
{
    // With an empty robots.txt body, URLAllow("/") is expected to return
    // exactly true.
    $robotstxt = new Robotstxt("", "domsearch");
    // assertTrue() performs the same strict check against true as the
    // former assertSame() call, without the reversed expected/actual order.
    $this->assertTrue($robotstxt->URLAllow("/"));
}
public function test_urlallow_2()
public function testUrlallow2()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /",
@@ -210,7 +210,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/");
$this->assertSame($res, false);
}
public function test_urlallow_3()
public function testUrlallow3()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/",
@@ -219,7 +219,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/");
$this->assertSame($res, false);
}
public function test_urlallow_4()
public function testUrlallow4()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/",
@@ -228,7 +228,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/allow/file");
$this->assertSame($res, true);
}
public function test_urlallow_5()
public function testUrlallow5()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
@@ -237,7 +237,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/allow/file.gif");
$this->assertSame($res, true);
}
public function test_urlallow_6()
public function testUrlallow6()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
@@ -246,7 +246,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/allow/.gif");
$this->assertSame($res, false);
}
public function test_urlallow_7()
public function testUrlallow7()
{
$robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
@@ -257,7 +257,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
}
// Tests like http://www.robotstxt.org/norobots-rfc.txt
public function test_rfc_unhipbot_1()
public function testRfcUnhipbot1()
{
// {{{
$robotstxt = new Robotstxt(
@@ -285,7 +285,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_2()
public function testRfcUnhipbot2()
{
// {{{
$robotstxt = new Robotstxt(
@@ -313,7 +313,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_3()
public function testRfcUnhipbot3()
{
// {{{
$robotstxt = new Robotstxt(
@@ -341,7 +341,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_unhipbot_4()
public function testRfcUnhipbot4()
{
// {{{
$robotstxt = new Robotstxt(
@@ -369,7 +369,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_5()
public function testRfcUnhipbot5()
{
// {{{
$robotstxt = new Robotstxt(
@@ -397,7 +397,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_6()
public function testRfcUnhipbot6()
{
// {{{
$robotstxt = new Robotstxt(
@@ -425,7 +425,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_7()
public function testRfcUnhipbot7()
{
// {{{
$robotstxt = new Robotstxt(
@@ -453,7 +453,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_8()
public function testRfcUnhipbot8()
{
// {{{
$robotstxt = new Robotstxt(
@@ -481,7 +481,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_9()
public function testRfcUnhipbot9()
{
// {{{
$robotstxt = new Robotstxt(
@@ -509,7 +509,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_10()
public function testRfcUnhipbot10()
{
// {{{
$robotstxt = new Robotstxt(
@@ -537,7 +537,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_unhipbot_11()
public function testRfcUnhipbot11()
{
// {{{
$robotstxt = new Robotstxt(
@@ -565,7 +565,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_webcrawler_1()
public function testRfcWebcrawler1()
{
// {{{
$robotstxt = new Robotstxt(
@@ -593,7 +593,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_2()
public function testRfcWebcrawler2()
{
// {{{
$robotstxt = new Robotstxt(
@@ -621,7 +621,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_3()
public function testRfcWebcrawler3()
{
// {{{
$robotstxt = new Robotstxt(
@@ -649,7 +649,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_4()
public function testRfcWebcrawler4()
{
// {{{
$robotstxt = new Robotstxt(
@@ -677,7 +677,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_5()
public function testRfcWebcrawler5()
{
// {{{
$robotstxt = new Robotstxt(
@@ -705,7 +705,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_6()
public function testRfcWebcrawler6()
{
// {{{
$robotstxt = new Robotstxt(
@@ -733,7 +733,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_7()
public function testRfcWebcrawler7()
{
// {{{
$robotstxt = new Robotstxt(
@@ -761,7 +761,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_8()
public function testRfcWebcrawler8()
{
// {{{
$robotstxt = new Robotstxt(
@@ -789,7 +789,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_9()
public function testRfcWebcrawler9()
{
// {{{
$robotstxt = new Robotstxt(
@@ -817,7 +817,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_10()
public function testRfcWebcrawler10()
{
// {{{
$robotstxt = new Robotstxt(
@@ -845,7 +845,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_webcrawler_11()
public function testRfcWebcrawler11()
{
// {{{
$robotstxt = new Robotstxt(
@@ -873,7 +873,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_1()
public function testRfcExcite1()
{
// {{{
$robotstxt = new Robotstxt(
@@ -901,7 +901,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_2()
public function testRfcExcite2()
{
// {{{
$robotstxt = new Robotstxt(
@@ -929,7 +929,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_3()
public function testRfcExcite3()
{
// {{{
$robotstxt = new Robotstxt(
@@ -957,7 +957,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_4()
public function testRfcExcite4()
{
// {{{
$robotstxt = new Robotstxt(
@@ -985,7 +985,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_5()
public function testRfcExcite5()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1013,7 +1013,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_6()
public function testRfcExcite6()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1041,7 +1041,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_7()
public function testRfcExcite7()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1069,7 +1069,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_8()
public function testRfcExcite8()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1097,7 +1097,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_9()
public function testRfcExcite9()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1125,7 +1125,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_10()
public function testRfcExcite10()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1153,7 +1153,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_excite_11()
public function testRfcExcite11()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1181,7 +1181,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_other_1()
public function testRfcOther1()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1206,7 +1206,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
// }}}
$this->assertSame($robotstxt->URLAllow("http://www.fict.org/"), false);
}
public function test_rfc_other_2()
public function testRfcOther2()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1234,7 +1234,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_other_3()
public function testRfcOther3()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1262,7 +1262,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_other_4()
public function testRfcOther4()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1290,7 +1290,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_other_5()
public function testRfcOther5()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1318,7 +1318,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_other_6()
public function testRfcOther6()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1346,7 +1346,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_other_7()
public function testRfcOther7()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1374,7 +1374,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_other_8()
public function testRfcOther8()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1402,7 +1402,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
public function test_rfc_other_9()
public function testRfcOther9()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1430,7 +1430,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_other_10()
public function testRfcOther10()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1458,7 +1458,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false
);
}
public function test_rfc_other_11()
public function testRfcOther11()
{
// {{{
$robotstxt = new Robotstxt(
@@ -1486,4 +1486,49 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true
);
}
// Allow/Disallow values must be empty or start with a slash
public function testAllowDisallowSlash1()
{
    // A Disallow value that is neither empty nor starting with "/" is
    // expected to be reported by errors().
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Disallow: INVALID\n\n",
        "domsearch"
    );
    // Key 1 is presumably the zero-based index of the offending line
    // (TODO confirm against Robotstxt::errors()).
    // assertSame() takes ($expected, $actual): expected value goes first.
    $this->assertSame(
        [1 => "Disallow : Line must start by slash"],
        $robotstxt->errors()
    );
}
public function testAllowDisallowSlash2()
{
    // A Disallow line without a path value is expected to produce no
    // error.
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Disallow: \n\n",
        "domsearch"
    );
    // assertSame() takes ($expected, $actual): expected value goes first.
    $this->assertSame([], $robotstxt->errors());
}
public function testAllowDisallowSlash3()
{
    // An Allow value that is neither empty nor starting with "/" is
    // expected to be reported by errors().
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Allow: INVALID\n\n",
        "domsearch"
    );
    // Key 1 is presumably the zero-based index of the offending line
    // (TODO confirm against Robotstxt::errors()).
    // assertSame() takes ($expected, $actual): expected value goes first.
    $this->assertSame(
        [1 => "Allow : Line must start by slash"],
        $robotstxt->errors()
    );
}
public function testAllowDisallowSlash4()
{
    // An Allow line without a path value is expected to produce no error.
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Allow: \n\n",
        "domsearch"
    );
    // assertSame() takes ($expected, $actual): expected value goes first.
    $this->assertSame([], $robotstxt->errors());
}
}