Robots: values for Allow and Disallow must be empty or start with a slash

This commit is contained in:
2023-01-03 13:20:21 +01:00
parent a470048b4f
commit 535525db1d
2 changed files with 139 additions and 77 deletions

View File

@@ -15,25 +15,25 @@ use Domframework\Robotstxt;
class RobotstxtTest extends \PHPUnit_Framework_TestCase class RobotstxtTest extends \PHPUnit_Framework_TestCase
{ {
// Empty Robots // Empty Robots
public function test_Construct_1() public function testConstruct1()
{ {
$robotstxt = new Robotstxt("", "domsearch"); $robotstxt = new Robotstxt("", "domsearch");
$res = $robotstxt->allow(); $res = $robotstxt->allow();
$this->assertSame($res, ["/"]); $this->assertSame($res, ["/"]);
} }
public function test_Construct_2() public function testConstruct2()
{ {
$robotstxt = new Robotstxt("", "domsearch"); $robotstxt = new Robotstxt("", "domsearch");
$res = $robotstxt->disallow(); $res = $robotstxt->disallow();
$this->assertSame($res, array ()); $this->assertSame($res, array ());
} }
public function test_Construct_3() public function testConstruct3()
{ {
$robotstxt = new Robotstxt("", "domsearch"); $robotstxt = new Robotstxt("", "domsearch");
$res = $robotstxt->sitemaps(); $res = $robotstxt->sitemaps();
$this->assertSame($res, array ()); $this->assertSame($res, array ());
} }
public function test_Construct_4() public function testConstruct4()
{ {
$robotstxt = new Robotstxt("", "domsearch"); $robotstxt = new Robotstxt("", "domsearch");
$res = $robotstxt->crawldelay(); $res = $robotstxt->crawldelay();
@@ -41,7 +41,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
} }
// Allow // Allow
public function test_allow_1() public function testAllow1()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow:\n", "User-Agent: *\nDisallow:\n",
@@ -50,7 +50,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->allow(); $res = $robotstxt->allow();
$this->assertSame($res, ["/"]); $this->assertSame($res, ["/"]);
} }
public function test_allow_2() public function testAllow2()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n", "User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
@@ -59,7 +59,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->allow(); $res = $robotstxt->allow();
$this->assertSame($res, ["/"]); $this->assertSame($res, ["/"]);
} }
public function test_allow_3() public function testAllow3()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n", "User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
@@ -68,7 +68,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->allow(); $res = $robotstxt->allow();
$this->assertSame($res, ["/"]); $this->assertSame($res, ["/"]);
} }
public function test_allow_4() public function testAllow4()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: DomSearch\n" . "User-Agent: DomSearch\n" .
@@ -84,7 +84,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
} }
// Disallow // Disallow
public function test_disallow_1() public function testDisallow1()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n", "User-Agent: *\nDisallow: /\n",
@@ -93,7 +93,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->disallow(); $res = $robotstxt->disallow();
$this->assertSame($res, ["/"]); $this->assertSame($res, ["/"]);
} }
public function test_disallow_2() public function testDisallow2()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n", "User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
@@ -102,7 +102,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->disallow(); $res = $robotstxt->disallow();
$this->assertSame($res, ["/"]); $this->assertSame($res, ["/"]);
} }
public function test_disallow_3() public function testDisallow3()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n", "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
@@ -113,7 +113,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
} }
// Sitemaps // Sitemaps
public function test_sitemaps_1() public function testSitemaps1()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n", "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
@@ -122,7 +122,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->sitemaps(); $res = $robotstxt->sitemaps();
$this->assertSame($res, []); $this->assertSame($res, []);
} }
public function test_sitemaps_2() public function testSitemaps2()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml", "User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
@@ -131,7 +131,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->sitemaps(); $res = $robotstxt->sitemaps();
$this->assertSame($res, ["http://example.com/sitemap.xml"]); $this->assertSame($res, ["http://example.com/sitemap.xml"]);
} }
public function test_sitemaps_3() public function testSitemaps3()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n" . "User-Agent: *\nDisallow: /\n" .
@@ -146,7 +146,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
); );
} }
public function test_sitemaps_error_1() public function testSitemapsError1()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nSitemap: URL", "User-Agent: *\nDisallow: /\nSitemap: URL",
@@ -157,7 +157,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
} }
// Host // Host
public function test_host_1() public function testHost1()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n", "User-Agent: *\nDisallow: /\n",
@@ -166,7 +166,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->host(); $res = $robotstxt->host();
$this->assertSame($res, null); $this->assertSame($res, null);
} }
public function test_host_2() public function testHost2()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nHost: localhost", "User-Agent: *\nDisallow: /\n\nHost: localhost",
@@ -175,7 +175,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->host(); $res = $robotstxt->host();
$this->assertSame($res, "localhost"); $this->assertSame($res, "localhost");
} }
public function test_host_error_1() public function testHostError1()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto", "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
@@ -184,7 +184,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->host(); $res = $robotstxt->host();
$this->assertSame($res, "localhost"); $this->assertSame($res, "localhost");
} }
public function test_host_error_2() public function testHostError2()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto", "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
@@ -195,13 +195,13 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
} }
// URLAllow // URLAllow
public function test_urlallow_1() public function testUrlallow1()
{ {
$robotstxt = new Robotstxt("", "domsearch"); $robotstxt = new Robotstxt("", "domsearch");
$res = $robotstxt->URLAllow("/"); $res = $robotstxt->URLAllow("/");
$this->assertSame($res, true); $this->assertSame($res, true);
} }
public function test_urlallow_2() public function testUrlallow2()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /", "User-Agent: *\nDisallow: /",
@@ -210,7 +210,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/"); $res = $robotstxt->URLAllow("/");
$this->assertSame($res, false); $this->assertSame($res, false);
} }
public function test_urlallow_3() public function testUrlallow3()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/", "User-Agent: *\nDisallow: /\nAllow: /allow/",
@@ -219,7 +219,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/"); $res = $robotstxt->URLAllow("/");
$this->assertSame($res, false); $this->assertSame($res, false);
} }
public function test_urlallow_4() public function testUrlallow4()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/", "User-Agent: *\nDisallow: /\nAllow: /allow/",
@@ -228,7 +228,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/allow/file"); $res = $robotstxt->URLAllow("/allow/file");
$this->assertSame($res, true); $this->assertSame($res, true);
} }
public function test_urlallow_5() public function testUrlallow5()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$", "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
@@ -237,7 +237,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/allow/file.gif"); $res = $robotstxt->URLAllow("/allow/file.gif");
$this->assertSame($res, true); $this->assertSame($res, true);
} }
public function test_urlallow_6() public function testUrlallow6()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$", "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
@@ -246,7 +246,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
$res = $robotstxt->URLAllow("/allow/.gif"); $res = $robotstxt->URLAllow("/allow/.gif");
$this->assertSame($res, false); $this->assertSame($res, false);
} }
public function test_urlallow_7() public function testUrlallow7()
{ {
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$", "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
@@ -257,7 +257,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
} }
// Tests like http://www.robotstxt.org/norobots-rfc.txt // Tests like http://www.robotstxt.org/norobots-rfc.txt
public function test_rfc_unhipbot_1() public function testRfcUnhipbot1()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -285,7 +285,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_2() public function testRfcUnhipbot2()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -313,7 +313,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_3() public function testRfcUnhipbot3()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -341,7 +341,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_unhipbot_4() public function testRfcUnhipbot4()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -369,7 +369,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_5() public function testRfcUnhipbot5()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -397,7 +397,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_6() public function testRfcUnhipbot6()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -425,7 +425,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_7() public function testRfcUnhipbot7()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -453,7 +453,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_8() public function testRfcUnhipbot8()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -481,7 +481,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_9() public function testRfcUnhipbot9()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -509,7 +509,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_10() public function testRfcUnhipbot10()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -537,7 +537,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_unhipbot_11() public function testRfcUnhipbot11()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -565,7 +565,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_webcrawler_1() public function testRfcWebcrawler1()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -593,7 +593,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_2() public function testRfcWebcrawler2()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -621,7 +621,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_3() public function testRfcWebcrawler3()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -649,7 +649,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_4() public function testRfcWebcrawler4()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -677,7 +677,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_5() public function testRfcWebcrawler5()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -705,7 +705,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_6() public function testRfcWebcrawler6()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -733,7 +733,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_7() public function testRfcWebcrawler7()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -761,7 +761,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_8() public function testRfcWebcrawler8()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -789,7 +789,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_9() public function testRfcWebcrawler9()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -817,7 +817,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_10() public function testRfcWebcrawler10()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -845,7 +845,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_webcrawler_11() public function testRfcWebcrawler11()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -873,7 +873,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_1() public function testRfcExcite1()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -901,7 +901,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_2() public function testRfcExcite2()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -929,7 +929,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_3() public function testRfcExcite3()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -957,7 +957,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_4() public function testRfcExcite4()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -985,7 +985,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_5() public function testRfcExcite5()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1013,7 +1013,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_6() public function testRfcExcite6()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1041,7 +1041,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_7() public function testRfcExcite7()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1069,7 +1069,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_8() public function testRfcExcite8()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1097,7 +1097,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_9() public function testRfcExcite9()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1125,7 +1125,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_10() public function testRfcExcite10()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1153,7 +1153,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_excite_11() public function testRfcExcite11()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1181,7 +1181,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_other_1() public function testRfcOther1()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1206,7 +1206,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
// }}} // }}}
$this->assertSame($robotstxt->URLAllow("http://www.fict.org/"), false); $this->assertSame($robotstxt->URLAllow("http://www.fict.org/"), false);
} }
public function test_rfc_other_2() public function testRfcOther2()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1234,7 +1234,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_other_3() public function testRfcOther3()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1262,7 +1262,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_other_4() public function testRfcOther4()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1290,7 +1290,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_other_5() public function testRfcOther5()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1318,7 +1318,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_other_6() public function testRfcOther6()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1346,7 +1346,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_other_7() public function testRfcOther7()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1374,7 +1374,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_other_8() public function testRfcOther8()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1402,7 +1402,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
public function test_rfc_other_9() public function testRfcOther9()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1430,7 +1430,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_other_10() public function testRfcOther10()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1458,7 +1458,7 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
false false
); );
} }
public function test_rfc_other_11() public function testRfcOther11()
{ {
// {{{ // {{{
$robotstxt = new Robotstxt( $robotstxt = new Robotstxt(
@@ -1486,4 +1486,49 @@ class RobotstxtTest extends \PHPUnit_Framework_TestCase
true true
); );
} }
// Allow/Disallow values must be empty or start with a slash
/**
 * A Disallow value that is neither empty nor starting with "/" must be
 * reported by errors().
 *
 * The expected entry is keyed by 1; presumably this is the zero-based
 * index of the offending line in the robots.txt content (to be confirmed
 * against the parser).
 */
public function testAllowDisallowSlash1()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Disallow: INVALID\n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // PHPUnit's signature is assertSame($expected, $actual): the literal goes
    // first so that a failure report labels both values correctly.
    $this->assertSame([1 => "Disallow : Line must start by slash"], $res);
}
/**
 * An empty Disallow value is valid: errors() must return an empty array.
 */
public function testAllowDisallowSlash2()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Disallow: \n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // PHPUnit's signature is assertSame($expected, $actual): the literal goes
    // first so that a failure report labels both values correctly.
    $this->assertSame([], $res);
}
/**
 * An Allow value that is neither empty nor starting with "/" must be
 * reported by errors().
 *
 * The expected entry is keyed by 1; presumably this is the zero-based
 * index of the offending line in the robots.txt content (to be confirmed
 * against the parser).
 */
public function testAllowDisallowSlash3()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Allow: INVALID\n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // PHPUnit's signature is assertSame($expected, $actual): the literal goes
    // first so that a failure report labels both values correctly.
    $this->assertSame([1 => "Allow : Line must start by slash"], $res);
}
/**
 * An empty Allow value is valid: errors() must return an empty array.
 */
public function testAllowDisallowSlash4()
{
    $robotstxt = new Robotstxt(
        "User-Agent: *\n" .
        "Allow: \n\n",
        "domsearch"
    );
    $res = $robotstxt->errors();
    // PHPUnit's signature is assertSame($expected, $actual): the literal goes
    // first so that a failure report labels both values correctly.
    $this->assertSame([], $res);
}
} }

View File

@@ -13,6 +13,7 @@ namespace Domframework;
* It allow to examine an URL against the robots.txt file and return if the URL * It allow to examine an URL against the robots.txt file and return if the URL
* is allowed to be used or not * is allowed to be used or not
* The definition of the format of robots.txt file is available here : * The definition of the format of robots.txt file is available here :
* https://www.rfc-editor.org/rfc/rfc9309.txt
* http://www.robotstxt.org/norobots-rfc.txt * http://www.robotstxt.org/norobots-rfc.txt
* https://en.wikipedia.org/wiki/Robots_exclusion_standard * https://en.wikipedia.org/wiki/Robots_exclusion_standard
*/ */
@@ -94,15 +95,15 @@ class Robotstxt
// Get the Allow and Disallow lines. The stop will arrive on first // Get the Allow and Disallow lines. The stop will arrive on first
// User-Agent line arriving after a Allow/Disallow. // User-Agent line arriving after a Allow/Disallow.
// Comments and empty lines are removed // Comments and empty lines are removed
for ($i = key($keys); $i < count($content); $i++) { for ($nb = key($keys); $nb < count($content); $nb++) {
$line = trim($content[$i]); $line = trim($content[$nb]);
if (stripos($line, "Sitemap:") === 0) { if (stripos($line, "Sitemap:") === 0) {
// Already managed in the general parser. Not needed in the specific // Already managed in the general parser. Not needed in the specific
// user-agent parser. Must at least be catched to not generate an // user-agent parser. Must at least be catched to not generate an
// error // error
} elseif (stripos($line, "Host:") === 0) { } elseif (stripos($line, "Host:") === 0) {
if ($this->host !== null) { if ($this->host !== null) {
$this->errors[$i] = dgettext( $this->errors[$nb] = dgettext(
"domframework", "domframework",
"Multiple Hosts set" "Multiple Hosts set"
); );
@@ -112,15 +113,31 @@ class Robotstxt
} elseif ($line === "" || $line[0] === "#") { } elseif ($line === "" || $line[0] === "#") {
// Comment, empty line : SKIP // Comment, empty line : SKIP
} elseif (stripos($line, "allow:") === 0) { } elseif (stripos($line, "allow:") === 0) {
$this->allow[] = $this->getValueFromLine($line); $allow = $this->getValueFromLine($line);
if (trim($allow) === "" || $allow[0] === "/") {
$this->allow[] = $allow;
} else {
$this->errors[$nb] = dgettext(
"domframework",
"Allow : Line must start by slash"
);
}
} elseif (stripos($line, "disallow:") === 0) { } elseif (stripos($line, "disallow:") === 0) {
$this->disallow[] = $this->getValueFromLine($line); $disallow = $this->getValueFromLine($line);
if (trim($disallow) === "" || $disallow[0] === "/") {
$this->disallow[] = $disallow;
} else {
$this->errors[$nb] = dgettext(
"domframework",
"Disallow : Line must start by slash"
);
}
} elseif (stripos($line, "crawl-delay:") === 0) { } elseif (stripos($line, "crawl-delay:") === 0) {
$val = $this->getValueFromLine($line); $val = $this->getValueFromLine($line);
if ($val > 1 && $val < 60 && $this->crawldelay === null) { if ($val > 1 && $val < 60 && $this->crawldelay === null) {
$this->crawldelay = intval($val); $this->crawldelay = intval($val);
} else { } else {
$this->errors[$i] = dgettext( $this->errors[$nb] = dgettext(
"domframework", "domframework",
"Crawldelay : value out of range (1-60)" "Crawldelay : value out of range (1-60)"
); );
@@ -134,7 +151,7 @@ class Robotstxt
} }
} else { } else {
// Not managed line : error // Not managed line : error
$this->errors[$i] = sprintf(dgettext( $this->errors[$nb] = sprintf(dgettext(
"domframework", "domframework",
"Invalid line : unknown command : '%s'" "Invalid line : unknown command : '%s'"
), $line); ), $line);