* @license BSD
 */

namespace Domframework\Tests;

use Domframework\Robotstxt;

/**
 * Test the Robotstxt file
 *
 * Each test builds a Robotstxt object from an inline robots.txt content and a
 * crawler name, then checks one accessor (allow, disallow, sitemaps,
 * crawldelay, host, errors) or the URLAllow() decision.
 *
 * All the assertions are written as assertSame(expected, actual), so that a
 * failure message reports the values on the correct side.
 */
class RobotstxtTest extends \PHPUnit_Framework_TestCase
{
    // Empty Robots

    /**
     * An empty robots.txt is expected to allow "/"
     */
    public function testConstruct1()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame(["/"], $robotstxt->allow());
    }

    /**
     * An empty robots.txt is expected to disallow nothing
     */
    public function testConstruct2()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame([], $robotstxt->disallow());
    }

    /**
     * An empty robots.txt is expected to provide no sitemap
     */
    public function testConstruct3()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame([], $robotstxt->sitemaps());
    }

    /**
     * An empty robots.txt is expected to provide no crawl delay
     */
    public function testConstruct4()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame(null, $robotstxt->crawldelay());
    }

    // Allow

    /**
     * An empty Disallow in the wildcard record is expected to allow "/"
     */
    public function testAllow1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow:\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    /**
     * Wildcard record first, DomSearch record second, both with an empty
     * Disallow: "/" is expected to be allowed
     */
    public function testAllow2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    /**
     * Same records as testAllow2, in the reverse order
     */
    public function testAllow3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    /**
     * A record shared by several User-Agent lines (DomSearch included) has an
     * empty Disallow, while the wildcard record disallows "/": the shared
     * record is expected to win for the "domsearch" crawler
     */
    public function testAllow4()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\n" .
            "User-Agent: User1\n" .
            "User-Agent: User2\n" .
            "Disallow:\n\n" .
            "User-Agent: *\n" .
            "Disallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    // Disallow

    /**
     * "Disallow: /" in the wildcard record is expected to be reported
     */
    public function testDisallow1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->disallow());
    }

    /**
     * Wildcard record first, DomSearch record second, both disallowing "/"
     */
    public function testDisallow2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->disallow());
    }

    /**
     * Same records as testDisallow2, in the reverse order
     */
    public function testDisallow3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->disallow());
    }

    // Sitemaps

    /**
     * Without Sitemap line, the sitemaps list is expected to be empty
     */
    public function testSitemaps1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame([], $robotstxt->sitemaps());
    }

    /**
     * One Sitemap line is expected to be returned as is
     */
    public function testSitemaps2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
            "domsearch"
        );
        $this->assertSame(
            ["http://example.com/sitemap.xml"],
            $robotstxt->sitemaps()
        );
    }

    /**
     * Two Sitemap lines differing only by case are both expected, in the file
     * order and with their original case
     */
    public function testSitemaps3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n" .
            "Sitemap: http://example.com/sitemap.xml\n" .
            "Sitemap: http://example.com/SITEMAP.XML",
            "domsearch"
        );
        $this->assertSame(
            ["http://example.com/sitemap.xml", "http://example.com/SITEMAP.XML"],
            $robotstxt->sitemaps()
        );
    }

    /**
     * A Sitemap value which is not an URL is expected to be reported in the
     * errors, under the key 2 (the zero-based index of the Sitemap line in
     * this content)
     */
    public function testSitemapsError1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nSitemap: URL",
            "domsearch"
        );
        $this->assertSame(
            [2 => "Sitemap : Invalid URL provided"],
            $robotstxt->errors()
        );
    }

    // Host

    /**
     * Without Host line, the host is expected to be null
     */
    public function testHost1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame(null, $robotstxt->host());
    }

    /**
     * One Host line is expected to be returned
     */
    public function testHost2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nHost: localhost",
            "domsearch"
        );
        $this->assertSame("localhost", $robotstxt->host());
    }

    /**
     * With two Host lines (the second one written "HoST"), the first value
     * is expected to be kept
     */
    public function testHostError1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
            "domsearch"
        );
        $this->assertSame("localhost", $robotstxt->host());
    }

    /**
     * With two Host lines, an error is expected under the key 4 (the
     * zero-based index of the second Host line in this content)
     */
    public function testHostError2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
            "domsearch"
        );
        $this->assertSame([4 => "Multiple Hosts set"], $robotstxt->errors());
    }

    // URLAllow

    /**
     * An empty robots.txt is expected to allow the "/" URL
     */
    public function testUrlallow1()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame(true, $robotstxt->URLAllow("/"));
    }

    /**
     * "Disallow: /" is expected to deny the "/" URL
     */
    public function testUrlallow2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /",
            "domsearch"
        );
        $this->assertSame(false, $robotstxt->URLAllow("/"));
    }

    /**
     * "Allow: /allow/" is not expected to open the "/" URL
     */
    public function testUrlallow3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/",
            "domsearch"
        );
        $this->assertSame(false, $robotstxt->URLAllow("/"));
    }

    /**
     * "Allow: /allow/" is expected to open a file below /allow/
     */
    public function testUrlallow4()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/",
            "domsearch"
        );
        $this->assertSame(true, $robotstxt->URLAllow("/allow/file"));
    }

    /**
     * The "/allow/*.gif$" pattern is expected to match "/allow/file.gif"
     */
    public function testUrlallow5()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
            "domsearch"
        );
        $this->assertSame(true, $robotstxt->URLAllow("/allow/file.gif"));
    }

    /**
     * The "/allow/*.gif$" pattern is not expected to match "/allow/.gif"
     */
    public function testUrlallow6()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
            "domsearch"
        );
        $this->assertSame(false, $robotstxt->URLAllow("/allow/.gif"));
    }

    /**
     * The "/allow/*.gif$" pattern is not expected to match "/allow/file.png"
     */
    public function testUrlallow7()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
            "domsearch"
        );
        $this->assertSame(false, $robotstxt->URLAllow("/allow/file.png"));
    }

    // Tests like http://www.robotstxt.org/norobots-rfc.txt

    /**
     * Return the robots.txt content shared by all the testRfc* tests.
     *
     * One directive per line, as in the example of the document named above.
     * NOTE(review): the blank lines between the records follow the RFC
     * example layout - confirm against the upstream fixture.
     *
     * @return string The robots.txt content
     */
    private function rfcRobotstxtContent()
    {
        return "# /robots.txt for http://www.fict.org/\n" .
            "# comments to webmaster@fict.org\n" .
            "\n" .
            "User-agent: unhipbot\n" .
            "Disallow: /\n" .
            "\n" .
            "User-agent: webcrawler\n" .
            "User-agent: excite\n" .
            "Disallow:\n" .
            "\n" .
            "User-agent: *\n" .
            "Disallow: /org/plans.html\n" .
            "Allow: /org/\n" .
            "Allow: /serv\n" .
            "Allow: /~mak\n" .
            "Disallow: /";
    }

    /**
     * Parse the shared RFC content for the provided crawler, and check the
     * URLAllow() decision for one URL.
     *
     * @param boolean $expected The expected URLAllow() result
     * @param string $userAgent The crawler name given to Robotstxt
     * @param string $url The URL to check
     */
    private function assertRfcUrlAllow($expected, $userAgent, $url)
    {
        $robotstxt = new Robotstxt($this->rfcRobotstxtContent(), $userAgent);
        $this->assertSame($expected, $robotstxt->URLAllow($url));
    }

    // unhipbot : only /robots.txt is expected to be allowed

    public function testRfcUnhipbot1()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/");
    }

    public function testRfcUnhipbot2()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/index.html");
    }

    public function testRfcUnhipbot3()
    {
        $this->assertRfcUrlAllow(true, "unhipbot", "http://www.fict.org/robots.txt");
    }

    public function testRfcUnhipbot4()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/server.html");
    }

    public function testRfcUnhipbot5()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/services/fast.html");
    }

    public function testRfcUnhipbot6()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/services/slow.html");
    }

    public function testRfcUnhipbot7()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/orgo.gif");
    }

    public function testRfcUnhipbot8()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/org/about.html");
    }

    public function testRfcUnhipbot9()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/org/plans.html");
    }

    public function testRfcUnhipbot10()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/~jim/jim.html");
    }

    public function testRfcUnhipbot11()
    {
        $this->assertRfcUrlAllow(false, "unhipbot", "http://www.fict.org/~mak/mak.html");
    }

    // webcrawler : every URL is expected to be allowed

    public function testRfcWebcrawler1()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/");
    }

    public function testRfcWebcrawler2()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/index.html");
    }

    public function testRfcWebcrawler3()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/robots.txt");
    }

    public function testRfcWebcrawler4()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/server.html");
    }

    public function testRfcWebcrawler5()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/services/fast.html");
    }

    public function testRfcWebcrawler6()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/services/slow.html");
    }

    public function testRfcWebcrawler7()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/orgo.gif");
    }

    public function testRfcWebcrawler8()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/org/about.html");
    }

    public function testRfcWebcrawler9()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/org/plans.html");
    }

    public function testRfcWebcrawler10()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/~jim/jim.html");
    }

    public function testRfcWebcrawler11()
    {
        $this->assertRfcUrlAllow(true, "webcrawler", "http://www.fict.org/~mak/mak.html");
    }

    // excite : every URL is expected to be allowed

    public function testRfcExcite1()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/");
    }

    public function testRfcExcite2()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/index.html");
    }

    public function testRfcExcite3()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/robots.txt");
    }

    public function testRfcExcite4()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/server.html");
    }

    public function testRfcExcite5()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/services/fast.html");
    }

    public function testRfcExcite6()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/services/slow.html");
    }

    public function testRfcExcite7()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/orgo.gif");
    }

    public function testRfcExcite8()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/org/about.html");
    }

    public function testRfcExcite9()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/org/plans.html");
    }

    public function testRfcExcite10()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/~jim/jim.html");
    }

    public function testRfcExcite11()
    {
        $this->assertRfcUrlAllow(true, "excite", "http://www.fict.org/~mak/mak.html");
    }

    // other : a crawler not named in the file, served by the wildcard record

    public function testRfcOther1()
    {
        $this->assertRfcUrlAllow(false, "other", "http://www.fict.org/");
    }

    public function testRfcOther2()
    {
        $this->assertRfcUrlAllow(false, "other", "http://www.fict.org/index.html");
    }

    public function testRfcOther3()
    {
        $this->assertRfcUrlAllow(true, "other", "http://www.fict.org/robots.txt");
    }

    public function testRfcOther4()
    {
        $this->assertRfcUrlAllow(true, "other", "http://www.fict.org/server.html");
    }

    public function testRfcOther5()
    {
        $this->assertRfcUrlAllow(true, "other", "http://www.fict.org/services/fast.html");
    }

    public function testRfcOther6()
    {
        $this->assertRfcUrlAllow(true, "other", "http://www.fict.org/services/slow.html");
    }

    public function testRfcOther7()
    {
        $this->assertRfcUrlAllow(false, "other", "http://www.fict.org/orgo.gif");
    }

    public function testRfcOther8()
    {
        $this->assertRfcUrlAllow(true, "other", "http://www.fict.org/org/about.html");
    }

    public function testRfcOther9()
    {
        $this->assertRfcUrlAllow(false, "other", "http://www.fict.org/org/plans.html");
    }

    public function testRfcOther10()
    {
        $this->assertRfcUrlAllow(false, "other", "http://www.fict.org/~jim/jim.html");
    }

    public function testRfcOther11()
    {
        $this->assertRfcUrlAllow(true, "other", "http://www.fict.org/~mak/mak.html");
    }

    // Allow/Disallow must start by slash or be empty

    /**
     * A Disallow value not starting by a slash is expected to be reported
     * under the key 1 (the zero-based index of the line in this content)
     */
    public function testAllowDisallowSlash1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\n" .
            "Disallow: INVALID\n\n",
            "domsearch"
        );
        $this->assertSame(
            [1 => "Disallow : Line must start by slash"],
            $robotstxt->errors()
        );
    }

    /**
     * An empty Disallow value is not expected to raise an error
     */
    public function testAllowDisallowSlash2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\n" .
            "Disallow: \n\n",
            "domsearch"
        );
        $this->assertSame([], $robotstxt->errors());
    }

    /**
     * An Allow value not starting by a slash is expected to be reported
     * under the key 1 (the zero-based index of the line in this content)
     */
    public function testAllowDisallowSlash3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\n" .
            "Allow: INVALID\n\n",
            "domsearch"
        );
        $this->assertSame(
            [1 => "Allow : Line must start by slash"],
            $robotstxt->errors()
        );
    }

    /**
     * An empty Allow value is not expected to raise an error
     */
    public function testAllowDisallowSlash4()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\n" .
            "Allow: \n\n",
            "domsearch"
        );
        $this->assertSame([], $robotstxt->errors());
    }
}