* @license BSD */

namespace Domframework\Tests;

use Domframework\Robotstxt;

/** Test the Robotstxt file
 *
 * Every test builds a Robotstxt parser from an inline robots.txt content and
 * a crawler name, then checks one accessor. Assertions are written as
 * assertSame (expected, actual), so PHPUnit failure messages report the
 * values the right way round.
 */
class RobotstxtTest extends \PHPUnit_Framework_TestCase
{
  /** Return the sample robots.txt used by all the test_rfc_* tests.
   *
   * It holds three records: one for "unhipbot" (everything disallowed), one
   * shared by "webcrawler" and "excite" (empty Disallow), and a catch-all
   * "*" record mixing Allow and Disallow rules.
   * NOTE(review): the layout (one directive per line, blank line between
   * records) is meant to mirror the example of
   * http://www.robotstxt.org/norobots-rfc.txt - confirm against that document.
   *
   * @return string The robots.txt content
   */
  private function rfcRobotsTxt ()
  {
    return
      "# /robots.txt for http://www.fict.org/\n".
      "# comments to webmaster@fict.org\n".
      "\n".
      "User-agent: unhipbot\n".
      "Disallow: /\n".
      "\n".
      "User-agent: webcrawler\n".
      "User-agent: excite\n".
      "Disallow:\n".
      "\n".
      "User-agent: *\n".
      "Disallow: /org/plans.html\n".
      "Allow: /org/\n".
      "Allow: /serv\n".
      "Allow: /~mak\n".
      "Disallow: /";
  }

  /** Assert the URLAllow answer for one crawler against the RFC sample file.
   *
   * @param string $crawler The crawler name given to the Robotstxt parser
   * @param string $url The URL passed to URLAllow
   * @param boolean $expected The value URLAllow must return (strict compare)
   */
  private function assertRfcAccess ($crawler, $url, $expected)
  {
    $robotstxt = new Robotstxt ($this->rfcRobotsTxt (), $crawler);
    $this->assertSame ($expected, $robotstxt->URLAllow ($url));
  }

  // Empty Robots
  public function test_Construct_1 ()
  {
    $robotstxt = new Robotstxt ("", "domsearch");
    $this->assertSame (["/"], $robotstxt->allow ());
  }

  public function test_Construct_2 ()
  {
    $robotstxt = new Robotstxt ("", "domsearch");
    $this->assertSame ([], $robotstxt->disallow ());
  }

  public function test_Construct_3 ()
  {
    $robotstxt = new Robotstxt ("", "domsearch");
    $this->assertSame ([], $robotstxt->sitemaps ());
  }

  public function test_Construct_4 ()
  {
    // An empty robots.txt must report a crawl delay of 3
    $robotstxt = new Robotstxt ("", "domsearch");
    $this->assertSame (3, $robotstxt->crawldelay ());
  }

  // Allow
  public function test_allow_1 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow:\n",
      "domsearch");
    $this->assertSame (["/"], $robotstxt->allow ());
  }

  public function test_allow_2 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
      "domsearch");
    $this->assertSame (["/"], $robotstxt->allow ());
  }

  public function test_allow_3 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
      "domsearch");
    $this->assertSame (["/"], $robotstxt->allow ());
  }

  public function test_allow_4 ()
  {
    // The crawler is named in a multi User-Agent record with an empty
    // Disallow, followed by a "*" record which disallows everything
    $robotstxt = new Robotstxt (
      "User-Agent: DomSearch\n".
      "User-Agent: User1\n".
      "User-Agent: User2\n".
      "Disallow:\n\n".
      "User-Agent: *\n".
      "Disallow: /\n",
      "domsearch");
    $this->assertSame (["/"], $robotstxt->allow ());
  }

  // Disallow
  public function test_disallow_1 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\n",
      "domsearch");
    $this->assertSame (["/"], $robotstxt->disallow ());
  }

  public function test_disallow_2 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
      "domsearch");
    $this->assertSame (["/"], $robotstxt->disallow ());
  }

  public function test_disallow_3 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
      "domsearch");
    $this->assertSame (["/"], $robotstxt->disallow ());
  }

  // Sitemaps
  public function test_sitemaps_1 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
      "domsearch");
    $this->assertSame ([], $robotstxt->sitemaps ());
  }

  public function test_sitemaps_2 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
      "domsearch");
    $this->assertSame (
      ["http://example.com/sitemap.xml"],
      $robotstxt->sitemaps ());
  }

  public function test_sitemaps_3 ()
  {
    // The two URLs differ only by case and must both be returned
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\n".
      "Sitemap: http://example.com/sitemap.xml\n".
      "Sitemap: http://example.com/SITEMAP.XML",
      "domsearch");
    $this->assertSame (
      ["http://example.com/sitemap.xml",
       "http://example.com/SITEMAP.XML"],
      $robotstxt->sitemaps ());
  }

  public function test_sitemaps_error_1 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\nSitemap: URL",
      "domsearch");
    $this->assertSame (
      [2 => "Sitemap : Invalid URL provided"],
      $robotstxt->errors ());
  }

  // Host
  public function test_host_1 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\n",
      "domsearch");
    $this->assertSame (null, $robotstxt->host ());
  }

  public function test_host_2 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\n\nHost: localhost",
      "domsearch");
    $this->assertSame ("localhost", $robotstxt->host ());
  }

  public function test_host_error_1 ()
  {
    // Two Host lines are provided: the first one must be kept
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
      "domsearch");
    $this->assertSame ("localhost", $robotstxt->host ());
  }

  public function test_host_error_2 ()
  {
    // Two Host lines are provided: an error must be recorded
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
      "domsearch");
    $this->assertSame ([4 => "Multiple Hosts set"], $robotstxt->errors ());
  }

  // URLAllow
  public function test_urlallow_1 ()
  {
    $robotstxt = new Robotstxt ("", "domsearch");
    $this->assertSame (true, $robotstxt->URLAllow ("/"));
  }

  public function test_urlallow_2 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /",
      "domsearch");
    $this->assertSame (false, $robotstxt->URLAllow ("/"));
  }

  public function test_urlallow_3 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\nAllow: /allow/",
      "domsearch");
    $this->assertSame (false, $robotstxt->URLAllow ("/"));
  }

  public function test_urlallow_4 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\nAllow: /allow/",
      "domsearch");
    $this->assertSame (true, $robotstxt->URLAllow ("/allow/file"));
  }

  public function test_urlallow_5 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
      "domsearch");
    $this->assertSame (true, $robotstxt->URLAllow ("/allow/file.gif"));
  }

  public function test_urlallow_6 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$",
      "domsearch");
    $this->assertSame (false, $robotstxt->URLAllow ("/allow/.gif"));
  }

  public function test_urlallow_7 ()
  {
    $robotstxt = new Robotstxt (
      "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
      "domsearch");
    $this->assertSame (false, $robotstxt->URLAllow ("/allow/file.png"));
  }

  // Tests like http://www.robotstxt.org/norobots-rfc.txt
  // Crawler "unhipbot": has its own record
  public function test_rfc_unhipbot_1 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/", false);
  }

  public function test_rfc_unhipbot_2 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/index.html", false);
  }

  public function test_rfc_unhipbot_3 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/robots.txt", true);
  }

  public function test_rfc_unhipbot_4 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/server.html", false);
  }

  public function test_rfc_unhipbot_5 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/services/fast.html", false);
  }

  public function test_rfc_unhipbot_6 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/services/slow.html", false);
  }

  public function test_rfc_unhipbot_7 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/orgo.gif", false);
  }

  public function test_rfc_unhipbot_8 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/org/about.html", false);
  }

  public function test_rfc_unhipbot_9 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/org/plans.html", false);
  }

  public function test_rfc_unhipbot_10 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/~jim/jim.html", false);
  }

  public function test_rfc_unhipbot_11 ()
  {
    $this->assertRfcAccess ("unhipbot",
      "http://www.fict.org/~mak/mak.html", false);
  }

  // Crawler "webcrawler": shares a record with "excite"
  public function test_rfc_webcrawler_1 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/", true);
  }

  public function test_rfc_webcrawler_2 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/index.html", true);
  }

  public function test_rfc_webcrawler_3 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/robots.txt", true);
  }

  public function test_rfc_webcrawler_4 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/server.html", true);
  }

  public function test_rfc_webcrawler_5 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/services/fast.html", true);
  }

  public function test_rfc_webcrawler_6 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/services/slow.html", true);
  }

  public function test_rfc_webcrawler_7 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/orgo.gif", true);
  }

  public function test_rfc_webcrawler_8 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/org/about.html", true);
  }

  public function test_rfc_webcrawler_9 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/org/plans.html", true);
  }

  public function test_rfc_webcrawler_10 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/~jim/jim.html", true);
  }

  public function test_rfc_webcrawler_11 ()
  {
    $this->assertRfcAccess ("webcrawler",
      "http://www.fict.org/~mak/mak.html", true);
  }

  // Crawler "excite": shares a record with "webcrawler"
  public function test_rfc_excite_1 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/", true);
  }

  public function test_rfc_excite_2 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/index.html", true);
  }

  public function test_rfc_excite_3 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/robots.txt", true);
  }

  public function test_rfc_excite_4 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/server.html", true);
  }

  public function test_rfc_excite_5 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/services/fast.html", true);
  }

  public function test_rfc_excite_6 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/services/slow.html", true);
  }

  public function test_rfc_excite_7 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/orgo.gif", true);
  }

  public function test_rfc_excite_8 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/org/about.html", true);
  }

  public function test_rfc_excite_9 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/org/plans.html", true);
  }

  public function test_rfc_excite_10 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/~jim/jim.html", true);
  }

  public function test_rfc_excite_11 ()
  {
    $this->assertRfcAccess ("excite",
      "http://www.fict.org/~mak/mak.html", true);
  }

  // Crawler "other": not named in the file, only the "*" record can apply
  public function test_rfc_other_1 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/", false);
  }

  public function test_rfc_other_2 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/index.html", false);
  }

  public function test_rfc_other_3 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/robots.txt", true);
  }

  public function test_rfc_other_4 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/server.html", true);
  }

  public function test_rfc_other_5 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/services/fast.html", true);
  }

  public function test_rfc_other_6 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/services/slow.html", true);
  }

  public function test_rfc_other_7 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/orgo.gif", false);
  }

  public function test_rfc_other_8 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/org/about.html", true);
  }

  public function test_rfc_other_9 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/org/plans.html", false);
  }

  public function test_rfc_other_10 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/~jim/jim.html", false);
  }

  public function test_rfc_other_11 ()
  {
    $this->assertRfcAccess ("other",
      "http://www.fict.org/~mak/mak.html", true);
  }
}