1322 lines
30 KiB
PHP
1322 lines
30 KiB
PHP
<?php
|
|
/** DomFramework - Tests
|
|
* @package domframework
|
|
* @author Dominique Fournier <dominique@fournier38.fr>
|
|
* @license BSD
|
|
*/
|
|
|
|
namespace Domframework\Tests;
|
|
|
|
use Domframework\Robotstxt;
|
|
|
|
/**
 * Tests for the Robotstxt parser.
 *
 * Every test builds a Robotstxt object from a robots.txt body and a crawler
 * name, then checks one accessor (allow, disallow, sitemaps, crawldelay,
 * host, errors) or the URLAllow decision for a single URL.
 *
 * Assertions follow the PHPUnit argument order (expected first, actual
 * second) so that failure reports label the values correctly.
 */
class RobotstxtTest extends \PHPUnit_Framework_TestCase
{
    /**
     * Build a parser for the sample robots.txt used by the test_rfc_* tests.
     *
     * The content is modelled on the example of
     * http://www.robotstxt.org/norobots-rfc.txt : one record forbidding
     * everything to "unhipbot", one record allowing everything to
     * "webcrawler" and "excite", and a default record with mixed rules.
     * The lines are joined with "\n" and the last line has no trailing
     * newline.
     *
     * @param string $crawler The crawler name given to the parser
     * @return Robotstxt The parser loaded with the sample file
     */
    private function rfcRobots ($crawler)
    {
        $content = implode ("\n", [
            "# /robots.txt for http://www.fict.org/",
            "# comments to webmaster@fict.org",
            "",
            "User-agent: unhipbot",
            "Disallow: /",
            "",
            "User-agent: webcrawler",
            "User-agent: excite",
            "Disallow:",
            "",
            "User-agent: *",
            "Disallow: /org/plans.html",
            "Allow: /org/",
            "Allow: /serv",
            "Allow: /~mak",
            "Disallow: /",
        ]);
        return new Robotstxt ($content, $crawler);
    }

    /**
     * Ask the sample robots.txt whether a crawler may fetch a path of
     * http://www.fict.org.
     *
     * @param string $crawler The crawler name given to the parser
     * @param string $path The absolute path (with leading slash) to check
     * @return mixed The raw result of Robotstxt::URLAllow for the full URL
     */
    private function rfcAllowed ($crawler, $path)
    {
        return $this->rfcRobots ($crawler)
            ->URLAllow ("http://www.fict.org" . $path);
    }

    // Empty Robots: the values reported when the robots.txt has no content

    /** An empty file allows the whole site */
    public function test_Construct_1 ()
    {
        $robotstxt = new Robotstxt ("", "domsearch");
        $this->assertSame (["/"], $robotstxt->allow ());
    }

    /** An empty file disallows nothing */
    public function test_Construct_2 ()
    {
        $robotstxt = new Robotstxt ("", "domsearch");
        $this->assertSame ([], $robotstxt->disallow ());
    }

    /** An empty file declares no sitemap */
    public function test_Construct_3 ()
    {
        $robotstxt = new Robotstxt ("", "domsearch");
        $this->assertSame ([], $robotstxt->sitemaps ());
    }

    /** An empty file reports a crawl delay of 3 */
    public function test_Construct_4 ()
    {
        $robotstxt = new Robotstxt ("", "domsearch");
        $this->assertSame (3, $robotstxt->crawldelay ());
    }

    // Allow

    /** An empty Disallow in the "*" record allows the whole site */
    public function test_allow_1 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow:\n", "domsearch");
        $this->assertSame (["/"], $robotstxt->allow ());
    }

    /** Same result when a dedicated record follows the "*" record */
    public function test_allow_2 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
            "domsearch");
        $this->assertSame (["/"], $robotstxt->allow ());
    }

    /** Same result when the dedicated record precedes the "*" record */
    public function test_allow_3 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
            "domsearch");
        $this->assertSame (["/"], $robotstxt->allow ());
    }

    /**
     * A record shared by several User-Agent lines applies to the crawler,
     * even though the "*" record forbids everything
     */
    public function test_allow_4 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: DomSearch\n".
            "User-Agent: User1\n".
            "User-Agent: User2\n".
            "Disallow:\n\n".
            "User-Agent: *\n".
            "Disallow: /\n", "domsearch");
        $this->assertSame (["/"], $robotstxt->allow ());
    }

    // Disallow

    /** "Disallow: /" in the "*" record is reported by disallow() */
    public function test_disallow_1 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\n", "domsearch");
        $this->assertSame (["/"], $robotstxt->disallow ());
    }

    /** Same result when a dedicated record follows the "*" record */
    public function test_disallow_2 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
            "domsearch");
        $this->assertSame (["/"], $robotstxt->disallow ());
    }

    /** Same result when the dedicated record precedes the "*" record */
    public function test_disallow_3 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
            "domsearch");
        $this->assertSame (["/"], $robotstxt->disallow ());
    }

    // Sitemaps

    /** No Sitemap line gives an empty list */
    public function test_sitemaps_1 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
            "domsearch");
        $this->assertSame ([], $robotstxt->sitemaps ());
    }

    /** A single Sitemap line is returned as is */
    public function test_sitemaps_2 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
            "domsearch");
        $this->assertSame (
            ["http://example.com/sitemap.xml"],
            $robotstxt->sitemaps ());
    }

    /** Several Sitemap lines are all returned, in order, case preserved */
    public function test_sitemaps_3 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\n".
            "Sitemap: http://example.com/sitemap.xml\n".
            "Sitemap: http://example.com/SITEMAP.XML", "domsearch");
        $this->assertSame (
            ["http://example.com/sitemap.xml", "http://example.com/SITEMAP.XML"],
            $robotstxt->sitemaps ());
    }

    /**
     * A Sitemap value which is not a URL is reported by errors().
     * NOTE(review): the key 2 looks like the zero-based index of the faulty
     * line - confirm against Robotstxt.
     */
    public function test_sitemaps_error_1 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\nSitemap: URL",
            "domsearch");
        $this->assertSame (
            [2 => "Sitemap : Invalid URL provided"],
            $robotstxt->errors ());
    }

    // Host

    /** Without a Host line, host() is null */
    public function test_host_1 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\n", "domsearch");
        $this->assertNull ($robotstxt->host ());
    }

    /** The Host value is returned */
    public function test_host_2 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\n\nHost: localhost", "domsearch");
        $this->assertSame ("localhost", $robotstxt->host ());
    }

    /** With two Host lines, the first value is kept */
    public function test_host_error_1 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
            "domsearch");
        $this->assertSame ("localhost", $robotstxt->host ());
    }

    /**
     * With two Host lines, the second one is reported by errors().
     * NOTE(review): the key 4 looks like the zero-based index of the faulty
     * line - confirm against Robotstxt.
     */
    public function test_host_error_2 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
            "domsearch");
        $this->assertSame ([4 => "Multiple Hosts set"], $robotstxt->errors ());
    }

    // URLAllow

    /** An empty file allows "/" */
    public function test_urlallow_1 ()
    {
        $robotstxt = new Robotstxt ("", "domsearch");
        $this->assertTrue ($robotstxt->URLAllow ("/"));
    }

    /** "Disallow: /" forbids "/" */
    public function test_urlallow_2 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /", "domsearch");
        $this->assertFalse ($robotstxt->URLAllow ("/"));
    }

    /** An Allow on a sub-directory does not open "/" */
    public function test_urlallow_3 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\nAllow: /allow/", "domsearch");
        $this->assertFalse ($robotstxt->URLAllow ("/"));
    }

    /** An Allow on a sub-directory opens the files below it */
    public function test_urlallow_4 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\nAllow: /allow/", "domsearch");
        $this->assertTrue ($robotstxt->URLAllow ("/allow/file"));
    }

    /** The pattern /allow/*.gif$ accepts /allow/file.gif */
    public function test_urlallow_5 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$", "domsearch");
        $this->assertTrue ($robotstxt->URLAllow ("/allow/file.gif"));
    }

    /** The pattern /allow/*.gif$ rejects /allow/.gif */
    public function test_urlallow_6 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$", "domsearch");
        $this->assertFalse ($robotstxt->URLAllow ("/allow/.gif"));
    }

    /** The pattern /allow/*.gif$ rejects another extension */
    public function test_urlallow_7 ()
    {
        $robotstxt = new Robotstxt (
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$", "domsearch");
        $this->assertFalse ($robotstxt->URLAllow ("/allow/file.png"));
    }

    // Tests like http://www.robotstxt.org/norobots-rfc.txt
    // The four groups below check the same eleven URLs for the crawlers
    // "unhipbot", "webcrawler", "excite" and "other" (a name absent from the
    // file).

    // unhipbot: expected to be refused everywhere except /robots.txt
    public function test_rfc_unhipbot_1 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/"));
    }

    public function test_rfc_unhipbot_2 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/index.html"));
    }

    public function test_rfc_unhipbot_3 ()
    {
        $this->assertTrue ($this->rfcAllowed ("unhipbot", "/robots.txt"));
    }

    public function test_rfc_unhipbot_4 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/server.html"));
    }

    public function test_rfc_unhipbot_5 ()
    {
        $this->assertFalse (
            $this->rfcAllowed ("unhipbot", "/services/fast.html"));
    }

    public function test_rfc_unhipbot_6 ()
    {
        $this->assertFalse (
            $this->rfcAllowed ("unhipbot", "/services/slow.html"));
    }

    public function test_rfc_unhipbot_7 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/orgo.gif"));
    }

    public function test_rfc_unhipbot_8 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/org/about.html"));
    }

    public function test_rfc_unhipbot_9 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/org/plans.html"));
    }

    public function test_rfc_unhipbot_10 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/~jim/jim.html"));
    }

    public function test_rfc_unhipbot_11 ()
    {
        $this->assertFalse ($this->rfcAllowed ("unhipbot", "/~mak/mak.html"));
    }

    // webcrawler: expected to be accepted everywhere
    public function test_rfc_webcrawler_1 ()
    {
        $this->assertTrue ($this->rfcAllowed ("webcrawler", "/"));
    }

    public function test_rfc_webcrawler_2 ()
    {
        $this->assertTrue ($this->rfcAllowed ("webcrawler", "/index.html"));
    }

    public function test_rfc_webcrawler_3 ()
    {
        $this->assertTrue ($this->rfcAllowed ("webcrawler", "/robots.txt"));
    }

    public function test_rfc_webcrawler_4 ()
    {
        $this->assertTrue ($this->rfcAllowed ("webcrawler", "/server.html"));
    }

    public function test_rfc_webcrawler_5 ()
    {
        $this->assertTrue (
            $this->rfcAllowed ("webcrawler", "/services/fast.html"));
    }

    public function test_rfc_webcrawler_6 ()
    {
        $this->assertTrue (
            $this->rfcAllowed ("webcrawler", "/services/slow.html"));
    }

    public function test_rfc_webcrawler_7 ()
    {
        $this->assertTrue ($this->rfcAllowed ("webcrawler", "/orgo.gif"));
    }

    public function test_rfc_webcrawler_8 ()
    {
        $this->assertTrue (
            $this->rfcAllowed ("webcrawler", "/org/about.html"));
    }

    public function test_rfc_webcrawler_9 ()
    {
        $this->assertTrue (
            $this->rfcAllowed ("webcrawler", "/org/plans.html"));
    }

    public function test_rfc_webcrawler_10 ()
    {
        $this->assertTrue ($this->rfcAllowed ("webcrawler", "/~jim/jim.html"));
    }

    public function test_rfc_webcrawler_11 ()
    {
        $this->assertTrue ($this->rfcAllowed ("webcrawler", "/~mak/mak.html"));
    }

    // excite: shares the record of webcrawler, expected to be accepted
    // everywhere
    public function test_rfc_excite_1 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/"));
    }

    public function test_rfc_excite_2 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/index.html"));
    }

    public function test_rfc_excite_3 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/robots.txt"));
    }

    public function test_rfc_excite_4 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/server.html"));
    }

    public function test_rfc_excite_5 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/services/fast.html"));
    }

    public function test_rfc_excite_6 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/services/slow.html"));
    }

    public function test_rfc_excite_7 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/orgo.gif"));
    }

    public function test_rfc_excite_8 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/org/about.html"));
    }

    public function test_rfc_excite_9 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/org/plans.html"));
    }

    public function test_rfc_excite_10 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/~jim/jim.html"));
    }

    public function test_rfc_excite_11 ()
    {
        $this->assertTrue ($this->rfcAllowed ("excite", "/~mak/mak.html"));
    }

    // other: a crawler without a dedicated record, expected to follow the
    // "*" record
    public function test_rfc_other_1 ()
    {
        $this->assertFalse ($this->rfcAllowed ("other", "/"));
    }

    public function test_rfc_other_2 ()
    {
        $this->assertFalse ($this->rfcAllowed ("other", "/index.html"));
    }

    public function test_rfc_other_3 ()
    {
        $this->assertTrue ($this->rfcAllowed ("other", "/robots.txt"));
    }

    public function test_rfc_other_4 ()
    {
        $this->assertTrue ($this->rfcAllowed ("other", "/server.html"));
    }

    public function test_rfc_other_5 ()
    {
        $this->assertTrue ($this->rfcAllowed ("other", "/services/fast.html"));
    }

    public function test_rfc_other_6 ()
    {
        $this->assertTrue ($this->rfcAllowed ("other", "/services/slow.html"));
    }

    public function test_rfc_other_7 ()
    {
        $this->assertFalse ($this->rfcAllowed ("other", "/orgo.gif"));
    }

    public function test_rfc_other_8 ()
    {
        $this->assertTrue ($this->rfcAllowed ("other", "/org/about.html"));
    }

    public function test_rfc_other_9 ()
    {
        $this->assertFalse ($this->rfcAllowed ("other", "/org/plans.html"));
    }

    public function test_rfc_other_10 ()
    {
        $this->assertFalse ($this->rfcAllowed ("other", "/~jim/jim.html"));
    }

    public function test_rfc_other_11 ()
    {
        $this->assertTrue ($this->rfcAllowed ("other", "/~mak/mak.html"));
    }
}
|