Files
DomFramework/Tests/RobotstxtTest.php
2022-11-25 21:21:30 +01:00

1490 lines
34 KiB
PHP

<?php
/** DomFramework - Tests
* @package domframework
* @author Dominique Fournier <dominique@fournier38.fr>
* @license BSD
*/
namespace Domframework\Tests;
use Domframework\Robotstxt;
/** Test the Robotstxt parser.
 *
 * Every test builds a Robotstxt object from a robots.txt content and a
 * crawler name, then checks one accessor (allow, disallow, sitemaps,
 * crawldelay, host, errors) or the URLAllow() decision.
 * Assertions follow the PHPUnit convention: expected value first, actual
 * value second, so that failure reports label the two values correctly.
 */
class RobotstxtTest extends \PHPUnit_Framework_TestCase
{
    /** Build a parser loaded with the example robots.txt published in
     * http://www.robotstxt.org/norobots-rfc.txt
     * @param string $crawler The crawler name the rules are evaluated for
     * @return Robotstxt The parser fed with the RFC example file
     */
    private function rfcRobotstxt($crawler)
    {
        return new Robotstxt(
            "# /robots.txt for http://www.fict.org/\n" .
            "# comments to webmaster@fict.org\n" .
            "User-agent: unhipbot\n" .
            "Disallow: /\n" .
            "User-agent: webcrawler\n" .
            "User-agent: excite\n" .
            "Disallow:\n" .
            "User-agent: *\n" .
            "Disallow: /org/plans.html\n" .
            "Allow: /org/\n" .
            "Allow: /serv\n" .
            "Allow: /~mak\n" .
            "Disallow: /",
            $crawler
        );
    }

    /** Check one cell of the access matrix of the RFC example
     * @param string $crawler The crawler name the rules are evaluated for
     * @param string $url The absolute URL the crawler wants to fetch
     * @param boolean $expected True if the RFC matrix grants the access
     */
    private function assertRfcAccess($crawler, $url, $expected)
    {
        $this->assertSame(
            $expected,
            $this->rfcRobotstxt($crawler)->URLAllow($url),
            "Crawler '$crawler' fetching $url"
        );
    }

    // Empty Robots : an empty content must allow everything and define
    // nothing else
    public function test_Construct_1()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame(["/"], $robotstxt->allow());
    }

    public function test_Construct_2()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame([], $robotstxt->disallow());
    }

    public function test_Construct_3()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertSame([], $robotstxt->sitemaps());
    }

    public function test_Construct_4()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertNull($robotstxt->crawldelay());
    }

    // Allow : an empty "Disallow:" is expected to be reported as allow "/"
    public function test_allow_1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow:\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    public function test_allow_2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    public function test_allow_3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    public function test_allow_4()
    {
        // The record naming DomSearch (among several User-Agent lines) is
        // expected to win over the catch-all record which disallows "/"
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\n" .
            "User-Agent: User1\n" .
            "User-Agent: User2\n" .
            "Disallow:\n\n" .
            "User-Agent: *\n" .
            "Disallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->allow());
    }

    // Disallow
    public function test_disallow_1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->disallow());
    }

    public function test_disallow_2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->disallow());
    }

    public function test_disallow_3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame(["/"], $robotstxt->disallow());
    }

    // Sitemaps
    public function test_sitemaps_1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertSame([], $robotstxt->sitemaps());
    }

    public function test_sitemaps_2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
            "domsearch"
        );
        $this->assertSame(
            ["http://example.com/sitemap.xml"],
            $robotstxt->sitemaps()
        );
    }

    public function test_sitemaps_3()
    {
        // Both URLs differ only by case and must be kept, in file order
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n" .
            "Sitemap: http://example.com/sitemap.xml\n" .
            "Sitemap: http://example.com/SITEMAP.XML",
            "domsearch"
        );
        $this->assertSame(
            ["http://example.com/sitemap.xml", "http://example.com/SITEMAP.XML"],
            $robotstxt->sitemaps()
        );
    }

    public function test_sitemaps_error_1()
    {
        // The error key (2) matches the zero-based position of the invalid
        // Sitemap line in the provided content
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nSitemap: URL",
            "domsearch"
        );
        $this->assertSame(
            [2 => "Sitemap : Invalid URL provided"],
            $robotstxt->errors()
        );
    }

    // Host
    public function test_host_1()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n",
            "domsearch"
        );
        $this->assertNull($robotstxt->host());
    }

    public function test_host_2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nHost: localhost",
            "domsearch"
        );
        $this->assertSame("localhost", $robotstxt->host());
    }

    public function test_host_error_1()
    {
        // When two Host lines are provided, the first one is the one kept
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
            "domsearch"
        );
        $this->assertSame("localhost", $robotstxt->host());
    }

    public function test_host_error_2()
    {
        // The error key (4) matches the zero-based position of the second
        // Host line in the provided content
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\n\nHost: localhost\nHoST: toto",
            "domsearch"
        );
        $this->assertSame([4 => "Multiple Hosts set"], $robotstxt->errors());
    }

    // URLAllow
    public function test_urlallow_1()
    {
        $robotstxt = new Robotstxt("", "domsearch");
        $this->assertTrue($robotstxt->URLAllow("/"));
    }

    public function test_urlallow_2()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /",
            "domsearch"
        );
        $this->assertFalse($robotstxt->URLAllow("/"));
    }

    public function test_urlallow_3()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/",
            "domsearch"
        );
        $this->assertFalse($robotstxt->URLAllow("/"));
    }

    public function test_urlallow_4()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/",
            "domsearch"
        );
        $this->assertTrue($robotstxt->URLAllow("/allow/file"));
    }

    // The three next tests use a pattern with the "*" wildcard and the "$"
    // end anchor. The dollar is escaped so it can never be read as the start
    // of a PHP variable inside the double-quoted string.
    public function test_urlallow_5()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
            "domsearch"
        );
        $this->assertTrue($robotstxt->URLAllow("/allow/file.gif"));
    }

    public function test_urlallow_6()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
            "domsearch"
        );
        $this->assertFalse($robotstxt->URLAllow("/allow/.gif"));
    }

    public function test_urlallow_7()
    {
        $robotstxt = new Robotstxt(
            "User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$",
            "domsearch"
        );
        $this->assertFalse($robotstxt->URLAllow("/allow/file.png"));
    }

    // Tests like http://www.robotstxt.org/norobots-rfc.txt
    // One test per cell of the access matrix of the RFC example.

    // unhipbot : everything is disallowed, except /robots.txt
    public function test_rfc_unhipbot_1()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/", false);
    }

    public function test_rfc_unhipbot_2()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/index.html", false);
    }

    public function test_rfc_unhipbot_3()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/robots.txt", true);
    }

    public function test_rfc_unhipbot_4()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/server.html", false);
    }

    public function test_rfc_unhipbot_5()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/services/fast.html", false);
    }

    public function test_rfc_unhipbot_6()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/services/slow.html", false);
    }

    public function test_rfc_unhipbot_7()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/orgo.gif", false);
    }

    public function test_rfc_unhipbot_8()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/org/about.html", false);
    }

    public function test_rfc_unhipbot_9()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/org/plans.html", false);
    }

    public function test_rfc_unhipbot_10()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/~jim/jim.html", false);
    }

    public function test_rfc_unhipbot_11()
    {
        $this->assertRfcAccess("unhipbot", "http://www.fict.org/~mak/mak.html", false);
    }

    // webcrawler : everything is allowed
    public function test_rfc_webcrawler_1()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/", true);
    }

    public function test_rfc_webcrawler_2()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/index.html", true);
    }

    public function test_rfc_webcrawler_3()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/robots.txt", true);
    }

    public function test_rfc_webcrawler_4()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/server.html", true);
    }

    public function test_rfc_webcrawler_5()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/services/fast.html", true);
    }

    public function test_rfc_webcrawler_6()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/services/slow.html", true);
    }

    public function test_rfc_webcrawler_7()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/orgo.gif", true);
    }

    public function test_rfc_webcrawler_8()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/org/about.html", true);
    }

    public function test_rfc_webcrawler_9()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/org/plans.html", true);
    }

    public function test_rfc_webcrawler_10()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/~jim/jim.html", true);
    }

    public function test_rfc_webcrawler_11()
    {
        $this->assertRfcAccess("webcrawler", "http://www.fict.org/~mak/mak.html", true);
    }

    // excite : shares the record of webcrawler, everything is allowed
    public function test_rfc_excite_1()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/", true);
    }

    public function test_rfc_excite_2()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/index.html", true);
    }

    public function test_rfc_excite_3()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/robots.txt", true);
    }

    public function test_rfc_excite_4()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/server.html", true);
    }

    public function test_rfc_excite_5()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/services/fast.html", true);
    }

    public function test_rfc_excite_6()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/services/slow.html", true);
    }

    public function test_rfc_excite_7()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/orgo.gif", true);
    }

    public function test_rfc_excite_8()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/org/about.html", true);
    }

    public function test_rfc_excite_9()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/org/plans.html", true);
    }

    public function test_rfc_excite_10()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/~jim/jim.html", true);
    }

    public function test_rfc_excite_11()
    {
        $this->assertRfcAccess("excite", "http://www.fict.org/~mak/mak.html", true);
    }

    // other : a crawler not named in the file, ruled by the "*" record
    public function test_rfc_other_1()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/", false);
    }

    public function test_rfc_other_2()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/index.html", false);
    }

    public function test_rfc_other_3()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/robots.txt", true);
    }

    public function test_rfc_other_4()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/server.html", true);
    }

    public function test_rfc_other_5()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/services/fast.html", true);
    }

    public function test_rfc_other_6()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/services/slow.html", true);
    }

    public function test_rfc_other_7()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/orgo.gif", false);
    }

    public function test_rfc_other_8()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/org/about.html", true);
    }

    public function test_rfc_other_9()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/org/plans.html", false);
    }

    public function test_rfc_other_10()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/~jim/jim.html", false);
    }

    public function test_rfc_other_11()
    {
        $this->assertRfcAccess("other", "http://www.fict.org/~mak/mak.html", true);
    }
}