robotstxt: the analysis must be done at start. Use the constructor.

git-svn-id: https://svn.fournier38.fr/svn/ProgSVN/trunk@5344 bf3deb0d-5f1a-0410-827f-c0cc1f45334c
This commit is contained in:
2019-06-12 07:03:44 +00:00
parent f574476427
commit 330dfc3993
2 changed files with 67 additions and 133 deletions

View File

@@ -6,29 +6,25 @@ class test_model extends PHPUnit_Framework_TestCase
// Empty Robots
public function test_Construct_1 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze ("", "domsearch");
$robotstxt = new robotstxt ("", "domsearch");
$res = $robotstxt->allow ();
$this->assertSame ($res, ["/"]);
}
public function test_Construct_2 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze ("", "domsearch");
$robotstxt = new robotstxt ("", "domsearch");
$res = $robotstxt->disallow ();
$this->assertSame ($res, array ());
}
public function test_Construct_3 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze ("", "domsearch");
$robotstxt = new robotstxt ("", "domsearch");
$res = $robotstxt->sitemaps ();
$this->assertSame ($res, array ());
}
public function test_Construct_4 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze ("", "domsearch");
$robotstxt = new robotstxt ("", "domsearch");
$res = $robotstxt->crawldelay ();
$this->assertSame ($res, 3);
}
@@ -36,16 +32,14 @@ class test_model extends PHPUnit_Framework_TestCase
// Allow
public function test_allow_1 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow:\n", "domsearch");
$res = $robotstxt->allow ();
$this->assertSame ($res, ["/"]);
}
public function test_allow_2 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow:\n\nUser-Agent: DomSearch\nDisallow:\n",
"domsearch");
$res = $robotstxt->allow ();
@@ -53,8 +47,7 @@ class test_model extends PHPUnit_Framework_TestCase
}
public function test_allow_3 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: DomSearch\nDisallow:\n\nUser-Agent: *\nDisallow:\n",
"domsearch");
$res = $robotstxt->allow ();
@@ -62,8 +55,7 @@ class test_model extends PHPUnit_Framework_TestCase
}
public function test_allow_4 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: DomSearch\n".
"User-Agent: User1\n".
"User-Agent: User2\n".
@@ -77,16 +69,14 @@ class test_model extends PHPUnit_Framework_TestCase
// Disallow
public function test_disallow_1 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\n", "domsearch");
$res = $robotstxt->disallow ();
$this->assertSame ($res, ["/"]);
}
public function test_disallow_2 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\n\nUser-Agent: DomSearch\nDisallow: /\n",
"domsearch");
$res = $robotstxt->disallow ();
@@ -94,8 +84,7 @@ class test_model extends PHPUnit_Framework_TestCase
}
public function test_disallow_3 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
"domsearch");
$res = $robotstxt->disallow ();
@@ -105,8 +94,7 @@ class test_model extends PHPUnit_Framework_TestCase
// Sitemaps
public function test_sitemaps_1 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: DomSearch\nDisallow: /\n\nUser-Agent: *\nDisallow: /\n",
"domsearch");
$res = $robotstxt->sitemaps ();
@@ -114,8 +102,7 @@ class test_model extends PHPUnit_Framework_TestCase
}
public function test_sitemaps_2 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\nSitemap: http://example.com/sitemap.xml",
"domsearch");
$res = $robotstxt->sitemaps ();
@@ -123,8 +110,7 @@ class test_model extends PHPUnit_Framework_TestCase
}
public function test_sitemaps_3 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\n".
"Sitemap: http://example.com/sitemap.xml\n".
"Sitemap: http://example.com/SITEMAP.XML", "domsearch");
@@ -136,16 +122,14 @@ class test_model extends PHPUnit_Framework_TestCase
// Host
public function test_host_1 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\n", "domsearch");
$res = $robotstxt->host ();
$this->assertSame ($res, null);
}
public function test_host_2 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\n\nHost: localhost", "domsearch");
$res = $robotstxt->host ();
$this->assertSame ($res, "localhost");
@@ -154,55 +138,48 @@ class test_model extends PHPUnit_Framework_TestCase
// URLAllow
public function test_urlallow_1 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze ("", "domsearch");
$robotstxt = new robotstxt ("", "domsearch");
$res = $robotstxt->URLAllow ("/");
$this->assertSame ($res, true);
}
public function test_urlallow_2 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /", "domsearch");
$res = $robotstxt->URLAllow ("/");
$this->assertSame ($res, false);
}
public function test_urlallow_3 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\nAllow: /allow/", "domsearch");
$res = $robotstxt->URLAllow ("/");
$this->assertSame ($res, false);
}
public function test_urlallow_4 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\nAllow: /allow/", "domsearch");
$res = $robotstxt->URLAllow ("/allow/file");
$this->assertSame ($res, true);
}
public function test_urlallow_5 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$", "domsearch");
$res = $robotstxt->URLAllow ("/allow/file.gif");
$this->assertSame ($res, true);
}
public function test_urlallow_6 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif$", "domsearch");
$res = $robotstxt->URLAllow ("/allow/.gif");
$this->assertSame ($res, false);
}
public function test_urlallow_7 ()
{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"User-Agent: *\nDisallow: /\nAllow: /allow/*.gif\$", "domsearch");
$res = $robotstxt->URLAllow ("/allow/file.png");
$this->assertSame ($res, false);
@@ -212,8 +189,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_1 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -238,8 +214,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_2 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -264,8 +239,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_3 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -290,8 +264,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_4 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -316,8 +289,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_5 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -342,8 +314,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_6 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -393,8 +364,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_8 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -419,8 +389,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_9 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -445,8 +414,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_10 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -471,8 +439,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_unhipbot_11 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -497,8 +464,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_1 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -523,8 +489,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_2 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -549,8 +514,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_3 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -575,8 +539,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_4 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -601,8 +564,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_5 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -627,8 +589,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_6 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -653,8 +614,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_7 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -679,8 +639,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_8 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -705,8 +664,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_9 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -731,8 +689,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_10 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -757,8 +714,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_webcrawler_11 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -783,8 +739,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_1 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -809,8 +764,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_2 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -835,8 +789,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_3 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -861,8 +814,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_4 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -887,8 +839,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_5 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -913,8 +864,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_6 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -939,8 +889,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_7 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -965,8 +914,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_8 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -991,8 +939,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_9 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1017,8 +964,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_10 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1043,8 +989,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_excite_11 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1069,8 +1014,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_1 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1094,8 +1038,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_2 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1120,8 +1063,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_3 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1146,8 +1088,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_4 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1172,8 +1113,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_5 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1198,8 +1138,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_6 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1224,8 +1163,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_7 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1250,8 +1188,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_8 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1276,8 +1213,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_9 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1302,8 +1238,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_10 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org
@@ -1328,8 +1263,7 @@ class test_model extends PHPUnit_Framework_TestCase
public function test_rfc_other_11 ()
{
// {{{
$robotstxt = new robotstxt ();
$robotstxt->analyze (
$robotstxt = new robotstxt (
"# /robots.txt for http://www.fict.org/
# comments to webmaster@fict.org

View File

@@ -47,7 +47,7 @@ class robotstxt
* @param string $crawlerName The crawler name to use in analyze
* @return $this
*/
public function analyze ($content, $crawlerName)
public function __construct ($content, $crawlerName)
// {{{
{
if (trim ($content) === "")