* dblayeroo : add a new dblayer class. More OO, supports more options than dblayer
* fts : The Full Text Search module for dummies git-svn-id: https://svn.fournier38.fr/svn/ProgSVN/trunk@3459 bf3deb0d-5f1a-0410-827f-c0cc1f45334c
This commit is contained in:
222
fts.php
Normal file
222
fts.php
Normal file
@@ -0,0 +1,222 @@
|
||||
<?php
|
||||
/** DomFramework
|
||||
* @package domframework
|
||||
* @author Dominique Fournier <dominique@fournier38.fr>
|
||||
*/
|
||||
|
||||
/** The Full Text Search
|
||||
* Analyze the provided search text (like a search engine), and create the
|
||||
* SQL query to find the answer.
|
||||
* Manage the sentences (enclosed in quotes), or the standalone words,
|
||||
* Manage the unwanted tokens (prefixed with -),
|
||||
* Do not search if the word is smaller than a parameter.
|
||||
*/
|
||||
class fts
{
  /** The minimum length (in characters) of a token to search. Shorter tokens
   * are still returned by getTokens (), but are dropped from getTokensMin ()
   * and from the generated regexes
   */
  public $minLength = 3;

  /** The tokens found in the query, with the minus state if the user does not
   * want the provided token. Array with the keys "tokens" and "minuses", or
   * null before the first search
   */
  private $tokens = null;

  /** The tokens without the too small tokens. Same structure as $tokens
   */
  private $tokensMin = null;

  /** The regexes created by the parser. Array with the keys "operator" and
   * "value", or null before the first search
   */
  private $regexes = null;

  /** Get the tokens stored by the last search
   * @return array|null The "tokens" and "minuses" lists, or null if no search
   * was done
   */
  public function getTokens ()
  {
    return $this->tokens;
  }

  /** Get the tokens stored by the last search, without the too small ones
   * @return array|null The "tokens" and "minuses" lists, or null if no search
   * was done
   */
  public function getTokensMin ()
  {
    return $this->tokensMin;
  }

  /** Get the regexes defined by the last search
   * @return array|null The "operator" and "value" lists, or null if no search
   * was done
   */
  public function getRegexes ()
  {
    return $this->regexes;
  }

  /** Analyze the text provided in $query and build the regexes to search
   * The intermediate results are kept and available by getTokens (),
   * getTokensMin () and getRegexes ()
   * @param string $query The text to find in the database
   * @return array The operator and the associated regex value to search
   * @throws \Exception If the query is not a string
   */
  public function search ($query)
  {
    $query = trim ($query);
    $this->tokens = $this->tokenizer ($query);
    $this->tokensMin = $this->tokenMinLength ($this->tokens["tokens"],
                                              $this->tokens["minuses"]);
    $this->regexes = $this->regex ($this->tokensMin["tokens"],
                                   $this->tokensMin["minuses"]);
    return $this->regexes;
  }

  /** Search in SQL
   * For each field, a parenthesis group is created with all the regexes
   * joined by AND. The groups of the different fields are joined by OR.
   * @param string $query The text to find in the database
   * @param object $dblayeroo The dblayeroo object to query
   * @param array|null $fields The fields in $dblayeroo to look for data. If
   * null, look in all the fields defined in the dblayeroo object
   * @return array The result of the query
   * @throws \Exception If a requested field is not defined in $dblayeroo
   */
  public function searchSQL ($query, $dblayeroo, $fields = null)
  {
    $regexes = $this->search ($query);
    // Clone the object to not modify a previously defined query
    $dbl = clone $dblayeroo;
    $dbl->clearRequest ();
    $dbl->select ();
    // Read the field definitions once instead of on each iteration
    $definedFields = $dbl->fields ();
    if ($fields === null)
      $fields = array_keys ($definedFields);
    // NOTE(review): if no token reaches $minLength, the regexes are empty and
    // each field produces an empty parenthesis group. Confirm that dblayeroo
    // accepts that.
    $firstField = true;
    foreach ($fields as $field)
    {
      if (! array_key_exists ($field, $definedFields))
        throw new \Exception (sprintf (
                        _("The field '%s' doesn't exists in database"),
                        $field), 500);
      if (! $firstField)
        $dbl->whereAddOR ();
      $firstField = false;
      $dbl->whereAddParenthesisOpen ();
      $firstRegex = true;
      foreach ($regexes["operator"] as $key=>$operator)
      {
        if (! $firstRegex)
          $dbl->whereAddAND ();
        $firstRegex = false;
        $dbl->whereAdd ($field, $operator, $regexes["value"][$key]);
      }
      $dbl->whereAddParenthesisClose ();
    }
    return $dbl->execute ();
  }

  /** Create the regex associated to the provided tokens and minuses
   * A token flagged by "-" is associated to "NOT REGEXP", the others to
   * "REGEXP". The token is quoted and must be surrounded by a separator or by
   * the start/end of the text.
   * @param array $tokens The token list
   * @param array $minuses The minuses list
   * @return array The operator and the associated regex value to search
   * @throws \Exception If the tokens or the minuses are not arrays
   */
  private function regex ($tokens, $minuses)
  {
    if (! is_array ($tokens))
      throw new \Exception ("Invalid tokens provided to fts:regex", 500);
    if (! is_array ($minuses))
      throw new \Exception ("Invalid minuses provided to fts:regex", 500);
    $operator = array ();
    $value = array ();
    foreach ($tokens as $key=>$token)
    {
      // A missing minus entry is managed like a wanted token
      $unwanted = isset ($minuses[$key]) && $minuses[$key] === "-";
      $operator[$key] = $unwanted ? "NOT REGEXP" : "REGEXP";
      $value[$key] = "(^|[<> \(\",.;/:!?])".
                     preg_quote ($token).
                     "([<> \)\",.;/:!?]|$)";
    }
    return array ("operator"=>$operator, "value"=>$value);
  }

  /** Remove the tokens with too small length. Remove the not desired minuses
   * too. The returned lists are reindexed from 0.
   * @param array $tokens The token list
   * @param array $minuses The minuses list
   * @return array tokens and minuses
   * @throws \Exception If the tokens or the minuses are not arrays
   */
  private function tokenMinLength ($tokens, $minuses)
  {
    if (! is_array ($tokens))
      throw new \Exception ("Invalid tokens provided to fts:tokenMinLength",
                            500);
    if (! is_array ($minuses))
      throw new \Exception ("Invalid minuses provided to fts:tokenMinLength",
                            500);
    $newTokens = array ();
    $newMinuses = array ();
    foreach ($tokens as $key=>$token)
    {
      // The length is counted in characters, not in bytes
      if (mb_strlen ($token) < $this->minLength)
        continue;
      $newTokens[] = $token;
      $newMinuses[] = isset ($minuses[$key]) ? $minuses[$key] : "";
    }
    return array ("tokens"=>$newTokens, "minuses"=>$newMinuses);
  }

  /** Return an array with the $query tokenized
   * A token is a sentence enclosed in double quotes, or a word delimited by
   * spaces. A leading "-" flags the token as not wanted. An opening double
   * quote without a closing one is read like a word.
   * Empty tokens (by example between two consecutive spaces or just after a
   * closing double quote) are kept here: they are removed by the minimum
   * length filter.
   * @param string $query The text to tokenize
   * @return array tokens and minuses
   * @throws \Exception If the query is not a string
   */
  private function tokenizer ($query)
  {
    if (! is_string ($query))
      throw new \Exception ("Invalid query provided to fts:tokenizer", 500);
    $tokens = array ();
    $minuses = array ();
    // All the offsets are in bytes, as substr/strpos are byte oriented. The
    // delimiters (minus, double quote, space) are ASCII, so cutting on them
    // never breaks a UTF-8 multi-byte character. Comparing the byte offset
    // with mb_strlen dropped the last tokens of multi-byte queries.
    $length = strlen ($query);
    $offset = 0;
    while ($offset <= $length)
    {
      $minus = "";
      if (substr ($query, $offset, 1) === "-")
      {
        $minus = "-";
        $offset++;
      }
      if (substr ($query, $offset, 1) === "\"")
      {
        // Sentence, see if there is a end
        $end = strpos ($query, "\"", $offset + 1);
        if ($end !== false)
        {
          // Complete sentence (with ending double quote)
          $tokens[] = substr ($query, $offset + 1, $end - $offset - 1);
          $minuses[] = $minus;
          $offset = $end + 1;
          continue;
        }
      }
      // Word analysis: up to the next space or to the end of the query
      $end = strpos ($query, " ", $offset);
      if ($end === false)
        $end = $length;
      $tokens[] = substr ($query, $offset, $end - $offset);
      $minuses[] = $minus;
      $offset = $end + 1;
    }
    return array ("tokens"=>$tokens, "minuses"=>$minuses);
  }
}
|
||||
Reference in New Issue
Block a user