/* */
/** The Full Text Search
 * Analyze the provided search text (like a search engine does) and create
 * the SQL conditions needed to find the answer.
 * Manage the sentences (enclosed in double quotes) and the standalone words,
 * Manage the unwanted tokens (prefixed by -),
 * Do not search the tokens shorter than $minLength characters.
 */
class fts
{
  /** The minimum length (in characters) of a token to search */
  public $minLength = 3;

  /** The tokens found in the query, with the minus state if the user does
   * not want the provided token.
   * Null before the first search, then
   * array ("tokens"=>array (...), "minuses"=>array (...))
   */
  private $tokens = null;

  /** The tokens without the too small ones (same structure as $tokens) */
  private $tokensMin = null;

  /** The regexes created by the parser.
   * Null before the first search, then
   * array ("operator"=>array (...), "value"=>array (...))
   */
  private $regexes = null;

  /** Get the tokens stored by the last search
   * @return array|null The "tokens" and "minuses" lists, or null if no
   * search was done
   */
  public function getTokens ()
  {
    return $this->tokens;
  }

  /** Get the tokens stored by the last search, without the too small ones
   * @return array|null The "tokens" and "minuses" lists, or null if no
   * search was done
   */
  public function getTokensMin ()
  {
    return $this->tokensMin;
  }

  /** Get the regexes defined by the last search
   * @return array|null The "operator" and "value" lists, or null if no
   * search was done
   */
  public function getRegexes ()
  {
    return $this->regexes;
  }

  /** The constructor checks the availability of the MB module
   * @throws \Exception if the mb_strlen function is not available
   */
  public function __construct ()
  {
    if (! function_exists ("mb_strlen"))
      throw new \Exception ("PHP don't have the MB Support. Please add it !",
        500);
  }

  /** Analyze the text provided in $query and build the regexes to search.
   * The intermediate results are stored and available with getTokens (),
   * getTokensMin () and getRegexes ().
   * @param string $query The text to find in the database
   * @return array The operator and the associated regex value to search:
   * array ("operator"=>array (...), "value"=>array (...))
   */
  public function search ($query)
  {
    $query = trim ($query);
    $this->tokens = $this->tokenizer ($query);
    $this->tokensMin = $this->tokenMinLength ($this->tokens["tokens"],
      $this->tokens["minuses"]);
    $this->regexes = $this->regex ($this->tokensMin["tokens"],
      $this->tokensMin["minuses"]);
    return $this->regexes;
  }

  /** Search in SQL
   * For each field, all the regexes are combined with AND inside a
   * parenthesis, and the fields are combined with OR.
   * The result is ordered in descending order on the first field of the
   * dblayeroo object whose type is date, datetime or time (if any).
   * @param string $query The text to find in the database
   * @param object $dblayeroo The dblayeroo object to query. It is cloned, so
   * the provided object is not modified
   * @param array|null $fields The fields in $dblayeroo to look for data. If
   * null, look in all the fields defined in the dblayeroo object
   * @return array The result of the query, or an empty array if there is no
   * usable token in $query
   * @throws \Exception if a provided field is not defined in $dblayeroo
   */
  public function searchSQL ($query, $dblayeroo, $fields = null)
  {
    $regexes = $this->search ($query);
    if (empty ($regexes["operator"]))
      return array ();
    // Clone the object to not modify a previously defined query
    $dbl = clone $dblayeroo;
    $dbl->clearRequest ();
    $dbl->select ();
    if ($fields === null)
      $fields = array_keys ($dbl->fields ());
    $i = 0;
    foreach ($fields as $field)
    {
      if (! array_key_exists ($field, $dbl->fields ()))
        throw new \Exception (sprintf (
          _("The field '%s' doesn't exists in database"), $field), 500);
      if ($i > 0)
        $dbl->whereAddOR ();
      $dbl->whereAddParenthesisOpen ();
      $j = 0;
      foreach ($regexes["operator"] as $key=>$operator)
      {
        if ($j > 0)
          $dbl->whereAddAND ();
        $dbl->whereAdd ($field, $operator, $regexes["value"][$key]);
        $j++;
      }
      $dbl->whereAddParenthesisClose ();
      $i++;
    }
    // Look for the order by date if provided: only the first matching field
    // is used
    foreach ($dbl->fields () as $field=>$params)
    {
      if (in_array ($params[0], array ("date", "datetime", "time"), true))
      {
        $dbl->orderAdd ($field, "DESC");
        break;
      }
    }
    return $dbl->execute ();
  }

  /** Create the regex associated to the provided tokens and minuses.
   * Each token must be found at the beginning of the text or after a
   * separator, and must be followed by a separator or by the end of the
   * text.
   * NOTE(review): the token is escaped with preg_quote (), which targets
   * PCRE, but the value is used with the SQL REGEXP operator: confirm the
   * escaping is valid for the database engine in use.
   * @param array $tokens The token list
   * @param array $minuses The minuses list (same keys as $tokens)
   * @return array The operator and the associated regex value to search
   * @throws \Exception if $tokens or $minuses is not an array
   */
  private function regex ($tokens, $minuses)
  {
    if (! is_array ($tokens))
      throw new \Exception ("Invalid tokens provided to fts:regex", 500);
    if (! is_array ($minuses))
      throw new \Exception ("Invalid minuses provided to fts:regex", 500);
    $operator = array ();
    $value = array ();
    foreach ($tokens as $key=>$token)
    {
      // An unwanted token must NOT match
      $operator[$key] = ($minuses[$key] === "-") ? "NOT REGEXP" : "REGEXP";
      $value[$key] = "(^|[<> \(\"',.;/:!?\r\n])".
                     preg_quote ($token).
                     "([<> \)\"',.;/:!?\r\n]|$)";
    }
    return array ("operator"=>$operator, "value"=>$value);
  }

  /** Remove the tokens with too small length. Remove the associated minuses
   * too. The length is counted in characters (not in bytes) and the
   * returned lists are reindexed from 0.
   * @param array $tokens The token list
   * @param array $minuses The minuses list (same keys as $tokens)
   * @return array tokens and minuses
   * @throws \Exception if $tokens or $minuses is not an array
   */
  private function tokenMinLength ($tokens, $minuses)
  {
    if (! is_array ($tokens))
      throw new \Exception ("Invalid tokens provided to fts:tokenMinLength",
        500);
    if (! is_array ($minuses))
      throw new \Exception ("Invalid minuses provided to fts:tokenMinLength",
        500);
    $newTokens = array ();
    $newMinuses = array ();
    foreach ($tokens as $key=>$token)
    {
      if (mb_strlen ($token) < $this->minLength)
        continue;
      $newTokens[] = $token;
      $newMinuses[] = $minuses[$key];
    }
    return array ("tokens"=>$newTokens, "minuses"=>$newMinuses);
  }

  /** Return an array with the $query tokenized.
   * A token is a sentence enclosed in double quotes or a word delimited by
   * spaces. A leading - marks the token as unwanted. A double quote without
   * a closing one is handled as a part of a word.
   * Empty tokens can be returned (empty query, consecutive spaces, space
   * after a sentence, sentence ending the query): they are removed by
   * tokenMinLength () as long as $minLength is at least 1.
   * @param string $query The text to tokenize
   * @return array tokens and minuses
   * @throws \Exception if $query is not a string
   */
  private function tokenizer ($query)
  {
    if (! is_string ($query))
      throw new \Exception ("Invalid query provided to fts:tokenizer", 500);
    $tokens = array ();
    $minuses = array ();
    // All the offsets are byte offsets (strpos/substr), so the end of the
    // query must be a byte length too: bounding the loop by the number of
    // characters drops the last tokens of a query with multibyte characters.
    // The delimiters are ASCII, so byte-wise scanning is safe on UTF-8.
    $length = strlen ($query);
    $offset = 0;
    while ($offset <= $length)
    {
      $minus = "";
      if (substr ($query, $offset, 1) === "-")
      {
        $minus = "-";
        $offset++;
      }
      if (substr ($query, $offset, 1) === "\"")
      {
        // Sentence, see if there is an end
        $end = strpos ($query, "\"", $offset + 1);
        if ($end !== false)
        {
          // Complete sentence (with ending double quote): keep the text
          // between the quotes and restart just after the closing one
          $tokens[] = substr ($query, $offset + 1, $end - $offset - 1);
          $minuses[] = $minus;
          $offset = $end + 1;
          continue;
        }
      }
      // Word analysis: up to the next space or the end of the query
      $end = strpos ($query, " ", $offset);
      if ($end === false)
        $end = $length;
      $tokens[] = substr ($query, $offset, $end - $offset);
      $minuses[] = $minus;
      $offset = $end + 1;
    }
    return array ("tokens"=>$tokens, "minuses"=>$minuses);
  }
}