* @license BSD */

/** The Full Text Search
 * Analyze the provided search text (like a search engine does) and create the
 * SQL conditions needed to find the answer. It can also check if a line of
 * text is valid against a searched text.
 * - Manage the sentences (enclosed in double quotes) and the standalone words,
 * - Manage the unwanted tokens (beginning by -),
 * - Do not build a regex for the tokens shorter than $minLength,
 * - Each sentence or word can be modified separately by external methods.
 */
class fts
{
  ///////////////////////////
  ////    PROPERTIES     ////
  ///////////////////////////
  // {{{
  /** The minimum length (in characters) of a token to search
   */
  public $minLength = 3;

  /** The tokens found in the query, as an array with the keys "tokens",
   * "sentences" (true if the token was enclosed in double quotes) and
   * "minuses" ("-" if the user does not want the token, "" otherwise).
   * Null until search () is called
   */
  private $tokens = null;

  /** The tokens and minuses without the too small tokens
   */
  private $tokensMin = null;

  /** The regexes created by the parser
   */
  private $regexes = null;

  /** The callable method to run on each word of the query
   */
  private $callTokenWord = null;

  /** The callable method to run on each sentence of the query
   */
  private $callTokenSentence = null;
  // }}}

  ////////////////////////////
  ////    CONSTRUCTOR     ////
  ////////////////////////////
  /** The constructor checks the availability of the MB module
   * @throws \Exception if the mbstring functions are not available
   */
  public function __construct ()
  // {{{
  {
    if (! function_exists ("mb_strlen"))
      throw new \Exception (
        "PHP don't have the MB Support. \nPlease add it !", 500);
  }
  // }}}

  ////////////////////////
  ////    GETTERS     ////
  ////////////////////////
  /** Get the tokens stored by the last search
   * @return array|null The tokens, sentences and minuses, or null if no
   * search was done
   */
  public function getTokens ()
  // {{{
  {
    return $this->tokens;
  }
  // }}}

  /** Get the tokens stored by the last search, without the too small ones
   * @return array|null The tokens and minuses, or null if no search was done
   */
  public function getTokensMin ()
  // {{{
  {
    return $this->tokensMin;
  }
  // }}}

  /** Get the regexes defined by the last search
   * @return array|null The operators and values, or null if no search was
   * done
   */
  public function getRegexes ()
  // {{{
  {
    return $this->regexes;
  }
  // }}}

  /** Set the method to call on tokens word only
   * The method must return the token updated
   * @param callable $callable The callable method
   * @return $this
   * @throws \Exception if the provided method is not callable
   */
  public function callTokenWord ($callable)
  // {{{
  {
    if (! is_callable ($callable))
      throw new \Exception (dgettext ("domframework",
        "SSE : callTokenWord : provided method is not callable"), 500);
    $this->callTokenWord = $callable;
    return $this;
  }
  // }}}

  /** Set the method to call on tokens sentence only
   * The method must return the token updated
   * @param callable $callable The callable method
   * @return $this
   * @throws \Exception if the provided method is not callable
   */
  public function callTokenSentence ($callable)
  // {{{
  {
    if (! is_callable ($callable))
      throw new \Exception (dgettext ("domframework",
        "SSE : callTokenSentence : provided method is not callable"), 500);
    $this->callTokenSentence = $callable;
    return $this;
  }
  // }}}

  //////////////////////////////
  ////    PUBLIC METHODS    ////
  //////////////////////////////
  /** Explode the query text provided in $query, to be used to search in
   * database, file...
   * The tokens are passed to the registered callables, then the tokens
   * shorter than $minLength are removed before the regexes are built.
   * @param string $query The text to find in the database
   * @return array The operators and the associated regex values to search
   */
  public function search ($query)
  // {{{
  {
    $query = trim ($query);
    $this->tokens = $this->tokenizer ($query);
    foreach ($this->tokens["tokens"] as $key => &$token)
    {
      $isSentence = $this->tokens["sentences"][$key];
      if ($isSentence === true && $this->callTokenSentence)
        $token = call_user_func ($this->callTokenSentence, $token);
      elseif ($isSentence === false && $this->callTokenWord)
        $token = call_user_func ($this->callTokenWord, $token);
    }
    // Break the reference to not let a later write to $token modify the
    // last stored token
    unset ($token);
    $this->tokensMin = $this->tokenMinLength ($this->tokens["tokens"],
                                              $this->tokens["minuses"]);
    $this->regexes = $this->regex ($this->tokensMin["tokens"],
                                   $this->tokensMin["minuses"]);
    return $this->regexes;
  }
  // }}}

  /** Construct the query based on the tokens.
   * The tokens can be updated by methods so the query may be modified by the
   * external methods. The sentences are enclosed in double quotes again and
   * the unwanted tokens keep their leading minus, so the returned query has
   * the same meaning as the parsed one.
   * @return string The query, or an empty string if no search was done
   */
  public function getQuery ()
  // {{{
  {
    if (empty ($this->tokens["tokens"]))
      return "";
    $parts = array ();
    foreach ($this->tokens["tokens"] as $key => $token)
    {
      $minus = isset ($this->tokens["minuses"][$key]) ?
               $this->tokens["minuses"][$key] : "";
      if ($this->tokens["sentences"][$key] === true)
        $token = "\"$token\"";
      $parts[] = $minus.$token;
    }
    return implode (" ", $parts);
  }
  // }}}

  /** Return $line if the $query matches against $line, or false if not
   * Each token is looked for as a case-sensitive substring of $line: a wanted
   * token must be present and an unwanted token (with minus) must be absent.
   * All the tokens are checked here, whatever their length.
   * @param string $line The line to examine
   * @param string $query The query to apply on it
   * @return string|false The $line if match, false
   */
  public function searchString ($line, $query)
  // {{{
  {
    $this->search ($query);
    if (empty ($this->tokens))
      return false;
    foreach ($this->tokens["tokens"] as $key => $searchPart)
    {
      if (trim ($searchPart) === "")
        continue;
      $found = (strpos ($line, $searchPart) !== false);
      $unwanted = ($this->tokens["minuses"][$key] === "-");
      // Fail if a wanted token is missing or if an unwanted one is present
      if ($found === $unwanted)
        return false;
    }
    return $line;
  }
  // }}}

  /** Search in SQL
   * For each field, all the regexes must match (AND). The fields are
   * combined with OR. If the table has a date, datetime or time field, the
   * result is ordered by the first one found, newest first.
   * @param string $query The text to find in the database
   * @param object $dblayeroo The dblayeroo object to query
   * @param array|null $fields The fields in $dblayeroo to look for data. If
   * null, look in all the fields defined in the dblayeroo object
   * @return array The result of the query (empty array if the query doesn't
   * contain any usable token)
   * @throws \Exception if a requested field is not defined in $dblayeroo
   */
  public function searchSQL ($query, $dblayeroo, $fields = null)
  // {{{
  {
    $regexes = $this->search ($query);
    if (empty ($regexes["operator"]))
      return array ();
    // Clone the object to not modify a previously defined query
    $dbl = clone $dblayeroo;
    $dbl->clearRequest ();
    $dbl->select ();
    $definedFields = $dbl->fields ();
    if ($fields === null)
      $fields = array_keys ($definedFields);
    $i = 0;
    foreach ($fields as $field)
    {
      if (! array_key_exists ($field, $definedFields))
        throw new \Exception (sprintf (dgettext ("domframework",
          "The field '%s' doesn't exists in database"), $field), 500);
      if ($i > 0)
        $dbl->whereAddOR ();
      $dbl->whereAddParenthesisOpen ();
      $j = 0;
      foreach ($regexes["operator"] as $key => $operator)
      {
        if ($j > 0)
          $dbl->whereAddAND ();
        $dbl->whereAdd ($field, $operator, $regexes["value"][$key]);
        $j++;
      }
      $dbl->whereAddParenthesisClose ();
      $i++;
    }
    // Look for the order by date if provided
    foreach ($definedFields as $field => $params)
    {
      if (in_array ($params[0], array ("date", "datetime", "time"), true))
      {
        $dbl->orderAdd ($field, "DESC");
        break;
      }
    }
    return $dbl->execute ();
  }
  // }}}

  ///////////////////////////////
  ////    PRIVATE METHODS    ////
  ///////////////////////////////
  /** Create the regex associated to the provided tokens and minuses
   * The token must be surrounded by a separator (space, punctuation, bracket,
   * line break) or by the start/end of the text to match.
   * @param array $tokens The token list
   * @param array $minuses The minuses list
   * @return array The operator and the associated regex value to search
   * @throws \Exception if the tokens or the minuses are not arrays
   */
  private function regex ($tokens, $minuses)
  // {{{
  {
    if (! is_array ($tokens))
      throw new \Exception ("Invalid tokens provided to fts:regex", 500);
    if (! is_array ($minuses))
      throw new \Exception ("Invalid minuses provided to fts:regex", 500);
    $operator = array ();
    $value = array ();
    foreach ($tokens as $key => $token)
    {
      $operator[$key] = ($minuses[$key] === "-") ? "NOT REGEXP" : "REGEXP";
      $value[$key] = "(^|[<> \[\]\(\"',.;/:!?\r\n])".
                     preg_quote ($token).
                     "([<> \[\]\)\"',.;/:!?\r\n]|$)";
    }
    return array ("operator" => $operator, "value" => $value);
  }
  // }}}

  /** Remove the tokens with too small length. Remove the not desired minuses
   * too. The length is counted in characters, not in bytes. The returned
   * lists are reindexed from 0.
   * @param array $tokens The token list
   * @param array $minuses The minuses list
   * @return array tokens and minuses
   * @throws \Exception if the tokens or the minuses are not arrays
   */
  private function tokenMinLength ($tokens, $minuses)
  // {{{
  {
    if (! is_array ($tokens))
      throw new \Exception ("Invalid tokens provided to fts:tokenMinLength",
                            500);
    if (! is_array ($minuses))
      throw new \Exception ("Invalid minuses provided to fts:tokenMinLength",
                            500);
    $newTokens = array ();
    $newMinuses = array ();
    foreach ($tokens as $key => $token)
    {
      if (mb_strlen ($token) < $this->minLength)
        continue;
      $newTokens[] = $token;
      $newMinuses[] = $minuses[$key];
    }
    return array ("tokens" => $newTokens, "minuses" => $newMinuses);
  }
  // }}}

  /** Return an array with the $query tokenized
   * A token is a sentence (text between two double quotes) or a word (text
   * up to the next space). A leading minus marks the token as unwanted. A
   * double quote without closing one is handled as a part of a word.
   * @param string $query The text to tokenize
   * @return array tokens, sentences and minuses
   * @throws \Exception if the query is not a string
   */
  private function tokenizer ($query)
  // {{{
  {
    if (! is_string ($query))
      throw new \Exception ("Invalid query provided to fts:tokenizer", 500);
    $tokens = array ();
    $sentences = array ();
    $minuses = array ();
    // The offsets are byte offsets (strpos/substr), so the bound must be the
    // byte length too: a character count would stop the scan too early on
    // multibyte text. The delimiters (space, double quote, minus) are single
    // byte ASCII chars, so a byte scan never cuts an UTF-8 character.
    $length = strlen ($query);
    $offset = 0;
    while ($offset < $length)
    {
      $minus = "";
      if ($query[$offset] === "-")
      {
        $minus = "-";
        $offset++;
      }
      if ($offset < $length && $query[$offset] === "\"")
      {
        // Sentence, see if there is an end
        $end = strpos ($query, "\"", $offset + 1);
        if ($end !== false)
        {
          // Complete sentence (with ending double quote)
          $tokens[] = substr ($query, $offset + 1, $end - $offset - 1);
          $sentences[] = true;
          $minuses[] = $minus;
          $offset = $end + 1;
          continue;
        }
      }
      // Word analysis
      $end = strpos ($query, " ", $offset);
      if ($end === false)
        $end = $length;
      if ($end > $offset)
      {
        $tokens[] = substr ($query, $offset, $end - $offset);
        $sentences[] = false;
        $minuses[] = $minus;
      }
      $offset = $end + 1;
    }
    return array ("tokens" => $tokens,
                  "sentences" => $sentences,
                  "minuses" => $minuses);
  }
  // }}}
}