36 private $_indexPath =
'';
37 private $_liveUpdate =
true;
39 private $_indexIsDirty =
false;
41 private static $_logger = null;
47 if (self::$_logger == null) {
52 array($this,
'stateChanged'));
61 array($this,
'stateChanged'));
70 $this->_indexPath = $fileUtil->realpath(WCMF_BASE.$indexPath).
'/';
71 $fileUtil->mkdirRec($this->_indexPath);
72 if (!is_writeable($this->_indexPath)) {
75 self::$_logger->debug(
"Lucene index location: ".$this->_indexPath);
83 return $this->_indexPath;
92 $this->_liveUpdate = $liveUpdate;
101 return $this->_liveUpdate;
110 if (strlen($word) < 3) {
111 return ($message->getText(
"The search term is too short"));
114 return ($message->getText(
"The search terms are too common"));
124 $index = $this->getIndex(
false);
127 $query = \Zend_Search_Lucene_Search_QueryParser::parse($searchTerm,
'UTF-8');
129 $hits = $index->find($query);
130 if ($pagingInfo != null && $pagingInfo->getPageSize() > 0) {
131 $pagingInfo->setTotalCount(
sizeof($hits));
132 $hits = array_slice($hits, $pagingInfo->getOffset(), $pagingInfo->getPageSize());
134 foreach($hits as $hit) {
140 $highlightedRegex =
'/((<b style="color:black;background-color:#[0-9a-f]{6}">)+)([^<]+?)((<\/b>)+)/';
141 $obj = $persistenceFacade->load($oid);
143 $valueNames = $obj->getValueNames(
true);
144 foreach ($valueNames as $curValueName) {
145 $inputType = $obj->getValueProperty($curValueName,
'input_type');
146 $value = $obj->getValue($curValueName);
147 if (!is_object($value) && !is_array($value)) {
149 if (strlen($value) > 0) {
150 $highlighted = @$query->htmlFragmentHighlightMatches(strip_tags($value),
'UTF-8');
152 if (preg_match($highlightedRegex, $highlighted, $matches)) {
153 $hitStr = $matches[3];
154 $highlighted = preg_replace($highlightedRegex,
' <em class="highlighted">$3</em> ', $highlighted);
155 $highlighted = trim(preg_replace(
'/ |[\n\r\t]/',
' ', $highlighted));
163 $results[$oidStr] = array(
165 'score' => $hit->score,
166 'summary' => $summary
182 return (
boolean) $obj->
getProperty(
'is_searchable');
190 return \Zend_Search_Lucene::create($indexPath);
197 self::$_logger->debug(
"Commit index");
198 if ($this->_indexIsDirty) {
199 $index = $this->getIndex(
false);
213 $index = $this->getIndex(
false);
224 $index = $this->getIndex();
225 $oidStr = $obj->
getOID()->__toString();
229 foreach ($localization->getSupportedLanguages() as $language => $languageName) {
231 $indexObj = $localization->loadTranslation($obj, $language,
false);
233 if (self::$_logger->isDebugEnabled()) {
234 self::$_logger->debug(
"Add/Update index for: ".$oidStr.
" language:".$language);
238 $doc = new \Zend_Search_Lucene_Document();
240 $valueNames = $indexObj->getValueNames(
true);
242 $doc->addField(\Zend_Search_Lucene_Field::unIndexed(
'oid', $oidStr,
'UTF-8'));
243 $typeField = \Zend_Search_Lucene_Field::keyword(
'type', $obj->
getType(),
'UTF-8');
244 $typeField->isStored =
false;
245 $doc->addField($typeField);
246 if ($language != null) {
247 $languageField = \Zend_Search_Lucene_Field::keyword(
'lang', $language,
'UTF-8');
248 $languageField->isStored =
false;
249 $doc->addField($languageField);
252 foreach ($valueNames as $curValueName) {
253 $inputType = $indexObj->getValueProperty($curValueName,
'input_type');
254 $value = $indexObj->getValue($curValueName);
255 if (!is_object($value) && !is_array($value)) {
257 if (preg_match(
'/^text|^f?ckeditor/', $inputType)) {
258 $value = strip_tags($value);
259 $doc->addField(\Zend_Search_Lucene_Field::unStored($curValueName, $value,
'UTF-8'));
262 $field = \Zend_Search_Lucene_Field::keyword($curValueName, $value,
'UTF-8');
263 $field->isStored =
false;
264 $doc->addField($field);
269 $term = new \Zend_Search_Lucene_Index_Term($oidStr,
'oid');
270 $docIds = $index->termDocs($term);
271 foreach ($docIds as $id) {
275 $index->addDocument($doc);
277 $this->_indexIsDirty =
true;
286 if (self::$_logger->isDebugEnabled()) {
287 self::$_logger->debug(
"Delete from index: ".$obj->
getOID());
289 $index = $this->getIndex();
291 $term = new \Zend_Search_Lucene_Index_Term($obj->
getOID()->__toString(),
'oid');
292 $docIds = $index->termDocs($term);
293 foreach ($docIds as $id) {
296 $this->_indexIsDirty =
true;
305 if ($this->_liveUpdate) {
324 private function getIndex($create =
true) {
325 if (!$this->_index || $create) {
332 $stopWordsFilter = new \Zend_Search_Lucene_Analysis_TokenFilter_StopWords($stopWords);
333 $analyzer->addFilter($stopWordsFilter);
335 \Zend_Search_Lucene_Analysis_Analyzer::setDefault($analyzer);
336 \Zend_Search_Lucene_Search_Query_Wildcard::setMinPrefixLength(0);
337 \Zend_Search_Lucene_Search_QueryParser::setDefaultEncoding(
'UTF-8');
338 \Zend_Search_Lucene_Search_QueryParser::setDefaultOperator(\Zend_Search_Lucene_Search_QueryParser::B_AND);
341 $this->_index = \Zend_Search_Lucene::open($indexPath);
345 catch (\Zend_Search_Lucene_Exception $ex) {
349 return $this->_index;
359 if (preg_match(
'/^f?ckeditor/', $inputType)) {
360 $value = html_entity_decode($value, ENT_QUOTES,
'UTF-8');
370 return explode(
"\n",
$GLOBALS[
'STOP_WORDS']);
374 class Analyzer extends \Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8Num_CaseInsensitive {
379 parent::setInput($data,
'UTF-8');
stateChanged(StateChangeEvent $event)
Listen to StateChangeEvents.
setIndexPath($indexPath)
Set the path to the search index.
getType()
Get the type of the object.
getOID()
Get the object id of the PersistentObject.
commitIndex($optimize=true)
getStopWords()
Get a list of words that are forbidden to search for.
static excerpt($string, $phrase, $radius=100)
Create an excerpt from the given text around the given phrase. Code based on: http://stackoverflow.com/questions/1292121/how-to-generate-the-snippet-like-generated-by-google-with-php-and-mysql.
getObject()
Get the object whose state has changed.
getProperty($name)
Get the value of a named property in the object.
StateChangeEvent signals a change of the state of a PersistentObject instance.
static getLogger($name)
Get the logger with the given name.
addToIndex(PersistentObject $obj)
find($searchTerm, PagingInfo $pagingInfo=null)
deleteFromIndex(PersistentObject $obj)
IndexedSearch implementations are used to search entity objects in a search index.
static getInstance($name, $dynamicConfiguration=array())
PagingInfo contains information about a paged list.
__construct()
Constructor.
static parse($oid)
Parse a serialized object id string into an ObjectId instance.
getOldValue()
Get the old value.
LuceneSearch provides access to the search based on Zend_Search_Lucene.
getIndexPath()
Get the path to the search index.
getNewValue()
Get the new value.
isSearchable(PersistentObject $obj)
FileUtil provides basic support for file functionality like HTTP file upload.
getLiveUpdate()
Check whether the search index should update itself when persistent objects are created/updated/deleted.
setInput($data, $encoding= '')
Override method to make sure we are using utf-8.
ConfigurationException signals an exception in the configuration.
setLiveUpdate($liveUpdate)
Set whether the search index should update itself when persistent objects are created/updated/deleted.
encodeValue($value, $inputType)
Encode the given value according to the input type.
$GLOBALS['STOP_WORDS']
Standard German/English stop words taken from Lucene's StopAnalyzer.
PersistentObject defines the interface of all persistent objects.