/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.search;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.StringWriter;
import java.lang.reflect.Constructor;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.Iterator;
import java.util.Properties;
import java.util.Vector;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.apache.wiki.InternalWikiException;
import org.apache.wiki.WatchDog;
import org.apache.wiki.WikiBackgroundThread;
import org.apache.wiki.WikiEngine;
import org.apache.wiki.WikiPage;
import org.apache.wiki.WikiProvider;
import org.apache.wiki.api.exceptions.NoRequiredPropertyException;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.attachment.Attachment;
import org.apache.wiki.attachment.AttachmentManager;
import org.apache.wiki.parser.MarkupParser;
import org.apache.wiki.providers.WikiPageProvider;
import org.apache.wiki.util.ClassUtil;
import org.apache.wiki.util.FileUtil;
import org.apache.wiki.util.TextUtil;


/**
 *  A {@link SearchProvider} implementation backed by a Lucene index that is kept
 *  in the <code>lucene</code> subdirectory of the engine's work directory.
 *  <p>
 *  Index modifications are not performed by the calling thread: {@link #reindexPage(WikiPage)}
 *  only queues the page, and a background {@code LuceneUpdater} thread drains the queue
 *  and writes to the index.
 *
 *  @since 2.2.21.
 */
public class LuceneSearchProvider implements SearchProvider {

    protected static final Logger log = Logger.getLogger(LuceneSearchProvider.class);

    private WikiEngine m_engine;

    // Lucene properties.

    /** Which analyzer to use.  Default is ClassicAnalyzer (see the analyzer class field below). */
    public static final String PROP_LUCENE_ANALYZER      = "jspwiki.lucene.analyzer";

    private static final String PROP_LUCENE_INDEXDELAY   = "jspwiki.lucene.indexdelay";
    private static final String PROP_LUCENE_INITIALDELAY = "jspwiki.lucene.initialdelay";

    private String m_analyzerClass = "org.apache.lucene.analysis.standard.ClassicAnalyzer";

    private static final String LUCENE_DIR = "lucene";

    /**
     *  These attachment file suffixes will be indexed.  Every entry carries its leading dot
     *  so that only real file extensions match.
     */
    public static final String[] SEARCHABLE_FILE_SUFFIXES = new String[] { ".txt", ".ini", ".xml", ".html", ".mm", ".htm",
                                                                           ".xhtml", ".java", ".c", ".cpp", ".php", ".asm", ".sh",
                                                                           ".properties", ".kml", ".gpx", ".loc" };

    protected static final String LUCENE_ID            = "id";
    protected static final String LUCENE_PAGE_CONTENTS = "contents";
    protected static final String LUCENE_AUTHOR        = "author";
    protected static final String LUCENE_ATTACHMENTS   = "attachment";
    protected static final String LUCENE_PAGE_NAME     = "name";

    private String m_luceneDirectory;

    /** Queue of {page, text} pairs waiting to be indexed. */
    protected Vector<Object[]> m_updates = new Vector<Object[]>(); // Vector because multi-threaded.

    /** Maximum number of fragments from search matches. */
    private static final int MAX_FRAGMENTS = 3;

    /** The maximum number of hits to return from searches. */
    public static final int MAX_SEARCH_HITS = 99999;

    /** One space per allowed punctuation character; used to blank out punctuation in page names. */
    private static final String c_punctuationSpaces = StringUtils.repeat( " ", MarkupParser.PUNCTUATION_CHARS_ALLOWED.length() );

    /**
     *  {@inheritDoc}
     */
    public void initialize( WikiEngine engine, Properties props )
        throws NoRequiredPropertyException, IOException
    {
        m_engine = engine;

        m_luceneDirectory = engine.getWorkDir() + File.separator + LUCENE_DIR;

        int initialDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY );
        int indexDelay   = TextUtil.getIntegerProperty( props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY );

        m_analyzerClass = TextUtil.getStringProperty( props, PROP_LUCENE_ANALYZER, m_analyzerClass );
        // FIXME: Just to be simple for now, we will do full reindex
        //        only if no files are in lucene directory.

        File dir = new File( m_luceneDirectory );

        log.info( "Lucene enabled, cache will be in: " + dir.getAbsolutePath() );

        try
        {
            if( !dir.exists() )
            {
                dir.mkdirs();
            }

            if( !dir.exists() || !dir.canWrite() || !dir.canRead() )
            {
                log.error( "Cannot write to Lucene directory, disabling Lucene: " + dir.getAbsolutePath() );
                throw new IOException( "Invalid Lucene directory." );
            }

            if( dir.list() == null )
            {
                throw new IOException( "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath() );
            }
        }
        catch( IOException e )
        {
            // Directory problems are only logged here; the updater thread is still started below.
            log.error( "Problem while creating Lucene index - not using Lucene.", e );
        }

        // Start the Lucene update thread, which waits first
        // for a little while before starting to go through
        // the Lucene "pages that need updating".
        LuceneUpdater updater = new LuceneUpdater( m_engine, this, initialDelay, indexDelay );
        updater.start();
    }

    /**
     *  Returns the handling engine.
     *
     *  @return Current WikiEngine
     */
    protected WikiEngine getEngine()
    {
        return m_engine;
    }

    /**
     *  Performs a full Lucene reindex, if necessary.  A reindex is considered necessary
     *  only when the Lucene directory contains no files at all; all pages and then all
     *  attachments are indexed in that case.
     *
     *  @throws IOException If the Lucene directory cannot be listed
     *  @throws IllegalArgumentException If the pages or attachments cannot be read from the provider
     */
    protected void doFullLuceneReindex()
        throws IOException
    {
        File dir = new File( m_luceneDirectory );

        String[] filelist = dir.list();

        if( filelist == null )
        {
            throw new IOException( "Invalid Lucene directory: cannot produce listing: " + dir.getAbsolutePath() );
        }

        try
        {
            if( filelist.length == 0 )
            {
                //
                //  No files? Reindex!
                //
                Date start = new Date();
                IndexWriter writer = null;

                log.info( "Starting Lucene reindexing, this can take a couple of minutes..." );

                Directory luceneDir = new SimpleFSDirectory( dir, null );

                try
                {
                    writer = getIndexWriter( luceneDir );
                    Collection allPages = m_engine.getPageManager().getAllPages();

                    for( Iterator iterator = allPages.iterator(); iterator.hasNext(); )
                    {
                        WikiPage page = (WikiPage) iterator.next();

                        try
                        {
                            String text = m_engine.getPageManager().getPageText( page.getName(),
                                                                                 WikiProvider.LATEST_VERSION );
                            luceneIndexPage( page, text, writer );
                        }
                        catch( IOException e )
                        {
                            // A single unindexable page must not abort the whole reindex.
                            log.warn( "Unable to index page " + page.getName() + ", continuing to next ", e );
                        }
                    }

                    Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments();
                    for( Iterator iterator = allAttachments.iterator(); iterator.hasNext(); )
                    {
                        Attachment att = (Attachment) iterator.next();

                        try
                        {
                            String text = getAttachmentContent( att.getName(), WikiProvider.LATEST_VERSION );
                            luceneIndexPage( att, text, writer );
                        }
                        catch( IOException e )
                        {
                            log.warn( "Unable to index attachment " + att.getName() + ", continuing to next", e );
                        }
                    }
                }
                finally
                {
                    close( writer );
                }

                Date end = new Date();
                log.info( "Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds." );
            }
            else
            {
                log.info( "Files found in Lucene directory, not reindexing." );
            }
        }
        catch( NoClassDefFoundError e )
        {
            log.info( "Lucene libraries do not exist - not using Lucene." );
        }
        catch( IOException e )
        {
            log.error( "Problem while creating Lucene index - not using Lucene.", e );
        }
        catch( ProviderException e )
        {
            log.error( "Problem reading pages while creating Lucene index (JSPWiki won't start.)", e );
            // Keep the original failure attached so the root cause is visible to the caller.
            throw new IllegalArgumentException( "unable to create Lucene index", e );
        }
        catch( Exception e )
        {
            log.error( "Unable to start lucene", e );
        }
    }

    /**
     *  Fetches the attachment content from the repository.
     *  Content is flat text that can be used for indexing/searching or display
     *
     *  @param attachmentName Name of the attachment.
     *  @param version The version of the attachment.
     *
     *  @return the content of the Attachment as a String, or <code>null</code> if the
     *          attachment could not be found or loaded.
     */
    protected String getAttachmentContent( String attachmentName, int version )
    {
        AttachmentManager mgr = m_engine.getAttachmentManager();

        try
        {
            Attachment att = mgr.getAttachmentInfo( attachmentName, version );
            //FIXME: Find out why sometimes att is null
            if( att != null )
            {
                return getAttachmentContent( att );
            }
        }
        catch( ProviderException e )
        {
            log.error( "Attachment cannot be loaded", e );
        }
        // Something was wrong, no result is returned.
        return null;
    }

    /**
     *  Reads the textual content of an attachment whose file name ends with one of
     *  {@link #SEARCHABLE_FILE_SUFFIXES}.
     *
     *  @param att Attachment to get content for. Filename extension is used to determine the type of the attachment.
     *  @return String representing the content of the file, or <code>null</code> if the
     *          attachment has no searchable suffix or could not be read.
     *  FIXME This is a very simple implementation of some text-based attachment, mainly used for testing.
     *  This should be replaced /moved to Attachment search providers or some other 'pluggable' way to search attachments
     */
    protected String getAttachmentContent( Attachment att )
    {
        AttachmentManager mgr = m_engine.getAttachmentManager();
        //FIXME: Add attachment plugin structure

        String filename = att.getFileName();
        if( filename == null )
        {
            // Without a file name the suffix cannot be checked; treat as not searchable.
            return null;
        }

        boolean searchSuffix = false;
        for( String suffix : SEARCHABLE_FILE_SUFFIXES )
        {
            if( filename.endsWith( suffix ) )
            {
                searchSuffix = true;
                break;
            }
        }

        String out = null;
        if( searchSuffix )
        {
            InputStream attStream = null;
            StringWriter sout = new StringWriter();

            try
            {
                attStream = mgr.getAttachmentStream( att );
                if( attStream != null )
                {
                    // NOTE(review): decodes with the platform default charset, as before.
                    FileUtil.copyContents( new InputStreamReader( attStream ), sout );
                    out = sout.toString();
                }
                else
                {
                    log.error( "Attachment cannot be loaded: no stream available for " + filename );
                }
            }
            catch( ProviderException e )
            {
                log.error( "Attachment cannot be loaded", e );
            }
            catch( IOException e )
            {
                log.error( "Attachment cannot be loaded", e );
            }
            finally
            {
                IOUtils.closeQuietly( attStream );
                IOUtils.closeQuietly( sout );
            }
        }

        return out;
    }

    /**
     *  Updates the lucene index for a single page: the existing document for the page
     *  is removed first and a fresh one is then added.
     *
     *  @param page The WikiPage to check
     *  @param text The page text to index.
     */
    protected synchronized void updateLuceneIndex( WikiPage page, String text )
    {
        IndexWriter writer = null;

        log.debug( "Updating Lucene index for page '" + page.getName() + "'..." );

        try
        {
            pageRemoved( page );

            // Now add back the new version.
            Directory luceneDir = new SimpleFSDirectory( new File( m_luceneDirectory ), null );
            writer = getIndexWriter( luceneDir );

            luceneIndexPage( page, text, writer );
        }
        catch( IOException e )
        {
            log.error( "Unable to update page '" + page.getName() + "' from Lucene index", e );
        }
        catch( Exception e )
        {
            log.error( "Unexpected Lucene exception - please check configuration!", e );
        }
        finally
        {
            close( writer );
        }

        log.debug( "Done updating Lucene index for page '" + page.getName() + "'." );
    }

    /**
     *  Instantiates the configured analyzer class reflectively through its
     *  single-argument {@link Version} constructor.
     *
     *  @return a new Analyzer instance
     *  @throws ProviderException if the class cannot be found or instantiated
     */
    private Analyzer getLuceneAnalyzer() throws ProviderException
    {
        try
        {
            Class< ? > clazz = ClassUtil.findClass( "", m_analyzerClass );
            Constructor< ? > constructor = clazz.getConstructor( Version.LUCENE_47.getClass() );
            return (Analyzer) constructor.newInstance( Version.LUCENE_47 );
        }
        catch( Exception e )
        {
            String msg = "Could not get LuceneAnalyzer class " + m_analyzerClass + ", reason: ";
            log.error( msg, e );
            throw new ProviderException( msg + e );
        }
    }

    /**
     *  Indexes page using the given IndexWriter.  If <code>text</code> is <code>null</code>,
     *  nothing is written and an empty Document is returned.
     *
     *  @param page WikiPage
     *  @param text Page text to index
     *  @param writer The Lucene IndexWriter to use for indexing
     *  @return the created index Document
     *  @throws IOException If there's an indexing problem
     */
    protected Document luceneIndexPage( WikiPage page, String text, IndexWriter writer )
        throws IOException
    {
        if( log.isDebugEnabled() ) log.debug( "Indexing " + page.getName() + "..." );

        // make a new, empty document
        Document doc = new Document();

        if( text == null ) return doc;

        // Raw name is the keyword we'll use to refer to this document for updates.
        Field field = new Field( LUCENE_ID, page.getName(), StringField.TYPE_STORED );
        doc.add( field );

        // Body text.  It is stored in the doc for search contexts.
        field = new Field( LUCENE_PAGE_CONTENTS, text, TextField.TYPE_STORED );
        doc.add( field );

        // Allow searching by page name. Both beautified and raw
        String unTokenizedTitle = StringUtils.replaceChars( page.getName(),
                                                            MarkupParser.PUNCTUATION_CHARS_ALLOWED,
                                                            c_punctuationSpaces );

        field = new Field( LUCENE_PAGE_NAME,
                           TextUtil.beautifyString( page.getName() ) + " " + unTokenizedTitle,
                           TextField.TYPE_STORED );
        doc.add( field );

        // Allow searching by authorname

        if( page.getAuthor() != null )
        {
            field = new Field( LUCENE_AUTHOR, page.getAuthor(), TextField.TYPE_STORED );
            doc.add( field );
        }

        // Now add the names of the attachments of this page
        try
        {
            Collection attachments = m_engine.getAttachmentManager().listAttachments( page );
            StringBuilder attachmentNames = new StringBuilder();

            for( Iterator it = attachments.iterator(); it.hasNext(); )
            {
                Attachment att = (Attachment) it.next();
                attachmentNames.append( att.getName() ).append( ';' );
            }
            field = new Field( LUCENE_ATTACHMENTS, attachmentNames.toString(), TextField.TYPE_STORED );
            doc.add( field );
        }
        catch( ProviderException e )
        {
            // Unable to read attachments
            log.error( "Failed to get attachments for page", e );
        }
        writer.addDocument( doc );

        return doc;
    }

    /**
     *  {@inheritDoc}
     */
    public void pageRemoved( WikiPage page )
    {
        IndexWriter writer = null;
        try
        {
            Directory luceneDir = new SimpleFSDirectory( new File( m_luceneDirectory ), null );
            writer = getIndexWriter( luceneDir );
            Query query = new TermQuery( new Term( LUCENE_ID, page.getName() ) );
            writer.deleteDocuments( query );
        }
        catch( Exception e )
        {
            log.error( "Unable to remove page '" + page.getName() + "' from Lucene index", e );
        }
        finally
        {
            close( writer );
        }
    }

    /**
     *  Opens an IndexWriter on the given directory in CREATE_OR_APPEND mode,
     *  using the configured analyzer.
     *
     *  @param luceneDir the directory holding the index
     *  @return a newly opened IndexWriter; the caller is responsible for closing it
     *  @throws CorruptIndexException declared by the original signature
     *  @throws LockObtainFailedException declared by the original signature
     *  @throws IOException if the writer cannot be opened
     *  @throws ProviderException if the analyzer cannot be instantiated
     */
    IndexWriter getIndexWriter( Directory luceneDir ) throws CorruptIndexException,
            LockObtainFailedException, IOException, ProviderException
    {
        IndexWriterConfig writerConfig = new IndexWriterConfig( Version.LUCENE_47, getLuceneAnalyzer() );
        writerConfig.setOpenMode( OpenMode.CREATE_OR_APPEND );
        return new IndexWriter( luceneDir, writerConfig );
    }

    /**
     *  Closes the given writer, logging (not propagating) any I/O failure.
     *
     *  @param writer the writer to close; may be <code>null</code>, in which case nothing happens
     */
    void close( IndexWriter writer )
    {
        try
        {
            if( writer != null )
            {
                writer.close( true );
            }
        }
        catch( IOException e )
        {
            log.error( "Unable to close Lucene index writer", e );
        }
    }


    /**
     *  Adds a page-text pair to the lucene update queue.  Safe to call always
     *
     *  @param page WikiPage to add to the update queue.
     */
    public void reindexPage( WikiPage page )
    {
        if( page != null )
        {
            String text;

            // TODO: Think if this was better done in the thread itself?

            if( page instanceof Attachment )
            {
                text = getAttachmentContent( (Attachment) page );
            }
            else
            {
                text = m_engine.getPureText( page );
            }

            if( text != null )
            {
                // Add work item to m_updates queue.
                Object[] pair = new Object[2];
                pair[0] = page;
                pair[1] = text;
                m_updates.add( pair );
                log.debug( "Scheduling page " + page.getName() + " for index update" );
            }
        }
    }

    /**
     *  {@inheritDoc}
     */
    public Collection findPages( String query )
        throws ProviderException
    {
        return findPages( query, FLAG_CONTEXTS );
    }

    /**
     *  Create contexts also.  Generating contexts can be expensive,
     *  so they're not on by default.
     */
    public static final int FLAG_CONTEXTS = 0x01;

    /**
     *  Searches pages using a particular combination of flags.
     *
     *  @param query The query to perform in Lucene query language
     *  @param flags A set of flags
     *  @return A Collection of SearchResult instances, or <code>null</code> if the index
     *          could not be opened or the search failed before any result was collected
     *  @throws ProviderException if there is a problem with the backend
     */
    public Collection findPages( String query, int flags )
        throws ProviderException
    {
        IndexSearcher searcher = null;
        ArrayList<SearchResult> list = null;
        Highlighter highlighter = null;

        try
        {
            String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
            QueryParser qp = new MultiFieldQueryParser( Version.LUCENE_47, queryfields, getLuceneAnalyzer() );

            Query luceneQuery = qp.parse( query );

            if( (flags & FLAG_CONTEXTS) != 0 )
            {
                highlighter = new Highlighter( new SimpleHTMLFormatter( "<span class=\"searchmatch\">", "</span>" ),
                                               new SimpleHTMLEncoder(),
                                               new QueryScorer( luceneQuery ) );
            }

            try
            {
                File dir = new File( m_luceneDirectory );
                Directory luceneDir = new SimpleFSDirectory( dir, null );
                IndexReader reader = DirectoryReader.open( luceneDir );
                searcher = new IndexSearcher( reader );
            }
            catch( Exception ex )
            {
                log.info( "Lucene not yet ready; indexing not started", ex );
                return null;
            }

            ScoreDoc[] hits = searcher.search( luceneQuery, MAX_SEARCH_HITS ).scoreDocs;

            list = new ArrayList<SearchResult>( hits.length );
            for( int curr = 0; curr < hits.length; curr++ )
            {
                int docID = hits[curr].doc;
                Document doc = searcher.doc( docID );
                String pageName = doc.get( LUCENE_ID );
                WikiPage page = m_engine.getPage( pageName, WikiPageProvider.LATEST_VERSION );

                if( page != null )
                {
                    // Attachments are currently reported like ordinary pages; they don't look
                    // nice on the search-results page, so special handling may be added later.

                    int score = (int)(hits[curr].score * 100);

                    // Get highlighted search contexts
                    String text = doc.get( LUCENE_PAGE_CONTENTS );

                    String[] fragments = new String[0];
                    if( text != null && highlighter != null )
                    {
                        TokenStream tokenStream = getLuceneAnalyzer()
                            .tokenStream( LUCENE_PAGE_CONTENTS, new StringReader( text ) );
                        fragments = highlighter.getBestFragments( tokenStream, text, MAX_FRAGMENTS );
                    }

                    SearchResult result = new SearchResultImpl( page, score, fragments );
                    list.add( result );
                }
                else
                {
                    // The index references a page that no longer exists: drop the stale entry.
                    log.error( "Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache" );
                    pageRemoved( new WikiPage( m_engine, pageName ) );
                }
            }
        }
        catch( IOException e )
        {
            log.error( "Failed during lucene search", e );
        }
        catch( ParseException e )
        {
            log.info( "Broken query; cannot parse query ", e );

            throw new ProviderException( "You have entered a query Lucene cannot process: " + e.getMessage() );
        }
        catch( InvalidTokenOffsetsException e )
        {
            log.error( "Tokens are incompatible with provided text ", e );
        }
        finally
        {
            if( searcher != null )
            {
                try
                {
                    searcher.getIndexReader().close();
                }
                catch( IOException e )
                {
                    log.error( "Unable to close Lucene index reader", e );
                }
            }
        }

        return list;
    }

    /**
     *  {@inheritDoc}
     */
    public String getProviderInfo()
    {
        return "LuceneSearchProvider";
    }

    /**
     *  Updater thread that updates Lucene indexes.
     */
    private static final class LuceneUpdater extends WikiBackgroundThread
    {
        protected static final int INDEX_DELAY   = 5;
        protected static final int INITIAL_DELAY = 60;
        private final LuceneSearchProvider m_provider;

        /** Seconds to sleep in {@link #startupTask()} before the full reindex is attempted. */
        private final int m_initialDelay;

        private WatchDog m_watchdog;

        private LuceneUpdater( WikiEngine engine, LuceneSearchProvider provider,
                               int initialDelay, int indexDelay )
        {
            super( engine, indexDelay );
            m_provider = provider;
            // Without this assignment the configured initial delay was silently ignored.
            m_initialDelay = initialDelay;
            setName( "JSPWiki Lucene Indexer" );
        }

        /**
         *  Sleeps for the initial delay and then runs the full reindex check.
         *
         *  @throws Exception if the reindex fails or the sleep is interrupted
         */
        public void startupTask() throws Exception
        {
            m_watchdog = getEngine().getCurrentWatchDog();

            // Sleep initially...
            try
            {
                Thread.sleep( m_initialDelay * 1000L );
            }
            catch( InterruptedException e )
            {
                // Restore the interrupt flag so code further up the stack can see it.
                Thread.currentThread().interrupt();
                throw new InternalWikiException( "Interrupted while waiting to start." );
            }

            m_watchdog.enterState( "Full reindex" );
            try
            {
                // Reindex everything
                m_provider.doFullLuceneReindex();
            }
            finally
            {
                m_watchdog.exitState();
            }
        }

        /**
         *  Drains the provider's update queue, indexing each queued page in turn.
         *
         *  @throws Exception declared by the overridden method
         */
        public void backgroundTask() throws Exception
        {
            m_watchdog.enterState( "Emptying index queue", 60 );

            try
            {
                synchronized( m_provider.m_updates )
                {
                    while( m_provider.m_updates.size() > 0 )
                    {
                        Object[] pair = m_provider.m_updates.remove( 0 );
                        WikiPage page = (WikiPage) pair[0];
                        String text = (String) pair[1];
                        m_provider.updateLuceneIndex( page, text );
                    }
                }
            }
            finally
            {
                m_watchdog.exitState();
            }
        }

    }

    // FIXME: This class is dumb; needs to have a better implementation
    /**
     *  Simple value holder for a single search hit.
     */
    private static class SearchResultImpl
        implements SearchResult
    {
        private final WikiPage m_page;
        private final int      m_score;
        private final String[] m_contexts;

        /**
         *  @param page the page that matched
         *  @param score the score of the hit
         *  @param contexts highlighted fragments; copied defensively, may be <code>null</code>
         */
        public SearchResultImpl( WikiPage page, int score, String[] contexts )
        {
            m_page = page;
            m_score = score;
            m_contexts = contexts != null ? contexts.clone() : null;
        }

        public WikiPage getPage()
        {
            return m_page;
        }

        /* (non-Javadoc)
         * @see org.apache.wiki.SearchResult#getScore()
         */
        public int getScore()
        {
            return m_score;
        }


        public String[] getContexts()
        {
            return m_contexts;
        }
    }
}