001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.search; 020 021import java.io.File; 022import java.io.IOException; 023import java.io.InputStream; 024import java.io.InputStreamReader; 025import java.io.StringReader; 026import java.io.StringWriter; 027import java.lang.reflect.Constructor; 028import java.util.ArrayList; 029import java.util.Collection; 030import java.util.Date; 031import java.util.Iterator; 032import java.util.Properties; 033import java.util.Vector; 034 035import org.apache.commons.io.IOUtils; 036import org.apache.commons.lang.StringUtils; 037import org.apache.log4j.Logger; 038import org.apache.lucene.analysis.Analyzer; 039import org.apache.lucene.analysis.TokenStream; 040import org.apache.lucene.document.Document; 041import org.apache.lucene.document.Field; 042import org.apache.lucene.document.StringField; 043import org.apache.lucene.document.TextField; 044import org.apache.lucene.index.CorruptIndexException; 045import org.apache.lucene.index.DirectoryReader; 046import org.apache.lucene.index.IndexReader; 047import org.apache.lucene.index.IndexWriter; 048import org.apache.lucene.index.IndexWriterConfig; 049import org.apache.lucene.index.IndexWriterConfig.OpenMode; 050import org.apache.lucene.index.Term; 051import org.apache.lucene.queryparser.classic.MultiFieldQueryParser; 052import org.apache.lucene.queryparser.classic.ParseException; 053import org.apache.lucene.queryparser.classic.QueryParser; 054import org.apache.lucene.search.IndexSearcher; 055import org.apache.lucene.search.Query; 056import org.apache.lucene.search.ScoreDoc; 057import org.apache.lucene.search.TermQuery; 058import org.apache.lucene.search.highlight.Highlighter; 059import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 060import org.apache.lucene.search.highlight.QueryScorer; 061import org.apache.lucene.search.highlight.SimpleHTMLEncoder; 062import org.apache.lucene.search.highlight.SimpleHTMLFormatter; 063import org.apache.lucene.store.Directory; 064import org.apache.lucene.store.LockObtainFailedException; 065import org.apache.lucene.store.SimpleFSDirectory; 066import org.apache.lucene.util.Version; 067import org.apache.wiki.InternalWikiException; 068import org.apache.wiki.WatchDog; 069import org.apache.wiki.WikiBackgroundThread; 070import org.apache.wiki.WikiContext; 071import org.apache.wiki.WikiEngine; 072import org.apache.wiki.WikiPage; 073import org.apache.wiki.WikiProvider; 074import org.apache.wiki.api.exceptions.NoRequiredPropertyException; 075import org.apache.wiki.api.exceptions.ProviderException; 076import org.apache.wiki.attachment.Attachment; 077import org.apache.wiki.attachment.AttachmentManager; 078import org.apache.wiki.auth.AuthorizationManager; 079import org.apache.wiki.auth.permissions.PagePermission; 080import org.apache.wiki.parser.MarkupParser; 081import org.apache.wiki.providers.WikiPageProvider; 082import org.apache.wiki.util.ClassUtil; 083import org.apache.wiki.util.FileUtil; 084import org.apache.wiki.util.TextUtil; 085 086 087/** 088 * Interface for the search providers that handle searching the Wiki 089 * 090 * @since 2.2.21. 091 */ 092public class LuceneSearchProvider implements SearchProvider { 093 094 protected static final Logger log = Logger.getLogger(LuceneSearchProvider.class); 095 096 private WikiEngine m_engine; 097 098 // Lucene properties. 099 100 /** Which analyzer to use. Default is StandardAnalyzer. */ 101 public static final String PROP_LUCENE_ANALYZER = "jspwiki.lucene.analyzer"; 102 103 private static final String PROP_LUCENE_INDEXDELAY = "jspwiki.lucene.indexdelay"; 104 private static final String PROP_LUCENE_INITIALDELAY = "jspwiki.lucene.initialdelay"; 105 106 private String m_analyzerClass = "org.apache.lucene.analysis.standard.ClassicAnalyzer"; 107 108 private static final String LUCENE_DIR = "lucene"; 109 110 /** These attachment file suffixes will be indexed. */ 111 public static final String[] SEARCHABLE_FILE_SUFFIXES = new String[] { ".txt", ".ini", ".xml", ".html", "htm", ".mm", ".htm", 112 ".xhtml", ".java", ".c", ".cpp", ".php", ".asm", ".sh", 113 ".properties", ".kml", ".gpx", ".loc" }; 114 115 protected static final String LUCENE_ID = "id"; 116 protected static final String LUCENE_PAGE_CONTENTS = "contents"; 117 protected static final String LUCENE_AUTHOR = "author"; 118 protected static final String LUCENE_ATTACHMENTS = "attachment"; 119 protected static final String LUCENE_PAGE_NAME = "name"; 120 121 private String m_luceneDirectory; 122 protected Vector<Object[]> m_updates = new Vector<Object[]>(); // Vector because multi-threaded. 123 124 /** Maximum number of fragments from search matches. */ 125 private static final int MAX_FRAGMENTS = 3; 126 127 /** The maximum number of hits to return from searches. */ 128 public static final int MAX_SEARCH_HITS = 99999; 129 130 private static String c_punctuationSpaces = StringUtils.repeat(" ", MarkupParser.PUNCTUATION_CHARS_ALLOWED.length() ); 131 132 /** 133 * {@inheritDoc} 134 */ 135 public void initialize(WikiEngine engine, Properties props) 136 throws NoRequiredPropertyException, IOException 137 { 138 m_engine = engine; 139 140 m_luceneDirectory = engine.getWorkDir()+File.separator+LUCENE_DIR; 141 142 int initialDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY ); 143 int indexDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY ); 144 145 m_analyzerClass = TextUtil.getStringProperty( props, PROP_LUCENE_ANALYZER, m_analyzerClass ); 146 // FIXME: Just to be simple for now, we will do full reindex 147 // only if no files are in lucene directory. 148 149 File dir = new File(m_luceneDirectory); 150 151 log.info("Lucene enabled, cache will be in: "+dir.getAbsolutePath()); 152 153 try 154 { 155 if( !dir.exists() ) 156 { 157 dir.mkdirs(); 158 } 159 160 if( !dir.exists() || !dir.canWrite() || !dir.canRead() ) 161 { 162 log.error("Cannot write to Lucene directory, disabling Lucene: "+dir.getAbsolutePath()); 163 throw new IOException( "Invalid Lucene directory." ); 164 } 165 166 String[] filelist = dir.list(); 167 168 if( filelist == null ) 169 { 170 throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath()); 171 } 172 } 173 catch ( IOException e ) 174 { 175 log.error("Problem while creating Lucene index - not using Lucene.", e); 176 } 177 178 // Start the Lucene update thread, which waits first 179 // for a little while before starting to go through 180 // the Lucene "pages that need updating". 181 LuceneUpdater updater = new LuceneUpdater( m_engine, this, initialDelay, indexDelay ); 182 updater.start(); 183 } 184 185 /** 186 * Returns the handling engine. 187 * 188 * @return Current WikiEngine 189 */ 190 protected WikiEngine getEngine() 191 { 192 return m_engine; 193 } 194 195 /** 196 * Performs a full Lucene reindex, if necessary. 197 * 198 * @throws IOException If there's a problem during indexing 199 */ 200 protected void doFullLuceneReindex() 201 throws IOException 202 { 203 File dir = new File(m_luceneDirectory); 204 205 String[] filelist = dir.list(); 206 207 if( filelist == null ) 208 { 209 throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath()); 210 } 211 212 try 213 { 214 if( filelist.length == 0 ) 215 { 216 // 217 // No files? Reindex! 218 // 219 Date start = new Date(); 220 IndexWriter writer = null; 221 222 log.info("Starting Lucene reindexing, this can take a couple of minutes..."); 223 224 Directory luceneDir = new SimpleFSDirectory(dir, null); 225 226 try 227 { 228 writer = getIndexWriter( luceneDir ); 229 Collection allPages = m_engine.getPageManager().getAllPages(); 230 231 for( Iterator iterator = allPages.iterator(); iterator.hasNext(); ) 232 { 233 WikiPage page = (WikiPage) iterator.next(); 234 235 try 236 { 237 String text = m_engine.getPageManager().getPageText( page.getName(), 238 WikiProvider.LATEST_VERSION ); 239 luceneIndexPage( page, text, writer ); 240 } 241 catch( IOException e ) 242 { 243 log.warn( "Unable to index page " + page.getName() + ", continuing to next ", e ); 244 } 245 } 246 247 Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments(); 248 for( Iterator iterator = allAttachments.iterator(); iterator.hasNext(); ) 249 { 250 Attachment att = (Attachment) iterator.next(); 251 252 try 253 { 254 String text = getAttachmentContent( att.getName(), WikiProvider.LATEST_VERSION ); 255 luceneIndexPage( att, text, writer ); 256 } 257 catch( IOException e ) 258 { 259 log.warn( "Unable to index attachment " + att.getName() + ", continuing to next", e ); 260 } 261 } 262 263 } 264 finally 265 { 266 close( writer ); 267 } 268 269 Date end = new Date(); 270 log.info( "Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds." ); 271 } 272 else 273 { 274 log.info("Files found in Lucene directory, not reindexing."); 275 } 276 } 277 catch( NoClassDefFoundError e ) 278 { 279 log.info("Lucene libraries do not exist - not using Lucene."); 280 } 281 catch ( IOException e ) 282 { 283 log.error("Problem while creating Lucene index - not using Lucene.", e); 284 } 285 catch ( ProviderException e ) 286 { 287 log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e); 288 throw new IllegalArgumentException("unable to create Lucene index"); 289 } 290 catch( Exception e ) 291 { 292 log.error("Unable to start lucene",e); 293 } 294 295 } 296 297 /** 298 * Fetches the attachment content from the repository. 299 * Content is flat text that can be used for indexing/searching or display 300 * 301 * @param attachmentName Name of the attachment. 302 * @param version The version of the attachment. 303 * 304 * @return the content of the Attachment as a String. 305 */ 306 protected String getAttachmentContent( String attachmentName, int version ) 307 { 308 AttachmentManager mgr = m_engine.getAttachmentManager(); 309 310 try 311 { 312 Attachment att = mgr.getAttachmentInfo( attachmentName, version ); 313 //FIXME: Find out why sometimes att is null 314 if(att != null) 315 { 316 return getAttachmentContent( att ); 317 } 318 } 319 catch (ProviderException e) 320 { 321 log.error("Attachment cannot be loaded", e); 322 } 323 // Something was wrong, no result is returned. 324 return null; 325 } 326 327 /** 328 * @param att Attachment to get content for. Filename extension is used to determine the type of the attachment. 329 * @return String representing the content of the file. 330 * FIXME This is a very simple implementation of some text-based attachment, mainly used for testing. 331 * This should be replaced /moved to Attachment search providers or some other 'pluggable' wat to search attachments 332 */ 333 protected String getAttachmentContent( Attachment att ) 334 { 335 AttachmentManager mgr = m_engine.getAttachmentManager(); 336 //FIXME: Add attachment plugin structure 337 338 String filename = att.getFileName(); 339 340 boolean searchSuffix = false; 341 for( String suffix : SEARCHABLE_FILE_SUFFIXES ) 342 { 343 if( filename.endsWith( suffix ) ) 344 { 345 searchSuffix = true; 346 } 347 } 348 349 String out = null; 350 if( searchSuffix ) 351 { 352 InputStream attStream = null; 353 StringWriter sout = new StringWriter(); 354 355 try 356 { 357 attStream = mgr.getAttachmentStream( att ); 358 FileUtil.copyContents( new InputStreamReader(attStream), sout ); 359 out = sout.toString(); 360 } 361 catch (ProviderException e) 362 { 363 log.error("Attachment cannot be loaded", e); 364 } 365 catch (IOException e) 366 { 367 log.error("Attachment cannot be loaded", e); 368 } 369 finally 370 { 371 IOUtils.closeQuietly( attStream ); 372 IOUtils.closeQuietly( sout ); 373 } 374 } 375 376 return out; 377 } 378 379 /** 380 * Updates the lucene index for a single page. 381 * 382 * @param page The WikiPage to check 383 * @param text The page text to index. 384 */ 385 protected synchronized void updateLuceneIndex( WikiPage page, String text ) 386 { 387 IndexWriter writer = null; 388 389 log.debug("Updating Lucene index for page '" + page.getName() + "'..."); 390 391 Directory luceneDir = null; 392 try 393 { 394 pageRemoved( page ); 395 396 // Now add back the new version. 397 luceneDir = new SimpleFSDirectory(new File(m_luceneDirectory), null); 398 writer = getIndexWriter( luceneDir ); 399 400 luceneIndexPage( page, text, writer ); 401 } 402 catch ( IOException e ) 403 { 404 log.error("Unable to update page '" + page.getName() + "' from Lucene index", e); 405 // reindexPage( page ); 406 } 407 catch( Exception e ) 408 { 409 log.error("Unexpected Lucene exception - please check configuration!",e); 410 // reindexPage( page ); 411 } 412 finally 413 { 414 close( writer ); 415 } 416 417 log.debug("Done updating Lucene index for page '" + page.getName() + "'."); 418 } 419 420 421 private Analyzer getLuceneAnalyzer() throws ProviderException 422 { 423 try 424 { 425 Class< ? > clazz = ClassUtil.findClass( "", m_analyzerClass ); 426 Constructor< ? > constructor = clazz.getConstructor( Version.LUCENE_47.getClass() ); 427 Analyzer analyzer = (Analyzer) constructor.newInstance( Version.LUCENE_47 ); 428 return analyzer; 429 } 430 catch( Exception e ) 431 { 432 String msg = "Could not get LuceneAnalyzer class " + m_analyzerClass + ", reason: "; 433 log.error( msg, e ); 434 throw new ProviderException( msg + e ); 435 } 436 } 437 438 /** 439 * Indexes page using the given IndexWriter. 440 * 441 * @param page WikiPage 442 * @param text Page text to index 443 * @param writer The Lucene IndexWriter to use for indexing 444 * @return the created index Document 445 * @throws IOException If there's an indexing problem 446 */ 447 protected Document luceneIndexPage( WikiPage page, String text, IndexWriter writer ) 448 throws IOException 449 { 450 if( log.isDebugEnabled() ) log.debug( "Indexing "+page.getName()+"..." ); 451 452 // make a new, empty document 453 Document doc = new Document(); 454 455 if( text == null ) return doc; 456 457 // Raw name is the keyword we'll use to refer to this document for updates. 458 Field field = new Field( LUCENE_ID, page.getName(), StringField.TYPE_STORED ); 459 doc.add( field ); 460 461 // Body text. It is stored in the doc for search contexts. 462 field = new Field( LUCENE_PAGE_CONTENTS, text, TextField.TYPE_STORED ); 463 doc.add( field ); 464 465 // Allow searching by page name. Both beautified and raw 466 String unTokenizedTitle = StringUtils.replaceChars( page.getName(), 467 MarkupParser.PUNCTUATION_CHARS_ALLOWED, 468 c_punctuationSpaces ); 469 470 field = new Field( LUCENE_PAGE_NAME, 471 TextUtil.beautifyString( page.getName() ) + " " + unTokenizedTitle, 472 TextField.TYPE_STORED ); 473 doc.add( field ); 474 475 // Allow searching by authorname 476 477 if( page.getAuthor() != null ) 478 { 479 field = new Field( LUCENE_AUTHOR, page.getAuthor(), TextField.TYPE_STORED ); 480 doc.add( field ); 481 } 482 483 // Now add the names of the attachments of this page 484 try 485 { 486 Collection attachments = m_engine.getAttachmentManager().listAttachments(page); 487 String attachmentNames = ""; 488 489 for( Iterator it = attachments.iterator(); it.hasNext(); ) 490 { 491 Attachment att = (Attachment) it.next(); 492 attachmentNames += att.getName() + ";"; 493 } 494 field = new Field( LUCENE_ATTACHMENTS, attachmentNames, TextField.TYPE_STORED ); 495 doc.add( field ); 496 497 } 498 catch(ProviderException e) 499 { 500 // Unable to read attachments 501 log.error("Failed to get attachments for page", e); 502 } 503 writer.addDocument(doc); 504 505 return doc; 506 } 507 508 /** 509 * {@inheritDoc} 510 */ 511 public void pageRemoved( WikiPage page ) 512 { 513 IndexWriter writer = null; 514 try 515 { 516 Directory luceneDir = new SimpleFSDirectory(new File(m_luceneDirectory), null); 517 writer = getIndexWriter( luceneDir ); 518 Query query = new TermQuery( new Term( LUCENE_ID, page.getName() ) ); 519 writer.deleteDocuments( query ); 520 } 521 catch ( Exception e ) 522 { 523 log.error("Unable to remove page '" + page.getName() + "' from Lucene index", e); 524 } 525 finally 526 { 527 close( writer ); 528 } 529 } 530 531 IndexWriter getIndexWriter( Directory luceneDir ) throws CorruptIndexException, 532 LockObtainFailedException, IOException, ProviderException 533 { 534 IndexWriter writer = null; 535 IndexWriterConfig writerConfig = new IndexWriterConfig( Version.LUCENE_47, getLuceneAnalyzer() ); 536 writerConfig.setOpenMode( OpenMode.CREATE_OR_APPEND ); 537 writer = new IndexWriter( luceneDir, writerConfig ); 538 539 // writer.setInfoStream( System.out ); 540 return writer; 541 } 542 543 void close( IndexWriter writer ) 544 { 545 try 546 { 547 if( writer != null ) 548 { 549 writer.close( true ); 550 } 551 } 552 catch( IOException e ) 553 { 554 log.error( e ); 555 } 556 } 557 558 559 /** 560 * Adds a page-text pair to the lucene update queue. Safe to call always 561 * 562 * @param page WikiPage to add to the update queue. 563 */ 564 public void reindexPage( WikiPage page ) 565 { 566 if( page != null ) 567 { 568 String text; 569 570 // TODO: Think if this was better done in the thread itself? 571 572 if( page instanceof Attachment ) 573 { 574 text = getAttachmentContent( (Attachment) page ); 575 } 576 else 577 { 578 text = m_engine.getPureText( page ); 579 } 580 581 if( text != null ) 582 { 583 // Add work item to m_updates queue. 584 Object[] pair = new Object[2]; 585 pair[0] = page; 586 pair[1] = text; 587 m_updates.add(pair); 588 log.debug("Scheduling page " + page.getName() + " for index update"); 589 } 590 } 591 } 592 593 /** 594 * {@inheritDoc} 595 */ 596 public Collection findPages( String query, WikiContext wikiContext ) 597 throws ProviderException 598 { 599 return findPages( query, FLAG_CONTEXTS, wikiContext ); 600 } 601 602 /** 603 * Create contexts also. Generating contexts can be expensive, 604 * so they're not on by default. 605 */ 606 public static final int FLAG_CONTEXTS = 0x01; 607 608 /** 609 * Searches pages using a particular combination of flags. 610 * 611 * @param query The query to perform in Lucene query language 612 * @param flags A set of flags 613 * @return A Collection of SearchResult instances 614 * @throws ProviderException if there is a problem with the backend 615 */ 616 public Collection findPages( String query, int flags, WikiContext wikiContext ) 617 throws ProviderException 618 { 619 IndexSearcher searcher = null; 620 ArrayList<SearchResult> list = null; 621 Highlighter highlighter = null; 622 623 try 624 { 625 String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS }; 626 QueryParser qp = new MultiFieldQueryParser( Version.LUCENE_47, queryfields, getLuceneAnalyzer() ); 627 628 //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() ); 629 Query luceneQuery = qp.parse( query ); 630 631 if( (flags & FLAG_CONTEXTS) != 0 ) 632 { 633 highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"), 634 new SimpleHTMLEncoder(), 635 new QueryScorer(luceneQuery)); 636 } 637 638 try 639 { 640 File dir = new File(m_luceneDirectory); 641 Directory luceneDir = new SimpleFSDirectory(dir, null); 642 IndexReader reader = DirectoryReader.open(luceneDir); 643 searcher = new IndexSearcher(reader); 644 } 645 catch( Exception ex ) 646 { 647 log.info("Lucene not yet ready; indexing not started",ex); 648 return null; 649 } 650 651 ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs; 652 653 AuthorizationManager mgr = m_engine.getAuthorizationManager(); 654 655 list = new ArrayList<SearchResult>(hits.length); 656 for ( int curr = 0; curr < hits.length; curr++ ) 657 { 658 int docID = hits[curr].doc; 659 Document doc = searcher.doc( docID ); 660 String pageName = doc.get(LUCENE_ID); 661 WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION); 662 663 if(page != null) 664 { 665 if(page instanceof Attachment) 666 { 667 // Currently attachments don't look nice on the search-results page 668 // When the search-results are cleaned up this can be enabled again. 669 } 670 671 PagePermission pp = new PagePermission( page, PagePermission.VIEW_ACTION ); 672 if( mgr.checkPermission( wikiContext.getWikiSession(), pp ) ) { 673 674 int score = (int)(hits[curr].score * 100); 675 676 677 // Get highlighted search contexts 678 String text = doc.get(LUCENE_PAGE_CONTENTS); 679 680 String[] fragments = new String[0]; 681 if( text != null && highlighter != null ) { 682 TokenStream tokenStream = getLuceneAnalyzer() 683 .tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text)); 684 fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS); 685 } 686 687 SearchResult result = new SearchResultImpl( page, score, fragments ); 688 list.add(result); 689 } 690 } 691 else 692 { 693 log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache"); 694 pageRemoved(new WikiPage( m_engine, pageName )); 695 } 696 } 697 } 698 catch( IOException e ) 699 { 700 log.error("Failed during lucene search",e); 701 } 702 catch( ParseException e ) 703 { 704 log.info("Broken query; cannot parse query ",e); 705 706 throw new ProviderException("You have entered a query Lucene cannot process: "+e.getMessage()); 707 } 708 catch( InvalidTokenOffsetsException e ) 709 { 710 log.error("Tokens are incompatible with provided text ",e); 711 } 712 finally 713 { 714 if( searcher != null ) 715 { 716 try 717 { 718 searcher.getIndexReader().close(); 719 } 720 catch( IOException e ) 721 { 722 log.error( e ); 723 } 724 } 725 } 726 727 return list; 728 } 729 730 /** 731 * {@inheritDoc} 732 */ 733 public String getProviderInfo() 734 { 735 return "LuceneSearchProvider"; 736 } 737 738 /** 739 * Updater thread that updates Lucene indexes. 740 */ 741 private static final class LuceneUpdater extends WikiBackgroundThread 742 { 743 protected static final int INDEX_DELAY = 5; 744 protected static final int INITIAL_DELAY = 60; 745 private final LuceneSearchProvider m_provider; 746 747 private int m_initialDelay; 748 749 private WatchDog m_watchdog; 750 751 private LuceneUpdater( WikiEngine engine, LuceneSearchProvider provider, 752 int initialDelay, int indexDelay ) 753 { 754 super( engine, indexDelay ); 755 m_provider = provider; 756 setName("JSPWiki Lucene Indexer"); 757 } 758 759 public void startupTask() throws Exception 760 { 761 m_watchdog = getEngine().getCurrentWatchDog(); 762 763 // Sleep initially... 764 try 765 { 766 Thread.sleep( m_initialDelay * 1000L ); 767 } 768 catch( InterruptedException e ) 769 { 770 throw new InternalWikiException("Interrupted while waiting to start.", e); 771 } 772 773 m_watchdog.enterState("Full reindex"); 774 // Reindex everything 775 m_provider.doFullLuceneReindex(); 776 m_watchdog.exitState(); 777 } 778 779 public void backgroundTask() throws Exception 780 { 781 m_watchdog.enterState("Emptying index queue", 60); 782 783 synchronized ( m_provider.m_updates ) 784 { 785 while( m_provider.m_updates.size() > 0 ) 786 { 787 Object[] pair = m_provider.m_updates.remove(0); 788 WikiPage page = ( WikiPage ) pair[0]; 789 String text = ( String ) pair[1]; 790 m_provider.updateLuceneIndex(page, text); 791 } 792 } 793 794 m_watchdog.exitState(); 795 } 796 797 } 798 799 // FIXME: This class is dumb; needs to have a better implementation 800 private static class SearchResultImpl 801 implements SearchResult 802 { 803 private WikiPage m_page; 804 private int m_score; 805 private String[] m_contexts; 806 807 public SearchResultImpl( WikiPage page, int score, String[] contexts ) 808 { 809 m_page = page; 810 m_score = score; 811 m_contexts = contexts != null ? contexts.clone() : null; 812 } 813 814 public WikiPage getPage() 815 { 816 return m_page; 817 } 818 819 /* (non-Javadoc) 820 * @see org.apache.wiki.SearchResult#getScore() 821 */ 822 public int getScore() 823 { 824 return m_score; 825 } 826 827 828 public String[] getContexts() 829 { 830 return m_contexts; 831 } 832 } 833}