001    /*
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.    
018     */
019    package org.apache.wiki.search;
020    
021    import java.io.File;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.io.InputStreamReader;
025    import java.io.StringReader;
026    import java.io.StringWriter;
027    import java.lang.reflect.Constructor;
028    import java.util.ArrayList;
029    import java.util.Collection;
030    import java.util.Date;
031    import java.util.Iterator;
032    import java.util.Properties;
033    import java.util.Vector;
034    
035    import org.apache.commons.io.IOUtils;
036    import org.apache.commons.lang.StringUtils;
037    import org.apache.log4j.Logger;
038    import org.apache.lucene.analysis.Analyzer;
039    import org.apache.lucene.analysis.TokenStream;
040    import org.apache.lucene.document.Document;
041    import org.apache.lucene.document.Field;
042    import org.apache.lucene.document.StringField;
043    import org.apache.lucene.document.TextField;
044    import org.apache.lucene.index.CorruptIndexException;
045    import org.apache.lucene.index.DirectoryReader;
046    import org.apache.lucene.index.IndexReader;
047    import org.apache.lucene.index.IndexWriter;
048    import org.apache.lucene.index.IndexWriterConfig;
049    import org.apache.lucene.index.IndexWriterConfig.OpenMode;
050    import org.apache.lucene.index.Term;
051    import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
052    import org.apache.lucene.queryparser.classic.ParseException;
053    import org.apache.lucene.queryparser.classic.QueryParser;
054    import org.apache.lucene.search.IndexSearcher;
055    import org.apache.lucene.search.Query;
056    import org.apache.lucene.search.ScoreDoc;
057    import org.apache.lucene.search.TermQuery;
058    import org.apache.lucene.search.highlight.Highlighter;
059    import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
060    import org.apache.lucene.search.highlight.QueryScorer;
061    import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
062    import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
063    import org.apache.lucene.store.Directory;
064    import org.apache.lucene.store.LockObtainFailedException;
065    import org.apache.lucene.store.SimpleFSDirectory;
066    import org.apache.lucene.util.Version;
067    import org.apache.wiki.InternalWikiException;
068    import org.apache.wiki.WatchDog;
069    import org.apache.wiki.WikiBackgroundThread;
070    import org.apache.wiki.WikiEngine;
071    import org.apache.wiki.WikiPage;
072    import org.apache.wiki.WikiProvider;
073    import org.apache.wiki.api.exceptions.NoRequiredPropertyException;
074    import org.apache.wiki.api.exceptions.ProviderException;
075    import org.apache.wiki.attachment.Attachment;
076    import org.apache.wiki.attachment.AttachmentManager;
077    import org.apache.wiki.parser.MarkupParser;
078    import org.apache.wiki.providers.WikiPageProvider;
079    import org.apache.wiki.util.ClassUtil;
080    import org.apache.wiki.util.FileUtil;
081    import org.apache.wiki.util.TextUtil;
082    
083    
/**
 *  Search provider that indexes and searches wiki pages and attachments
 *  using Apache Lucene.
 *
 *  @since 2.2.21.
 */
089    public class LuceneSearchProvider implements SearchProvider {
090    
091        protected static final Logger log = Logger.getLogger(LuceneSearchProvider.class);
092    
093        private WikiEngine m_engine;
094    
095        // Lucene properties.
096    
097        /** Which analyzer to use.  Default is StandardAnalyzer. */
098        public static final String PROP_LUCENE_ANALYZER    = "jspwiki.lucene.analyzer";
099    
100        private static final String PROP_LUCENE_INDEXDELAY   = "jspwiki.lucene.indexdelay";
101        private static final String PROP_LUCENE_INITIALDELAY = "jspwiki.lucene.initialdelay";
102    
103        private String m_analyzerClass = "org.apache.lucene.analysis.standard.ClassicAnalyzer";
104    
105        private static final String LUCENE_DIR             = "lucene";
106    
107        /** These attachment file suffixes will be indexed. */
108        public static final String[] SEARCHABLE_FILE_SUFFIXES = new String[] { ".txt", ".ini", ".xml", ".html", "htm", ".mm", ".htm",
109                                                                              ".xhtml", ".java", ".c", ".cpp", ".php", ".asm", ".sh",
110                                                                              ".properties", ".kml", ".gpx", ".loc" };
111    
112        protected static final String LUCENE_ID            = "id";
113        protected static final String LUCENE_PAGE_CONTENTS = "contents";
114        protected static final String LUCENE_AUTHOR        = "author";
115        protected static final String LUCENE_ATTACHMENTS   = "attachment";
116        protected static final String LUCENE_PAGE_NAME     = "name";
117    
118        private String           m_luceneDirectory;
119        protected Vector<Object[]> m_updates = new Vector<Object[]>(); // Vector because multi-threaded.
120    
121        /** Maximum number of fragments from search matches. */
122        private static final int MAX_FRAGMENTS = 3;
123    
124        /** The maximum number of hits to return from searches. */
125        public static final int MAX_SEARCH_HITS = 99999;
126        
127        private static String c_punctuationSpaces = StringUtils.repeat(" ", MarkupParser.PUNCTUATION_CHARS_ALLOWED.length() );
128    
129        /**
130         *  {@inheritDoc}
131         */
132        public void initialize(WikiEngine engine, Properties props)
133                throws NoRequiredPropertyException, IOException
134        {
135            m_engine = engine;
136    
137            m_luceneDirectory = engine.getWorkDir()+File.separator+LUCENE_DIR;
138    
139            int initialDelay = TextUtil.getIntegerProperty( props, PROP_LUCENE_INITIALDELAY, LuceneUpdater.INITIAL_DELAY );
140            int indexDelay   = TextUtil.getIntegerProperty( props, PROP_LUCENE_INDEXDELAY, LuceneUpdater.INDEX_DELAY );
141    
142            m_analyzerClass = TextUtil.getStringProperty( props, PROP_LUCENE_ANALYZER, m_analyzerClass );
143            // FIXME: Just to be simple for now, we will do full reindex
144            // only if no files are in lucene directory.
145    
146            File dir = new File(m_luceneDirectory);
147    
148            log.info("Lucene enabled, cache will be in: "+dir.getAbsolutePath());
149    
150            try
151            {
152                if( !dir.exists() )
153                {
154                    dir.mkdirs();
155                }
156    
157                if( !dir.exists() || !dir.canWrite() || !dir.canRead() )
158                {
159                    log.error("Cannot write to Lucene directory, disabling Lucene: "+dir.getAbsolutePath());
160                    throw new IOException( "Invalid Lucene directory." );
161                }
162    
163                String[] filelist = dir.list();
164    
165                if( filelist == null )
166                {
167                    throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath());
168                }
169            }
170            catch ( IOException e )
171            {
172                log.error("Problem while creating Lucene index - not using Lucene.", e);
173            }
174    
175            // Start the Lucene update thread, which waits first
176            // for a little while before starting to go through
177            // the Lucene "pages that need updating".
178            LuceneUpdater updater = new LuceneUpdater( m_engine, this, initialDelay, indexDelay );
179            updater.start();
180        }
181    
182        /**
183         *  Returns the handling engine.
184         *
185         *  @return Current WikiEngine
186         */
187        protected WikiEngine getEngine()
188        {
189            return m_engine;
190        }
191    
192        /**
193         *  Performs a full Lucene reindex, if necessary.
194         *
195         *  @throws IOException If there's a problem during indexing
196         */
197        protected void doFullLuceneReindex()
198            throws IOException
199        {
200            File dir = new File(m_luceneDirectory);
201    
202            String[] filelist = dir.list();
203    
204            if( filelist == null )
205            {
206                throw new IOException( "Invalid Lucene directory: cannot produce listing: "+dir.getAbsolutePath());
207            }
208    
209            try
210            {
211                if( filelist.length == 0 )
212                {
213                    //
214                    //  No files? Reindex!
215                    //
216                    Date start = new Date();
217                    IndexWriter writer = null;
218    
219                    log.info("Starting Lucene reindexing, this can take a couple of minutes...");
220    
221                    Directory luceneDir = new SimpleFSDirectory(dir, null);
222                    
223                    try
224                    {
225                        writer = getIndexWriter( luceneDir );
226                        Collection allPages = m_engine.getPageManager().getAllPages();
227    
228                        for( Iterator iterator = allPages.iterator(); iterator.hasNext(); )
229                        {
230                            WikiPage page = (WikiPage) iterator.next();
231                            
232                            try
233                            {
234                                String text = m_engine.getPageManager().getPageText( page.getName(),
235                                                                                     WikiProvider.LATEST_VERSION );
236                                luceneIndexPage( page, text, writer );
237                            }
238                            catch( IOException e )
239                            {
240                                log.warn( "Unable to index page " + page.getName() + ", continuing to next ", e );
241                            }
242                        }
243    
244                        Collection allAttachments = m_engine.getAttachmentManager().getAllAttachments();
245                        for( Iterator iterator = allAttachments.iterator(); iterator.hasNext(); )
246                        {
247                            Attachment att = (Attachment) iterator.next();
248                            
249                            try
250                            {
251                                String text = getAttachmentContent( att.getName(), WikiProvider.LATEST_VERSION );
252                                luceneIndexPage( att, text, writer );
253                            }
254                            catch( IOException e )
255                            {
256                                log.warn( "Unable to index attachment " + att.getName() + ", continuing to next", e );
257                            }
258                        }
259    
260                    }
261                    finally
262                    {
263                        close( writer );
264                    }
265    
266                    Date end = new Date();
267                    log.info( "Full Lucene index finished in " + (end.getTime() - start.getTime()) + " milliseconds." );
268                }
269                else
270                {
271                    log.info("Files found in Lucene directory, not reindexing.");
272                }
273            }
274            catch( NoClassDefFoundError e )
275            {
276                log.info("Lucene libraries do not exist - not using Lucene.");
277            }
278            catch ( IOException e )
279            {
280                log.error("Problem while creating Lucene index - not using Lucene.", e);
281            }
282            catch ( ProviderException e )
283            {
284                log.error("Problem reading pages while creating Lucene index (JSPWiki won't start.)", e);
285                throw new IllegalArgumentException("unable to create Lucene index");
286            }
287            catch( Exception e )
288            {
289                log.error("Unable to start lucene",e);
290            }
291    
292        }
293    
294        /**
295         *  Fetches the attachment content from the repository.
296         *  Content is flat text that can be used for indexing/searching or display
297         *  
298         *  @param attachmentName Name of the attachment.
299         *  @param version The version of the attachment.
300         *  
301         *  @return the content of the Attachment as a String.
302         */
303        protected String getAttachmentContent( String attachmentName, int version )
304        {
305            AttachmentManager mgr = m_engine.getAttachmentManager();
306    
307            try
308            {
309                Attachment att = mgr.getAttachmentInfo( attachmentName, version );
310                //FIXME: Find out why sometimes att is null
311                if(att != null)
312                {
313                    return getAttachmentContent( att );
314                }
315            }
316            catch (ProviderException e)
317            {
318                log.error("Attachment cannot be loaded", e);
319            }
320            // Something was wrong, no result is returned.
321            return null;
322        }
323    
324        /**
325         * @param att Attachment to get content for. Filename extension is used to determine the type of the attachment.
326         * @return String representing the content of the file.
327         * FIXME This is a very simple implementation of some text-based attachment, mainly used for testing.
328         * This should be replaced /moved to Attachment search providers or some other 'pluggable' wat to search attachments
329         */
330        protected String getAttachmentContent( Attachment att )
331        {
332            AttachmentManager mgr = m_engine.getAttachmentManager();
333            //FIXME: Add attachment plugin structure
334    
335            String filename = att.getFileName();
336    
337            boolean searchSuffix = false;
338            for( String suffix : SEARCHABLE_FILE_SUFFIXES )
339            {
340                if( filename.endsWith( suffix ) )
341                {
342                    searchSuffix = true;
343                }
344            }
345    
346            String out = null;
347            if( searchSuffix )
348            {
349                InputStream attStream = null;
350                StringWriter sout = new StringWriter();
351                
352                try
353                {
354                    attStream = mgr.getAttachmentStream( att );
355                    FileUtil.copyContents( new InputStreamReader(attStream), sout );
356                    out = sout.toString();
357                }
358                catch (ProviderException e)
359                {
360                    log.error("Attachment cannot be loaded", e);
361                }
362                catch (IOException e)
363                {
364                    log.error("Attachment cannot be loaded", e);
365                }
366                finally 
367                {
368                    IOUtils.closeQuietly( attStream );
369                    IOUtils.closeQuietly( sout );
370                }
371            }
372    
373            return out;
374        }
375    
376        /**
377         *  Updates the lucene index for a single page.
378         *
379         *  @param page The WikiPage to check
380         *  @param text The page text to index.
381         */
382        protected synchronized void updateLuceneIndex( WikiPage page, String text )
383        {
384            IndexWriter writer = null;
385    
386            log.debug("Updating Lucene index for page '" + page.getName() + "'...");
387    
388            Directory luceneDir = null;
389            try
390            {
391                pageRemoved( page );
392    
393                // Now add back the new version.
394                luceneDir = new SimpleFSDirectory(new File(m_luceneDirectory), null);
395                writer = getIndexWriter( luceneDir );
396                
397                luceneIndexPage( page, text, writer );
398            }
399            catch ( IOException e )
400            {
401                log.error("Unable to update page '" + page.getName() + "' from Lucene index", e);
402                // reindexPage( page );
403            }
404            catch( Exception e )
405            {
406                log.error("Unexpected Lucene exception - please check configuration!",e);
407                // reindexPage( page );
408            }
409            finally
410            {
411                close( writer );
412            }
413    
414            log.debug("Done updating Lucene index for page '" + page.getName() + "'.");
415        }
416    
417    
418        private Analyzer getLuceneAnalyzer() throws ProviderException
419        {
420            try
421            {
422                Class< ? > clazz = ClassUtil.findClass( "", m_analyzerClass );
423                Constructor< ? > constructor = clazz.getConstructor( Version.LUCENE_47.getClass() );
424                Analyzer analyzer = (Analyzer) constructor.newInstance( Version.LUCENE_47 );
425                return analyzer;
426            }
427            catch( Exception e )
428            {
429                String msg = "Could not get LuceneAnalyzer class " + m_analyzerClass + ", reason: ";
430                log.error( msg, e );
431                throw new ProviderException( msg + e );
432            }
433        }
434    
435        /**
436         *  Indexes page using the given IndexWriter.
437         *
438         *  @param page WikiPage
439         *  @param text Page text to index
440         *  @param writer The Lucene IndexWriter to use for indexing
441         *  @return the created index Document
442         *  @throws IOException If there's an indexing problem
443         */
444        protected Document luceneIndexPage( WikiPage page, String text, IndexWriter writer )
445            throws IOException
446        {
447            if( log.isDebugEnabled() ) log.debug( "Indexing "+page.getName()+"..." );
448            
449            // make a new, empty document
450            Document doc = new Document();
451    
452            if( text == null ) return doc;
453    
454            // Raw name is the keyword we'll use to refer to this document for updates.
455            Field field = new Field( LUCENE_ID, page.getName(), StringField.TYPE_STORED );
456            doc.add( field );
457    
458            // Body text.  It is stored in the doc for search contexts.
459            field = new Field( LUCENE_PAGE_CONTENTS, text, TextField.TYPE_STORED );
460            doc.add( field );
461    
462            // Allow searching by page name. Both beautified and raw
463            String unTokenizedTitle = StringUtils.replaceChars( page.getName(),
464                                                                MarkupParser.PUNCTUATION_CHARS_ALLOWED,
465                                                                c_punctuationSpaces );
466    
467            field = new Field( LUCENE_PAGE_NAME,
468                               TextUtil.beautifyString( page.getName() ) + " " + unTokenizedTitle,
469                               TextField.TYPE_STORED );
470            doc.add( field );
471    
472            // Allow searching by authorname
473    
474            if( page.getAuthor() != null )
475            {
476                field = new Field( LUCENE_AUTHOR, page.getAuthor(), TextField.TYPE_STORED );
477                doc.add( field );
478            }
479    
480            // Now add the names of the attachments of this page
481            try
482            {
483                Collection attachments = m_engine.getAttachmentManager().listAttachments(page);
484                String attachmentNames = "";
485    
486                for( Iterator it = attachments.iterator(); it.hasNext(); )
487                {
488                    Attachment att = (Attachment) it.next();
489                    attachmentNames += att.getName() + ";";
490                }
491                field = new Field( LUCENE_ATTACHMENTS, attachmentNames, TextField.TYPE_STORED );
492                doc.add( field );
493    
494            }
495            catch(ProviderException e)
496            {
497                // Unable to read attachments
498                log.error("Failed to get attachments for page", e);
499            }
500            writer.addDocument(doc);
501    
502            return doc;
503        }
504    
505        /**
506         *  {@inheritDoc}
507         */
508        public void pageRemoved( WikiPage page )
509        {
510            IndexWriter writer = null;
511            try
512            {
513                Directory luceneDir = new SimpleFSDirectory(new File(m_luceneDirectory), null);
514                writer = getIndexWriter( luceneDir );
515                Query query = new TermQuery( new Term( LUCENE_ID, page.getName() ) );
516                writer.deleteDocuments( query );
517            }
518            catch ( Exception e )
519            {
520                log.error("Unable to remove page '" + page.getName() + "' from Lucene index", e);
521            }
522            finally
523            {
524                close( writer );
525            }
526        }
527        
528        IndexWriter getIndexWriter( Directory luceneDir ) throws CorruptIndexException, 
529                LockObtainFailedException, IOException, ProviderException 
530        {
531            IndexWriter writer = null;
532            IndexWriterConfig writerConfig = new IndexWriterConfig( Version.LUCENE_47, getLuceneAnalyzer() );
533            writerConfig.setOpenMode( OpenMode.CREATE_OR_APPEND );
534            writer = new IndexWriter( luceneDir, writerConfig );
535            
536            // writer.setInfoStream( System.out );
537            return writer;
538        }
539        
540        void close( IndexWriter writer ) 
541        {
542            try
543            {
544                if( writer != null ) 
545                {
546                    writer.close( true );
547                }
548            }
549            catch( IOException e )
550            {
551                log.error( e );
552            }
553        }
554    
555    
556        /**
557         *  Adds a page-text pair to the lucene update queue.  Safe to call always
558         *
559         *  @param page WikiPage to add to the update queue.
560         */
561        public void reindexPage( WikiPage page )
562        {
563            if( page != null )
564            {
565                String text;
566    
567                // TODO: Think if this was better done in the thread itself?
568    
569                if( page instanceof Attachment )
570                {
571                    text = getAttachmentContent( (Attachment) page );
572                }
573                else
574                {
575                    text = m_engine.getPureText( page );
576                }
577    
578                if( text != null )
579                {
580                    // Add work item to m_updates queue.
581                    Object[] pair = new Object[2];
582                    pair[0] = page;
583                    pair[1] = text;
584                    m_updates.add(pair);
585                    log.debug("Scheduling page " + page.getName() + " for index update");
586                }
587            }
588        }
589    
590        /**
591         *  {@inheritDoc}
592         */
593        public Collection findPages( String query )
594            throws ProviderException
595        {
596            return findPages( query, FLAG_CONTEXTS );
597        }
598    
599        /**
600         *  Create contexts also.  Generating contexts can be expensive,
601         *  so they're not on by default.
602         */
603        public static final int FLAG_CONTEXTS = 0x01;
604    
605        /**
606         *  Searches pages using a particular combination of flags.
607         *
608         *  @param query The query to perform in Lucene query language
609         *  @param flags A set of flags
610         *  @return A Collection of SearchResult instances
611         *  @throws ProviderException if there is a problem with the backend
612         */
613        public Collection findPages( String query, int flags )
614            throws ProviderException
615        {
616            IndexSearcher  searcher = null;
617            ArrayList<SearchResult> list = null;
618            Highlighter highlighter = null;
619    
620            try
621            {
622                String[] queryfields = { LUCENE_PAGE_CONTENTS, LUCENE_PAGE_NAME, LUCENE_AUTHOR, LUCENE_ATTACHMENTS };
623                QueryParser qp = new MultiFieldQueryParser( Version.LUCENE_47, queryfields, getLuceneAnalyzer() );
624    
625                //QueryParser qp = new QueryParser( LUCENE_PAGE_CONTENTS, getLuceneAnalyzer() );
626                Query luceneQuery = qp.parse( query );
627    
628                if( (flags & FLAG_CONTEXTS) != 0 )
629                {
630                    highlighter = new Highlighter(new SimpleHTMLFormatter("<span class=\"searchmatch\">", "</span>"),
631                                                  new SimpleHTMLEncoder(),
632                                                  new QueryScorer(luceneQuery));
633                }
634    
635                try
636                {
637                    File dir = new File(m_luceneDirectory);
638                    Directory luceneDir = new SimpleFSDirectory(dir, null);
639                    IndexReader reader = DirectoryReader.open(luceneDir);
640                    searcher = new IndexSearcher(reader);
641                }
642                catch( Exception ex )
643                {
644                    log.info("Lucene not yet ready; indexing not started",ex);
645                    return null;
646                }
647    
648                ScoreDoc[] hits = searcher.search(luceneQuery, MAX_SEARCH_HITS).scoreDocs;
649    
650                list = new ArrayList<SearchResult>(hits.length);
651                for ( int curr = 0; curr < hits.length; curr++ )
652                {
653                    int docID = hits[curr].doc;
654                    Document doc = searcher.doc( docID );
655                    String pageName = doc.get(LUCENE_ID);
656                    WikiPage page = m_engine.getPage(pageName, WikiPageProvider.LATEST_VERSION);
657    
658                    if(page != null)
659                    {
660                        if(page instanceof Attachment)
661                        {
662                            // Currently attachments don't look nice on the search-results page
663                            // When the search-results are cleaned up this can be enabled again.
664                        }
665    
666                        int score = (int)(hits[curr].score * 100);
667    
668    
669                        // Get highlighted search contexts
670                        String text = doc.get(LUCENE_PAGE_CONTENTS);
671    
672                        String[] fragments = new String[0];
673                        if( text != null && highlighter != null )
674                        {
675                            TokenStream tokenStream = getLuceneAnalyzer()
676                            .tokenStream(LUCENE_PAGE_CONTENTS, new StringReader(text));
677                            fragments = highlighter.getBestFragments(tokenStream, text, MAX_FRAGMENTS);
678    
679                        }
680    
681                        SearchResult result = new SearchResultImpl( page, score, fragments );     
682                        list.add(result);
683                    }
684                    else
685                    {
686                        log.error("Lucene found a result page '" + pageName + "' that could not be loaded, removing from Lucene cache");
687                        pageRemoved(new WikiPage( m_engine, pageName ));
688                    }
689                }
690            }
691            catch( IOException e )
692            {
693                log.error("Failed during lucene search",e);
694            }
695            catch( ParseException e )
696            {
697                log.info("Broken query; cannot parse query ",e);
698    
699                throw new ProviderException("You have entered a query Lucene cannot process: "+e.getMessage());
700            }
701            catch( InvalidTokenOffsetsException e )
702            {
703                log.error("Tokens are incompatible with provided text ",e);
704            }
705            finally
706            {
707                if( searcher != null )
708                {
709                    try
710                    {
711                        searcher.getIndexReader().close();
712                    }
713                    catch( IOException e )
714                    {
715                        log.error( e );
716                    }
717                }
718            }
719    
720            return list;
721        }
722    
723        /**
724         *  {@inheritDoc}
725         */
726        public String getProviderInfo()
727        {
728            return "LuceneSearchProvider";
729        }
730    
731        /**
732         * Updater thread that updates Lucene indexes.
733         */
734        private static final class LuceneUpdater extends WikiBackgroundThread
735        {
736            protected static final int INDEX_DELAY    = 5;
737            protected static final int INITIAL_DELAY = 60;
738            private final LuceneSearchProvider m_provider;
739    
740            private int m_initialDelay;
741    
742            private WatchDog m_watchdog;
743    
744            private LuceneUpdater( WikiEngine engine, LuceneSearchProvider provider,
745                                   int initialDelay, int indexDelay )
746            {
747                super( engine, indexDelay );
748                m_provider = provider;
749                setName("JSPWiki Lucene Indexer");
750            }
751    
752            public void startupTask() throws Exception
753            {
754                m_watchdog = getEngine().getCurrentWatchDog();
755    
756                // Sleep initially...
757                try
758                {
759                    Thread.sleep( m_initialDelay * 1000L );
760                }
761                catch( InterruptedException e )
762                {
763                    throw new InternalWikiException("Interrupted while waiting to start.");
764                }
765    
766                m_watchdog.enterState("Full reindex");
767                // Reindex everything
768                m_provider.doFullLuceneReindex();
769                m_watchdog.exitState();
770            }
771    
772            public void backgroundTask() throws Exception
773            {
774                m_watchdog.enterState("Emptying index queue", 60);
775    
776                synchronized ( m_provider.m_updates )
777                {
778                    while( m_provider.m_updates.size() > 0 )
779                    {
780                        Object[] pair = m_provider.m_updates.remove(0);
781                        WikiPage page = ( WikiPage ) pair[0];
782                        String text = ( String ) pair[1];
783                        m_provider.updateLuceneIndex(page, text);
784                    }
785                }
786    
787                m_watchdog.exitState();
788            }
789    
790        }
791    
792        // FIXME: This class is dumb; needs to have a better implementation
793        private static class SearchResultImpl
794            implements SearchResult
795        {
796            private WikiPage m_page;
797            private int      m_score;
798            private String[] m_contexts;
799    
800            public SearchResultImpl( WikiPage page, int score, String[] contexts )
801            {
802                m_page  = page;
803                m_score = score;
804                m_contexts = contexts != null ? contexts.clone() : null;
805            }
806    
807            public WikiPage getPage()
808            {
809                return m_page;
810            }
811    
812            /* (non-Javadoc)
813             * @see org.apache.wiki.SearchResult#getScore()
814             */
815            public int getScore()
816            {
817                return m_score;
818            }
819    
820    
821            public String[] getContexts()
822            {
823                return m_contexts;
824            }
825        }
826    }