001    /*
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.   
018     */
019    package org.apache.wiki.search;
020    
021    import java.io.IOException;
022    import java.io.BufferedReader;
023    import java.io.StringReader;
024    
025    import org.apache.wiki.WikiEngine;
026    import org.apache.wiki.WikiPage;
027    
028    
029    /**
030     * SearchMatcher performs the task of matching a search query to a page's
031     * contents. This utility class is isolated to simplify WikiPageProvider
032     * implementations and to offer an easy target for upgrades. The upcoming(?)
033     * TranslatorReader rewrite will presumably invalidate this, among other things.
034     *
035     * @since 2.1.5
036     */
037    public class SearchMatcher {
038        
039        private QueryItem[] m_queries;
040        private WikiEngine m_engine;
041    
042        /**
043         *  Creates a new SearchMatcher.
044         *  
045         *  @param engine The WikiEngine
046         *  @param queries A list of queries
047         */
048        public SearchMatcher( WikiEngine engine, QueryItem[] queries ) {
049            m_engine = engine;
050            m_queries = queries != null ? queries.clone() : null;
051        }
052    
053        /**
054         * Compares the page content, available through the given stream, to the
055         * query items of this matcher. Returns a search result object describing
056         * the quality of the match.
057         *
058         * <p>This method would benefit of regexps (1.4) and streaming. FIXME!
059         * 
060         * @param wikiname The name of the page
061         * @param pageText The content of the page
062         * @return A SearchResult item, or null, there are no queries
063         * @throws IOException If reading page content fails
064         */
065        public SearchResult matchPageContent( String wikiname, String pageText ) throws IOException {
066            if( m_queries == null ) {
067                return null;
068            }
069    
070            int[] scores = new int[ m_queries.length ];
071            BufferedReader in = new BufferedReader( new StringReader( pageText ) );
072            String line = null;
073    
074            while( (line = in.readLine() ) != null ) {
075                line = line.toLowerCase();
076    
077                for( int j = 0; j < m_queries.length; j++ ) {
078                    int index = -1;
079    
080                    while( (index = line.indexOf( m_queries[j].word, index + 1 ) ) != -1 ) {
081                        if( m_queries[j].type != QueryItem.FORBIDDEN ) {
082                            scores[j]++; // Mark, found this word n times
083                        } else {
084                            // Found something that was forbidden.
085                            return null;
086                        }
087                    }
088                }
089            }
090    
091            //
092            //  Check that we have all required words.
093            //
094            int totalscore = 0;
095    
096            for( int j = 0; j < scores.length; j++ ) {
097                // Give five points for each occurrence
098                // of the word in the wiki name.
099    
100                if( wikiname.toLowerCase().indexOf( m_queries[j].word ) != -1 && m_queries[j].type != QueryItem.FORBIDDEN ) {
101                    scores[j] += 5;
102                }
103    
104                //  Filter out pages if the search word is marked 'required'
105                //  but they have no score.
106    
107                if( m_queries[j].type == QueryItem.REQUIRED && scores[j] == 0 ) {
108                    return null;
109                }
110    
111                //
112                //  Count the total score for this page.
113                //
114                totalscore += scores[j];
115            }
116    
117            if( totalscore > 0 ) {
118                return new SearchResultImpl( wikiname, totalscore );
119            }
120    
121            return null;
122        }
123    
124        /**
125         *  A local search result.
126         */
127        public class SearchResultImpl implements SearchResult {
128            
129            int      m_score;
130            WikiPage m_page;
131    
132            /**
133             *  Create a new SearchResult with a given name and a score.
134             *  
135             *  @param name Page Name
136             *  @param score A score from 0+
137             */
138            public SearchResultImpl( String name, int score ) {
139                m_page  = new WikiPage( m_engine, name );
140                m_score = score;
141            }
142    
143            /**
144             *  Returns Wikipage for this result.
145             *  @return WikiPage
146             */
147            public WikiPage getPage() {
148                return m_page;
149            }
150    
151            /**
152             *  Returns a score for this match.
153             *  
154             *  @return Score from 0+
155             */
156            public int getScore() {
157                return m_score;
158            }
159    
160            /**
161             *  Returns an empty array, since BasicSearchProvider does not support
162             *  context matching.
163             *  
164             *  @return an empty array
165             */
166            public String[] getContexts() {
167                // Unimplemented
168                return new String[0];
169            }
170        }
171    
172    }