001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.   
018 */
019package org.apache.wiki.search;
020
021import java.io.IOException;
022import java.io.BufferedReader;
023import java.io.StringReader;
024
025import org.apache.wiki.WikiEngine;
026import org.apache.wiki.WikiPage;
027
028
029/**
030 * SearchMatcher performs the task of matching a search query to a page's
031 * contents. This utility class is isolated to simplify WikiPageProvider
032 * implementations and to offer an easy target for upgrades. The upcoming(?)
033 * TranslatorReader rewrite will presumably invalidate this, among other things.
034 *
035 * @since 2.1.5
036 */
037public class SearchMatcher {
038    
039    private QueryItem[] m_queries;
040    private WikiEngine m_engine;
041
042    /**
043     *  Creates a new SearchMatcher.
044     *  
045     *  @param engine The WikiEngine
046     *  @param queries A list of queries
047     */
048    public SearchMatcher( WikiEngine engine, QueryItem[] queries ) {
049        m_engine = engine;
050        m_queries = queries != null ? queries.clone() : null;
051    }
052
053    /**
054     * Compares the page content, available through the given stream, to the
055     * query items of this matcher. Returns a search result object describing
056     * the quality of the match.
057     *
058     * <p>This method would benefit of regexps (1.4) and streaming. FIXME!
059     * 
060     * @param wikiname The name of the page
061     * @param pageText The content of the page
062     * @return A SearchResult item, or null, there are no queries
063     * @throws IOException If reading page content fails
064     */
065    public SearchResult matchPageContent( String wikiname, String pageText ) throws IOException {
066        if( m_queries == null ) {
067            return null;
068        }
069
070        int[] scores = new int[ m_queries.length ];
071        BufferedReader in = new BufferedReader( new StringReader( pageText ) );
072        String line = null;
073
074        while( (line = in.readLine() ) != null ) {
075            line = line.toLowerCase();
076
077            for( int j = 0; j < m_queries.length; j++ ) {
078                int index = -1;
079
080                while( (index = line.indexOf( m_queries[j].word, index + 1 ) ) != -1 ) {
081                    if( m_queries[j].type != QueryItem.FORBIDDEN ) {
082                        scores[j]++; // Mark, found this word n times
083                    } else {
084                        // Found something that was forbidden.
085                        return null;
086                    }
087                }
088            }
089        }
090
091        //
092        //  Check that we have all required words.
093        //
094        int totalscore = 0;
095
096        for( int j = 0; j < scores.length; j++ ) {
097            // Give five points for each occurrence
098            // of the word in the wiki name.
099
100            if( wikiname.toLowerCase().indexOf( m_queries[j].word ) != -1 && m_queries[j].type != QueryItem.FORBIDDEN ) {
101                scores[j] += 5;
102            }
103
104            //  Filter out pages if the search word is marked 'required'
105            //  but they have no score.
106
107            if( m_queries[j].type == QueryItem.REQUIRED && scores[j] == 0 ) {
108                return null;
109            }
110
111            //
112            //  Count the total score for this page.
113            //
114            totalscore += scores[j];
115        }
116
117        if( totalscore > 0 ) {
118            return new SearchResultImpl( wikiname, totalscore );
119        }
120
121        return null;
122    }
123
124    /**
125     *  A local search result.
126     */
127    public class SearchResultImpl implements SearchResult {
128        
129        int      m_score;
130        WikiPage m_page;
131
132        /**
133         *  Create a new SearchResult with a given name and a score.
134         *  
135         *  @param name Page Name
136         *  @param score A score from 0+
137         */
138        public SearchResultImpl( String name, int score ) {
139            m_page  = new WikiPage( m_engine, name );
140            m_score = score;
141        }
142
143        /**
144         *  Returns Wikipage for this result.
145         *  @return WikiPage
146         */
147        public WikiPage getPage() {
148            return m_page;
149        }
150
151        /**
152         *  Returns a score for this match.
153         *  
154         *  @return Score from 0+
155         */
156        public int getScore() {
157            return m_score;
158        }
159
160        /**
161         *  Returns an empty array, since BasicSearchProvider does not support
162         *  context matching.
163         *  
164         *  @return an empty array
165         */
166        public String[] getContexts() {
167            // Unimplemented
168            return new String[0];
169        }
170    }
171
172}