001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.   
018 */
019package org.apache.wiki.search;
020
021import org.apache.wiki.WikiEngine;
022import org.apache.wiki.WikiPage;
023import org.apache.wiki.api.core.Engine;
024import org.apache.wiki.api.core.Page;
025import org.apache.wiki.api.search.QueryItem;
026import org.apache.wiki.api.spi.Wiki;
027
028import java.io.BufferedReader;
029import java.io.IOException;
030import java.io.StringReader;
031
032
033/**
034 * SearchMatcher performs the task of matching a search query to a page's contents. This utility class is isolated to simplify
035 * WikiPageProvider implementations and to offer an easy target for upgrades. The upcoming(?) TranslatorReader rewrite will
036 * presumably invalidate this, among other things.
037 *
038 * @since 2.1.5
039 */
040public class SearchMatcher {
041    
042    private final QueryItem[] m_queries;
043    private final Engine m_engine;
044
045    /**
046     *  Creates a new SearchMatcher.
047     *
048     *  @param engine The Engine
049     *  @param queries A list of queries
050     */
051    public SearchMatcher( final Engine engine, final QueryItem[] queries ) {
052        m_engine = engine;
053        m_queries = queries != null ? queries.clone() : null;
054    }
055
056    /**
057     * Creates a new SearchMatcher.
058     *
059     * @param engine The Engine
060     * @param queries A list of queries
061     * @deprecated kept for compatibility with page/attachment providers not using public API. Use {@code SearchMatcher(Engine, QueryItem)}
062     * instead.
063     */
064    @Deprecated
065    public SearchMatcher( final WikiEngine engine, final org.apache.wiki.search.QueryItem[] queries ) {
066        this( ( Engine )engine, queries );
067    }
068
069    /**
070     * Compares the page content, available through the given stream, to the query items of this matcher. Returns a search result
071     * object describing the quality of the match.
072     *
073     * <p>This method would benefit of regexps (1.4) and streaming. FIXME!
074     * 
075     * @param wikiname The name of the page
076     * @param pageText The content of the page
077     * @return A SearchResult item, or null, there are no queries
078     * @throws IOException If reading page content fails
079     */
080    public org.apache.wiki.search.SearchResult matchPageContent( final String wikiname, final String pageText ) throws IOException {
081        if( m_queries == null ) {
082            return null;
083        }
084
085        final int[] scores = new int[ m_queries.length ];
086        final BufferedReader in = new BufferedReader( new StringReader( pageText ) );
087        String line;
088
089        while( (line = in.readLine() ) != null ) {
090            line = line.toLowerCase();
091
092            for( int j = 0; j < m_queries.length; j++ ) {
093                int index = -1;
094
095                while( (index = line.indexOf( m_queries[j].word, index + 1 ) ) != -1 ) {
096                    if( m_queries[j].type != QueryItem.FORBIDDEN ) {
097                        scores[j]++; // Mark, found this word n times
098                    } else {
099                        // Found something that was forbidden.
100                        return null;
101                    }
102                }
103            }
104        }
105
106        //  Check that we have all required words.
107        int totalscore = 0;
108
109        for( int j = 0; j < scores.length; j++ ) {
110            // Give five points for each occurrence of the word in the wiki name.
111            if( wikiname.toLowerCase().contains( m_queries[ j ].word ) && m_queries[j].type != QueryItem.FORBIDDEN ) {
112                scores[j] += 5;
113            }
114
115            //  Filter out pages if the search word is marked 'required' but they have no score.
116            if( m_queries[j].type == QueryItem.REQUIRED && scores[j] == 0 ) {
117                return null;
118            }
119
120            //  Count the total score for this page.
121            totalscore += scores[j];
122        }
123
124        if( totalscore > 0 ) {
125            return new SearchResultImpl( wikiname, totalscore );
126        }
127
128        return null;
129    }
130
131    /**
132     *  A local search result.
133     */
134    public class SearchResultImpl implements org.apache.wiki.search.SearchResult {
135        
136        final int  m_score;
137        final Page m_page;
138
139        /**
140         *  Create a new SearchResult with a given name and a score.
141         *  
142         *  @param name Page Name
143         *  @param score A score from 0+
144         */
145        public SearchResultImpl( final String name, final int score ) {
146            m_page  = Wiki.contents().page( m_engine, name );
147            m_score = score;
148        }
149
150        /**
151         *  Returns Wikipage for this result.
152         *  @return WikiPage
153         */
154        @Override
155        public WikiPage getPage() {
156            return ( WikiPage )m_page;
157        }
158
159        /**
160         *  Returns a score for this match.
161         *  
162         *  @return Score from 0+
163         */
164        @Override
165        public int getScore() {
166            return m_score;
167        }
168
169        /**
170         *  Returns an empty array, since BasicSearchProvider does not support context matching.
171         *  
172         *  @return an empty array
173         */
174        @Override
175        public String[] getContexts() {
176            // Unimplemented
177            return new String[0];
178        }
179    }
180
181}