001/*
002    Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003
004    Licensed to the Apache Software Foundation (ASF) under one
005    or more contributor license agreements.  See the NOTICE file
006    distributed with this work for additional information
007    regarding copyright ownership.  The ASF licenses this file
008    to you under the Apache License, Version 2.0 (the
009    "License"); you may not use this file except in compliance
010    with the License.  You may obtain a copy of the License at
011
012       http://www.apache.org/licenses/LICENSE-2.0
013
014    Unless required by applicable law or agreed to in writing,
015    software distributed under the License is distributed on an
016    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017    KIND, either express or implied.  See the License for the
018    specific language governing permissions and limitations
019    under the License.
020 */
021package org.apache.wiki.plugin;
022
023import java.io.IOException;
024import java.io.InputStream;
025import java.util.ArrayList;
026import java.util.Enumeration;
027import java.util.Iterator;
028import java.util.List;
029import java.util.Map;
030import java.util.Properties;
031
032import javax.servlet.http.HttpServletRequest;
033
034import org.apache.log4j.Logger;
035import org.apache.oro.text.GlobCompiler;
036import org.apache.oro.text.regex.MalformedPatternException;
037import org.apache.oro.text.regex.Pattern;
038import org.apache.oro.text.regex.PatternCompiler;
039import org.apache.oro.text.regex.PatternMatcher;
040import org.apache.oro.text.regex.Perl5Matcher;
041import org.apache.wiki.WikiContext;
042import org.apache.wiki.api.exceptions.PluginException;
043import org.apache.wiki.api.plugin.WikiPlugin;
044import org.apache.wiki.util.TextUtil;
045
046/**
047 *  Denounces a link by removing it from any search engine.
048 *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
049 *
050 *  <p>Parameters : </p>
051 *  <ul>
052 *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
053 *  <li><b>text</b> - The text to use, defaults to the link</li>
054 *  </ul>
055 *
056 *  @since 2.1.40.
057 */
058public class Denounce implements WikiPlugin
059{
060    private static Logger     log = Logger.getLogger(Denounce.class);
061
062    /** Parameter name for setting the link.  Value is <tt>{@value}</tt>. */
063    public static final String PARAM_LINK = "link";
064    /** Parameter name for setting the text.  Value is <tt>{@value}</tt>. */
065    public static final String PARAM_TEXT = "text";
066
067    private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
068    private static final String PROP_AGENTPATTERN   = "denounce.agentpattern.";
069    private static final String PROP_HOSTPATTERN    = "denounce.hostpattern.";
070    private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
071
072    private static final String PROP_DENOUNCETEXT   = "denounce.denouncetext";
073
074    private static ArrayList<Pattern> c_refererPatterns = new ArrayList<>();
075    private static ArrayList<Pattern> c_agentPatterns   = new ArrayList<>();
076    private static ArrayList<Pattern> c_hostPatterns    = new ArrayList<>();
077
078    private static String    c_denounceText    = "";
079
080    /**
081     *  Prepares the different patterns for later use.  Compiling is
082     *  (probably) expensive, so we do it statically at class load time.
083     */
084    static
085    {
086        try
087        {
088            PatternCompiler compiler = new GlobCompiler();
089            ClassLoader loader = Denounce.class.getClassLoader();
090
091            InputStream in = loader.getResourceAsStream( PROPERTYFILE );
092
093            if( in == null )
094            {
095                throw new IOException("No property file found! (Check the installation, it should be there.)");
096            }
097
098            Properties props = new Properties();
099            props.load( in );
100
101            c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
102
103            for( Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); )
104            {
105                String name = (String) e.nextElement();
106
107                try
108                {
109                    if( name.startsWith( PROP_REFERERPATTERN ) )
110                    {
111                        c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
112                    }
113                    else if( name.startsWith( PROP_AGENTPATTERN ) )
114                    {
115                        c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
116                    }
117                    else if( name.startsWith( PROP_HOSTPATTERN ) )
118                    {
119                        c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
120                    }
121                }
122                catch( MalformedPatternException ex )
123                {
124                    log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
125                }
126            }
127
128            log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list.");
129        }
130        catch( IOException e )
131        {
132            log.error( "Unable to load URL patterns from "+PROPERTYFILE, e );
133        }
134        catch( Exception e )
135        {
136            log.error( "Unable to initialize Denounce plugin", e );
137        }
138    }
139
140    /**
141     *  {@inheritDoc}
142     */
143    @Override
144    public String execute( WikiContext context, Map<String, String> params )
145        throws PluginException
146    {
147        String link = params.get( PARAM_LINK );
148        String text = params.get( PARAM_TEXT );
149        boolean linkAllowed = true;
150
151        if( link == null )
152        {
153            throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!");
154        }
155
156        HttpServletRequest request = context.getHttpRequest();
157
158        if( request != null )
159        {
160            linkAllowed = !matchHeaders( request );
161        }
162
163        if( text == null ) text = link;
164
165        if( linkAllowed )
166        {
167            // FIXME: Should really call TranslatorReader
168            return "<a href=\""+link+"\">"+ TextUtil.replaceEntities(text) +"</a>";
169        }
170
171        return c_denounceText;
172    }
173
174    /**
175     *  Returns true, if the path is found among the referers.
176     */
177    private boolean matchPattern( List< Pattern > list, String path )
178    {
179        PatternMatcher matcher = new Perl5Matcher();
180
181        for( Iterator< Pattern > i = list.iterator(); i.hasNext(); )
182        {
183            if( matcher.matches( path, i.next() ) )
184            {
185                return true;
186            }
187        }
188
189        return false;
190    }
191
192    // FIXME: Should really return immediately when a match is found.
193
194    private boolean matchHeaders( HttpServletRequest request )
195    {
196        //
197        //  User Agent
198        //
199
200        String userAgent = request.getHeader("User-Agent");
201
202        if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) )
203        {
204            log.debug("Matched user agent "+userAgent+" for denounce.");
205            return true;
206        }
207
208        //
209        //  Referrer header
210        //
211
212        String refererPath = request.getHeader("Referer");
213
214        if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) )
215        {
216            log.debug("Matched referer "+refererPath+" for denounce.");
217            return true;
218        }
219
220        //
221        //  Host
222        //
223
224        String host = request.getRemoteHost();
225
226        if( host != null && matchPattern( c_hostPatterns, host ) )
227        {
228            log.debug("Matched host "+host+" for denounce.");
229            return true;
230        }
231
232        return false;
233    }
234}