001/*
002    Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003
004    Licensed to the Apache Software Foundation (ASF) under one
005    or more contributor license agreements.  See the NOTICE file
006    distributed with this work for additional information
007    regarding copyright ownership.  The ASF licenses this file
008    to you under the Apache License, Version 2.0 (the
009    "License"); you may not use this file except in compliance
010    with the License.  You may obtain a copy of the License at
011
012       http://www.apache.org/licenses/LICENSE-2.0
013
014    Unless required by applicable law or agreed to in writing,
015    software distributed under the License is distributed on an
016    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017    KIND, either express or implied.  See the License for the
018    specific language governing permissions and limitations
019    under the License.
020 */
021package org.apache.wiki.plugin;
022
023import java.io.IOException;
024import java.io.InputStream;
025import java.util.*;
026
027import javax.servlet.http.HttpServletRequest;
028
029import org.apache.log4j.Logger;
030import org.apache.oro.text.GlobCompiler;
031import org.apache.oro.text.regex.*;
032import org.apache.wiki.WikiContext;
033import org.apache.wiki.api.exceptions.PluginException;
034import org.apache.wiki.api.plugin.WikiPlugin;
035
036/**
037 *  Denounces a link by removing it from any search engine.
038 *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
039 *
040 *  <p>Parameters : </p>
041 *  <ul>
042 *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
043 *  <li><b>text</b> - The text to use, defaults to the link</li>
044 *  </ul>
045 *
046 *  @since 2.1.40.
047 */
048public class Denounce implements WikiPlugin
049{
050    private static Logger     log = Logger.getLogger(Denounce.class);
051
052    /** Parameter name for setting the link.  Value is <tt>{@value}</tt>. */
053    public static final String PARAM_LINK = "link";
054    /** Parameter name for setting the text.  Value is <tt>{@value}</tt>. */
055    public static final String PARAM_TEXT = "text";
056
057    private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
058    private static final String PROP_AGENTPATTERN   = "denounce.agentpattern.";
059    private static final String PROP_HOSTPATTERN    = "denounce.hostpattern.";
060    private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
061
062    private static final String PROP_DENOUNCETEXT   = "denounce.denouncetext";
063
064    private static ArrayList<Pattern> c_refererPatterns = new ArrayList<Pattern>();
065    private static ArrayList<Pattern> c_agentPatterns   = new ArrayList<Pattern>();
066    private static ArrayList<Pattern> c_hostPatterns    = new ArrayList<Pattern>();
067
068    private static String    c_denounceText    = "";
069
070    /**
071     *  Prepares the different patterns for later use.  Compiling is
072     *  (probably) expensive, so we do it statically at class load time.
073     */
074    static
075    {
076        try
077        {
078            PatternCompiler compiler = new GlobCompiler();
079            ClassLoader loader = Denounce.class.getClassLoader();
080
081            InputStream in = loader.getResourceAsStream( PROPERTYFILE );
082
083            if( in == null )
084            {
085                throw new IOException("No property file found! (Check the installation, it should be there.)");
086            }
087
088            Properties props = new Properties();
089            props.load( in );
090
091            c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
092
093            for( Enumeration e = props.propertyNames(); e.hasMoreElements(); )
094            {
095                String name = (String) e.nextElement();
096
097                try
098                {
099                    if( name.startsWith( PROP_REFERERPATTERN ) )
100                    {
101                        c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
102                    }
103                    else if( name.startsWith( PROP_AGENTPATTERN ) )
104                    {
105                        c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
106                    }
107                    else if( name.startsWith( PROP_HOSTPATTERN ) )
108                    {
109                        c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
110                    }
111                }
112                catch( MalformedPatternException ex )
113                {
114                    log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
115                }
116            }
117
118            log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list.");
119        }
120        catch( IOException e )
121        {
122            log.error( "Unable to load URL patterns from "+PROPERTYFILE, e );
123        }
124        catch( Exception e )
125        {
126            log.error( "Unable to initialize Denounce plugin", e );
127        }
128    }
129
130    /**
131     *  {@inheritDoc}
132     */
133    public String execute( WikiContext context, Map<String, String> params )
134        throws PluginException
135    {
136        String link = params.get( PARAM_LINK );
137        String text = params.get( PARAM_TEXT );
138        boolean linkAllowed = true;
139
140        if( link == null )
141        {
142            throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!");
143        }
144
145        HttpServletRequest request = context.getHttpRequest();
146
147        if( request != null )
148        {
149            linkAllowed = !matchHeaders( request );
150        }
151
152        if( text == null ) text = link;
153
154        if( linkAllowed )
155        {
156            // FIXME: Should really call TranslatorReader
157            return "<a href=\""+link+"\">"+text+"</a>";
158        }
159
160        return c_denounceText;
161    }
162
163    /**
164     *  Returns true, if the path is found among the referers.
165     */
166    private boolean matchPattern( List< Pattern > list, String path )
167    {
168        PatternMatcher matcher = new Perl5Matcher();
169
170        for( Iterator< Pattern > i = list.iterator(); i.hasNext(); )
171        {
172            if( matcher.matches( path, i.next() ) )
173            {
174                return true;
175            }
176        }
177
178        return false;
179    }
180
181    // FIXME: Should really return immediately when a match is found.
182
183    private boolean matchHeaders( HttpServletRequest request )
184    {
185        //
186        //  User Agent
187        //
188
189        String userAgent = request.getHeader("User-Agent");
190
191        if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) )
192        {
193            log.debug("Matched user agent "+userAgent+" for denounce.");
194            return true;
195        }
196
197        //
198        //  Referrer header
199        //
200
201        String refererPath = request.getHeader("Referer");
202
203        if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) )
204        {
205            log.debug("Matched referer "+refererPath+" for denounce.");
206            return true;
207        }
208
209        //
210        //  Host
211        //
212
213        String host = request.getRemoteHost();
214
215        if( host != null && matchPattern( c_hostPatterns, host ) )
216        {
217            log.debug("Matched host "+host+" for denounce.");
218            return true;
219        }
220
221        return false;
222    }
223}