001/*
002    Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003
004    Licensed to the Apache Software Foundation (ASF) under one
005    or more contributor license agreements.  See the NOTICE file
006    distributed with this work for additional information
007    regarding copyright ownership.  The ASF licenses this file
008    to you under the Apache License, Version 2.0 (the
009    "License"); you may not use this file except in compliance
010    with the License.  You may obtain a copy of the License at
011
012       http://www.apache.org/licenses/LICENSE-2.0
013
014    Unless required by applicable law or agreed to in writing,
015    software distributed under the License is distributed on an
016    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017    KIND, either express or implied.  See the License for the
018    specific language governing permissions and limitations
019    under the License.
020 */
021package org.apache.wiki.plugin;
022
023import java.io.IOException;
024import java.io.InputStream;
025import java.util.ArrayList;
026import java.util.Enumeration;
027import java.util.Iterator;
028import java.util.List;
029import java.util.Map;
030import java.util.Properties;
031
032import javax.servlet.http.HttpServletRequest;
033
034import org.apache.log4j.Logger;
035import org.apache.oro.text.GlobCompiler;
036import org.apache.oro.text.regex.MalformedPatternException;
037import org.apache.oro.text.regex.Pattern;
038import org.apache.oro.text.regex.PatternCompiler;
039import org.apache.oro.text.regex.PatternMatcher;
040import org.apache.oro.text.regex.Perl5Matcher;
041import org.apache.wiki.WikiContext;
042import org.apache.wiki.api.exceptions.PluginException;
043import org.apache.wiki.api.plugin.WikiPlugin;
044
045/**
046 *  Denounces a link by removing it from any search engine.
047 *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
048 *
049 *  <p>Parameters : </p>
050 *  <ul>
051 *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
052 *  <li><b>text</b> - The text to use, defaults to the link</li>
053 *  </ul>
054 *
055 *  @since 2.1.40.
056 */
057public class Denounce implements WikiPlugin
058{
059    private static Logger     log = Logger.getLogger(Denounce.class);
060
061    /** Parameter name for setting the link.  Value is <tt>{@value}</tt>. */
062    public static final String PARAM_LINK = "link";
063    /** Parameter name for setting the text.  Value is <tt>{@value}</tt>. */
064    public static final String PARAM_TEXT = "text";
065
066    private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
067    private static final String PROP_AGENTPATTERN   = "denounce.agentpattern.";
068    private static final String PROP_HOSTPATTERN    = "denounce.hostpattern.";
069    private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
070
071    private static final String PROP_DENOUNCETEXT   = "denounce.denouncetext";
072
073    private static ArrayList<Pattern> c_refererPatterns = new ArrayList<>();
074    private static ArrayList<Pattern> c_agentPatterns   = new ArrayList<>();
075    private static ArrayList<Pattern> c_hostPatterns    = new ArrayList<>();
076
077    private static String    c_denounceText    = "";
078
079    /**
080     *  Prepares the different patterns for later use.  Compiling is
081     *  (probably) expensive, so we do it statically at class load time.
082     */
083    static
084    {
085        try
086        {
087            PatternCompiler compiler = new GlobCompiler();
088            ClassLoader loader = Denounce.class.getClassLoader();
089
090            InputStream in = loader.getResourceAsStream( PROPERTYFILE );
091
092            if( in == null )
093            {
094                throw new IOException("No property file found! (Check the installation, it should be there.)");
095            }
096
097            Properties props = new Properties();
098            props.load( in );
099
100            c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
101
102            for( Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); )
103            {
104                String name = (String) e.nextElement();
105
106                try
107                {
108                    if( name.startsWith( PROP_REFERERPATTERN ) )
109                    {
110                        c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
111                    }
112                    else if( name.startsWith( PROP_AGENTPATTERN ) )
113                    {
114                        c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
115                    }
116                    else if( name.startsWith( PROP_HOSTPATTERN ) )
117                    {
118                        c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
119                    }
120                }
121                catch( MalformedPatternException ex )
122                {
123                    log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
124                }
125            }
126
127            log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list.");
128        }
129        catch( IOException e )
130        {
131            log.error( "Unable to load URL patterns from "+PROPERTYFILE, e );
132        }
133        catch( Exception e )
134        {
135            log.error( "Unable to initialize Denounce plugin", e );
136        }
137    }
138
139    /**
140     *  {@inheritDoc}
141     */
142    @Override
143    public String execute( WikiContext context, Map<String, String> params )
144        throws PluginException
145    {
146        String link = params.get( PARAM_LINK );
147        String text = params.get( PARAM_TEXT );
148        boolean linkAllowed = true;
149
150        if( link == null )
151        {
152            throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!");
153        }
154
155        HttpServletRequest request = context.getHttpRequest();
156
157        if( request != null )
158        {
159            linkAllowed = !matchHeaders( request );
160        }
161
162        if( text == null ) text = link;
163
164        if( linkAllowed )
165        {
166            // FIXME: Should really call TranslatorReader
167            return "<a href=\""+link+"\">"+text+"</a>";
168        }
169
170        return c_denounceText;
171    }
172
173    /**
174     *  Returns true, if the path is found among the referers.
175     */
176    private boolean matchPattern( List< Pattern > list, String path )
177    {
178        PatternMatcher matcher = new Perl5Matcher();
179
180        for( Iterator< Pattern > i = list.iterator(); i.hasNext(); )
181        {
182            if( matcher.matches( path, i.next() ) )
183            {
184                return true;
185            }
186        }
187
188        return false;
189    }
190
191    // FIXME: Should really return immediately when a match is found.
192
193    private boolean matchHeaders( HttpServletRequest request )
194    {
195        //
196        //  User Agent
197        //
198
199        String userAgent = request.getHeader("User-Agent");
200
201        if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) )
202        {
203            log.debug("Matched user agent "+userAgent+" for denounce.");
204            return true;
205        }
206
207        //
208        //  Referrer header
209        //
210
211        String refererPath = request.getHeader("Referer");
212
213        if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) )
214        {
215            log.debug("Matched referer "+refererPath+" for denounce.");
216            return true;
217        }
218
219        //
220        //  Host
221        //
222
223        String host = request.getRemoteHost();
224
225        if( host != null && matchPattern( c_hostPatterns, host ) )
226        {
227            log.debug("Matched host "+host+" for denounce.");
228            return true;
229        }
230
231        return false;
232    }
233}