001/*
002    Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003
004    Licensed to the Apache Software Foundation (ASF) under one
005    or more contributor license agreements.  See the NOTICE file
006    distributed with this work for additional information
007    regarding copyright ownership.  The ASF licenses this file
008    to you under the Apache License, Version 2.0 (the
009    "License"); you may not use this file except in compliance
010    with the License.  You may obtain a copy of the License at
011
012       http://www.apache.org/licenses/LICENSE-2.0
013
014    Unless required by applicable law or agreed to in writing,
015    software distributed under the License is distributed on an
016    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017    KIND, either express or implied.  See the License for the
018    specific language governing permissions and limitations
019    under the License.
020 */
021package org.apache.wiki.plugin;
022
023import org.apache.log4j.Logger;
024import org.apache.oro.text.GlobCompiler;
025import org.apache.oro.text.regex.MalformedPatternException;
026import org.apache.oro.text.regex.Pattern;
027import org.apache.oro.text.regex.PatternCompiler;
028import org.apache.oro.text.regex.PatternMatcher;
029import org.apache.oro.text.regex.Perl5Matcher;
030import org.apache.wiki.api.core.Context;
031import org.apache.wiki.api.exceptions.PluginException;
032import org.apache.wiki.api.plugin.Plugin;
033import org.apache.wiki.util.TextUtil;
034
035import javax.servlet.http.HttpServletRequest;
036import java.io.IOException;
037import java.io.InputStream;
038import java.util.ArrayList;
039import java.util.Enumeration;
040import java.util.List;
041import java.util.Map;
042import java.util.Properties;
043
044/**
045 *  Denounces a link by removing it from any search engine.
046 *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
047 *
048 *  <p>Parameters : </p>
049 *  <ul>
050 *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
051 *  <li><b>text</b> - The text to use, defaults to the link</li>
052 *  </ul>
053 *
054 *  @since 2.1.40.
055 */
056public class Denounce implements Plugin {
057
058    private static final Logger log = Logger.getLogger(Denounce.class);
059
060    /** Parameter name for setting the link.  Value is <tt>{@value}</tt>. */
061    public static final String PARAM_LINK = "link";
062    /** Parameter name for setting the text.  Value is <tt>{@value}</tt>. */
063    public static final String PARAM_TEXT = "text";
064
065    private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
066    private static final String PROP_AGENTPATTERN   = "denounce.agentpattern.";
067    private static final String PROP_HOSTPATTERN    = "denounce.hostpattern.";
068    private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
069
070    private static final String PROP_DENOUNCETEXT   = "denounce.denouncetext";
071
072    private static ArrayList<Pattern> c_refererPatterns = new ArrayList<>();
073    private static ArrayList<Pattern> c_agentPatterns   = new ArrayList<>();
074    private static ArrayList<Pattern> c_hostPatterns    = new ArrayList<>();
075
076    private static String    c_denounceText    = "";
077
078    /**
079     *  Prepares the different patterns for later use.  Compiling is
080     *  (probably) expensive, so we do it statically at class load time.
081     */
082    static
083    {
084        try
085        {
086            final PatternCompiler compiler = new GlobCompiler();
087            final ClassLoader loader = Denounce.class.getClassLoader();
088
089            final InputStream in = loader.getResourceAsStream( PROPERTYFILE );
090
091            if( in == null )
092            {
093                throw new IOException("No property file found! (Check the installation, it should be there.)");
094            }
095
096            final Properties props = new Properties();
097            props.load( in );
098
099            c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
100
101            for( final Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); )
102            {
103                final String name = (String) e.nextElement();
104
105                try
106                {
107                    if( name.startsWith( PROP_REFERERPATTERN ) )
108                    {
109                        c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
110                    }
111                    else if( name.startsWith( PROP_AGENTPATTERN ) )
112                    {
113                        c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
114                    }
115                    else if( name.startsWith( PROP_HOSTPATTERN ) )
116                    {
117                        c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
118                    }
119                }
120                catch( final MalformedPatternException ex )
121                {
122                    log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
123                }
124            }
125
126            log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list.");
127        }
128        catch( final IOException e )
129        {
130            log.error( "Unable to load URL patterns from "+PROPERTYFILE, e );
131        }
132        catch( final Exception e )
133        {
134            log.error( "Unable to initialize Denounce plugin", e );
135        }
136    }
137
138    /**
139     *  {@inheritDoc}
140     */
141    @Override
142    public String execute( final Context context, final Map<String, String> params ) throws PluginException {
143        final String link = params.get( PARAM_LINK );
144        String text = params.get( PARAM_TEXT );
145        boolean linkAllowed = true;
146
147        if( link == null )
148        {
149            throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!");
150        }
151
152        final HttpServletRequest request = context.getHttpRequest();
153
154        if( request != null )
155        {
156            linkAllowed = !matchHeaders( request );
157        }
158
159        if( text == null ) text = link;
160
161        if( linkAllowed )
162        {
163            // FIXME: Should really call TranslatorReader
164            return "<a href=\""+link+"\">"+ TextUtil.replaceEntities(text) +"</a>";
165        }
166
167        return c_denounceText;
168    }
169
170    /**
171     *  Returns true, if the path is found among the referers.
172     */
173    private boolean matchPattern( final List< Pattern > list, final String path ) {
174        final PatternMatcher matcher = new Perl5Matcher();
175        for( final Pattern pattern : list ) {
176            if( matcher.matches( path, pattern ) ) {
177                return true;
178            }
179        }
180
181        return false;
182    }
183
184    // FIXME: Should really return immediately when a match is found.
185
186    private boolean matchHeaders( final HttpServletRequest request )
187    {
188        //
189        //  User Agent
190        //
191
192        final String userAgent = request.getHeader("User-Agent");
193
194        if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) )
195        {
196            log.debug("Matched user agent "+userAgent+" for denounce.");
197            return true;
198        }
199
200        //
201        //  Referrer header
202        //
203
204        final String refererPath = request.getHeader("Referer");
205
206        if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) )
207        {
208            log.debug("Matched referer "+refererPath+" for denounce.");
209            return true;
210        }
211
212        //
213        //  Host
214        //
215
216        final String host = request.getRemoteHost();
217
218        if( host != null && matchPattern( c_hostPatterns, host ) )
219        {
220            log.debug("Matched host "+host+" for denounce.");
221            return true;
222        }
223
224        return false;
225    }
226}