001/*
002    Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003
004    Licensed to the Apache Software Foundation (ASF) under one
005    or more contributor license agreements.  See the NOTICE file
006    distributed with this work for additional information
007    regarding copyright ownership.  The ASF licenses this file
008    to you under the Apache License, Version 2.0 (the
009    "License"); you may not use this file except in compliance
010    with the License.  You may obtain a copy of the License at
011
012       http://www.apache.org/licenses/LICENSE-2.0
013
014    Unless required by applicable law or agreed to in writing,
015    software distributed under the License is distributed on an
016    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017    KIND, either express or implied.  See the License for the
018    specific language governing permissions and limitations
019    under the License.
020 */
021package org.apache.wiki.plugin;
022
023import org.apache.logging.log4j.LogManager;
024import org.apache.logging.log4j.Logger;
025import org.apache.oro.text.GlobCompiler;
026import org.apache.oro.text.regex.MalformedPatternException;
027import org.apache.oro.text.regex.Pattern;
028import org.apache.oro.text.regex.PatternCompiler;
029import org.apache.oro.text.regex.PatternMatcher;
030import org.apache.oro.text.regex.Perl5Matcher;
031import org.apache.wiki.api.core.Context;
032import org.apache.wiki.api.exceptions.PluginException;
033import org.apache.wiki.api.plugin.Plugin;
034import org.apache.wiki.util.TextUtil;
035
036import javax.servlet.http.HttpServletRequest;
037import java.io.IOException;
038import java.io.InputStream;
039import java.net.URL;
040import java.util.ArrayList;
041import java.util.Enumeration;
042import java.util.List;
043import java.util.Map;
044import java.util.Properties;
045
046/**
047 *  Denounces a link by removing it from any search engine.
048 *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
049 *
050 *  <p>Parameters : </p>
051 *  <ul>
052 *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
053 *  <li><b>text</b> - The text to use, defaults to the link</li>
054 *  </ul>
055 *
056 *  @since 2.1.40.
057 */
058public class Denounce implements Plugin {
059
060    private static final Logger log = LogManager.getLogger( Denounce.class );
061
062    /** Parameter name for setting the link.  Value is <tt>{@value}</tt>. */
063    public static final String PARAM_LINK = "link";
064    /** Parameter name for setting the text.  Value is <tt>{@value}</tt>. */
065    public static final String PARAM_TEXT = "text";
066
067    private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
068    private static final String PROP_AGENTPATTERN   = "denounce.agentpattern.";
069    private static final String PROP_HOSTPATTERN    = "denounce.hostpattern.";
070    private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
071
072    private static final String PROP_DENOUNCETEXT   = "denounce.denouncetext";
073
074    private static final ArrayList< Pattern > c_refererPatterns = new ArrayList<>();
075    private static final ArrayList< Pattern > c_agentPatterns   = new ArrayList<>();
076    private static final ArrayList< Pattern > c_hostPatterns    = new ArrayList<>();
077
078    private static String c_denounceText = "";
079
080    /*
081     *  Prepares the different patterns for later use.  Compiling is
082     *  (probably) expensive, so we do it statically at class load time.
083     */
084    static {
085        try {
086            final PatternCompiler compiler = new GlobCompiler();
087            final ClassLoader loader = Denounce.class.getClassLoader();
088            final InputStream in = loader.getResourceAsStream( PROPERTYFILE );
089            if( in == null ) {
090                throw new IOException( "No property file found! (Check the installation, it should be there.)" );
091            }
092
093            final Properties props = new Properties();
094            props.load( in );
095
096            c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
097
098            for( final Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); ) {
099                final String name = (String) e.nextElement();
100
101                try {
102                    if( name.startsWith( PROP_REFERERPATTERN ) ) {
103                        c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
104                    } else if( name.startsWith( PROP_AGENTPATTERN ) ) {
105                        c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
106                    } else if( name.startsWith( PROP_HOSTPATTERN ) ) {
107                        c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
108                    }
109                } catch( final MalformedPatternException ex ) {
110                    log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
111                }
112            }
113
114            log.debug( "Added " + c_refererPatterns.size() + c_agentPatterns.size() + c_hostPatterns.size() + " crawlers to denounce list." );
115        } catch( final IOException e ) {
116            log.error( "Unable to load URL patterns from " + PROPERTYFILE, e );
117        } catch( final Exception e ) {
118            log.error( "Unable to initialize Denounce plugin", e );
119        }
120    }
121
122    /**
123     *  {@inheritDoc}
124     */
125    @Override
126    public String execute( final Context context, final Map<String, String> params ) throws PluginException {
127        final String link = params.get( PARAM_LINK );
128        String text = params.get( PARAM_TEXT );
129        boolean linkAllowed = true;
130
131        if( link == null ) {
132            throw new PluginException( "Denounce: No parameter "+PARAM_LINK+" defined!" );
133        }
134        if( !isLinkValid( link ) ) {
135            throw new PluginException( "Denounce: Not a valid link " + link );
136        }
137
138        final HttpServletRequest request = context.getHttpRequest();
139        if( request != null ) {
140            linkAllowed = !matchHeaders( request );
141        }
142
143        if( text == null ) {
144            text = link;
145        }
146
147        if( linkAllowed ) {
148            return "<a href=\"" + link + "\">" + TextUtil.replaceEntities( text ) + "</a>";
149        }
150
151        return c_denounceText;
152    }
153
154    boolean isLinkValid( final String link ) {
155        try {
156            new URL( link ).toURI().parseServerAuthority();
157        } catch ( final Exception e ) {
158            log.debug( "invalid link {} - {}", link, e.getMessage() );
159            return false;
160        }
161        return true;
162    }
163
164    /**
165     *  Returns true, if the path is found among the referers.
166     */
167    private boolean matchPattern( final List< Pattern > list, final String path ) {
168        final PatternMatcher matcher = new Perl5Matcher();
169        for( final Pattern pattern : list ) {
170            if( matcher.matches( path, pattern ) ) {
171                return true;
172            }
173        }
174        return false;
175    }
176
177    private boolean matchHeaders( final HttpServletRequest request ) {
178        //  User Agent
179        final String userAgent = request.getHeader( "User-Agent" );
180        if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) ) {
181            log.debug( "Matched user agent " + userAgent + " for denounce." );
182            return true;
183        }
184
185        //  Referrer header
186        final String refererPath = request.getHeader( "Referer" );
187        if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) ) {
188            log.debug( "Matched referer " + refererPath + " for denounce." );
189            return true;
190        }
191
192        //  Host
193        final String host = request.getRemoteHost();
194        if( host != null && matchPattern( c_hostPatterns, host ) ) {
195            log.debug( "Matched host " + host + " for denounce." );
196            return true;
197        }
198
199        return false;
200    }
201
202}