001/*
002    Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003
004    Licensed to the Apache Software Foundation (ASF) under one
005    or more contributor license agreements.  See the NOTICE file
006    distributed with this work for additional information
007    regarding copyright ownership.  The ASF licenses this file
008    to you under the Apache License, Version 2.0 (the
009    "License"); you may not use this file except in compliance
010    with the License.  You may obtain a copy of the License at
011
012       http://www.apache.org/licenses/LICENSE-2.0
013
014    Unless required by applicable law or agreed to in writing,
015    software distributed under the License is distributed on an
016    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017    KIND, either express or implied.  See the License for the
018    specific language governing permissions and limitations
019    under the License.
020 */
021package org.apache.wiki.plugin;
022
023import org.apache.logging.log4j.LogManager;
024import org.apache.logging.log4j.Logger;
025import org.apache.oro.text.GlobCompiler;
026import org.apache.oro.text.regex.MalformedPatternException;
027import org.apache.oro.text.regex.Pattern;
028import org.apache.oro.text.regex.PatternCompiler;
029import org.apache.oro.text.regex.PatternMatcher;
030import org.apache.oro.text.regex.Perl5Matcher;
031import org.apache.wiki.api.core.Context;
032import org.apache.wiki.api.exceptions.PluginException;
033import org.apache.wiki.api.plugin.Plugin;
034import org.apache.wiki.util.TextUtil;
035
036import javax.servlet.http.HttpServletRequest;
037import java.io.IOException;
038import java.io.InputStream;
039import java.net.URL;
040import java.util.ArrayList;
041import java.util.Enumeration;
042import java.util.List;
043import java.util.Map;
044import java.util.Properties;
045
046/**
047 *  Denounces a link by removing it from any search engine.
048 *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
049 *
050 *  <p>Parameters : </p>
051 *  <ul>
052 *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
053 *  <li><b>text</b> - The text to use, defaults to the link</li>
054 *  </ul>
055 *
056 *  @since 2.1.40.
057 */
058public class Denounce implements Plugin {
059
060    private static final Logger LOG = LogManager.getLogger( Denounce.class );
061
062    /** Parameter name for setting the link.  Value is <tt>{@value}</tt>. */
063    public static final String PARAM_LINK = "link";
064    /** Parameter name for setting the text.  Value is <tt>{@value}</tt>. */
065    public static final String PARAM_TEXT = "text";
066
067    private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
068    private static final String PROP_AGENTPATTERN   = "denounce.agentpattern.";
069    private static final String PROP_HOSTPATTERN    = "denounce.hostpattern.";
070    private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
071
072    private static final String PROP_DENOUNCETEXT   = "denounce.denouncetext";
073
074    private static final ArrayList< Pattern > c_refererPatterns = new ArrayList<>();
075    private static final ArrayList< Pattern > c_agentPatterns   = new ArrayList<>();
076    private static final ArrayList< Pattern > c_hostPatterns    = new ArrayList<>();
077
078    private static String c_denounceText = "";
079
080    /*
081     *  Prepares the different patterns for later use.  Compiling is
082     *  (probably) expensive, so we do it statically at class load time.
083     */
084    static {
085        try {
086            final PatternCompiler compiler = new GlobCompiler();
087            final ClassLoader loader = Denounce.class.getClassLoader();
088            final InputStream in = loader.getResourceAsStream( PROPERTYFILE );
089            if( in == null ) {
090                throw new IOException( "No property file found! (Check the installation, it should be there.)" );
091            }
092
093            final Properties props = new Properties();
094            props.load( in );
095
096            c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
097
098            for( final Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); ) {
099                final String name = (String) e.nextElement();
100
101                try {
102                    if( name.startsWith( PROP_REFERERPATTERN ) ) {
103                        c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
104                    } else if( name.startsWith( PROP_AGENTPATTERN ) ) {
105                        c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
106                    } else if( name.startsWith( PROP_HOSTPATTERN ) ) {
107                        c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
108                    }
109                } catch( final MalformedPatternException ex ) {
110                    LOG.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
111                }
112            }
113
114            LOG.debug( "Added " + c_refererPatterns.size() + c_agentPatterns.size() + c_hostPatterns.size() + " crawlers to denounce list." );
115        } catch( final IOException e ) {
116            LOG.error( "Unable to load URL patterns from " + PROPERTYFILE, e );
117        } catch( final Exception e ) {
118            LOG.error( "Unable to initialize Denounce plugin", e );
119        }
120    }
121
122    /**
123     *  {@inheritDoc}
124     */
125    @Override
126    public String execute( final Context context, final Map<String, String> params ) throws PluginException {
127        final String link = TextUtil.replaceEntities( params.get( PARAM_LINK ) );
128        //final String link = params.get( PARAM_LINK );
129        String text = params.get( PARAM_TEXT );
130        boolean linkAllowed = true;
131
132        if( link == null ) {
133            throw new PluginException( "Denounce: No parameter "+PARAM_LINK+" defined!" );
134        }
135        if( !isLinkValid( link ) ) {
136            throw new PluginException( "Denounce: Not a valid link " + link );
137        }
138
139        final HttpServletRequest request = context.getHttpRequest();
140        if( request != null ) {
141            linkAllowed = !matchHeaders( request );
142        }
143
144        if( text == null ) {
145            text = link;
146        }
147
148        if( linkAllowed ) {
149            return "<a href=\"" + link + "\">" + TextUtil.replaceEntities( text ) + "</a>";
150        }
151
152        return c_denounceText;
153    }
154
155    boolean isLinkValid( final String link ) {
156        try {
157            new URL( link ).toURI().parseServerAuthority();
158        } catch ( final Exception e ) {
159            LOG.debug( "invalid link {} - {}", link, e.getMessage() );
160            return false;
161        }
162        return true;
163    }
164
165    /**
166     *  Returns true, if the path is found among the referers.
167     */
168    private boolean matchPattern( final List< Pattern > list, final String path ) {
169        final PatternMatcher matcher = new Perl5Matcher();
170        return list.stream().anyMatch(pattern -> matcher.matches(path, pattern));
171    }
172
173    private boolean matchHeaders( final HttpServletRequest request ) {
174        //  User Agent
175        final String userAgent = request.getHeader( "User-Agent" );
176        if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) ) {
177            LOG.debug( "Matched user agent " + userAgent + " for denounce." );
178            return true;
179        }
180
181        //  Referrer header
182        final String refererPath = request.getHeader( "Referer" );
183        if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) ) {
184            LOG.debug( "Matched referer " + refererPath + " for denounce." );
185            return true;
186        }
187
188        //  Host
189        final String host = request.getRemoteHost();
190        if( host != null && matchPattern( c_hostPatterns, host ) ) {
191            LOG.debug( "Matched host " + host + " for denounce." );
192            return true;
193        }
194
195        return false;
196    }
197
198}