001    /* 
002        Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003    
004        Licensed to the Apache Software Foundation (ASF) under one
005        or more contributor license agreements.  See the NOTICE file
006        distributed with this work for additional information
007        regarding copyright ownership.  The ASF licenses this file
008        to you under the Apache License, Version 2.0 (the
009        "License"); you may not use this file except in compliance
010        with the License.  You may obtain a copy of the License at
011    
012           http://www.apache.org/licenses/LICENSE-2.0
013    
014        Unless required by applicable law or agreed to in writing,
015        software distributed under the License is distributed on an
016        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017        KIND, either express or implied.  See the License for the
018        specific language governing permissions and limitations
019        under the License.  
020     */
021    package org.apache.wiki.plugin;
022    
023    import java.io.IOException;
024    import java.io.InputStream;
025    import java.util.*;
026    
027    import javax.servlet.http.HttpServletRequest;
028    
029    import org.apache.log4j.Logger;
030    import org.apache.oro.text.GlobCompiler;
031    import org.apache.oro.text.regex.*;
032    import org.apache.wiki.WikiContext;
033    import org.apache.wiki.api.exceptions.PluginException;
034    import org.apache.wiki.api.plugin.WikiPlugin;
035    
036    /**
037     *  Denounces a link by removing it from any search engine. 
038     *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
039     *  
040     *  <p>Parameters : </p>
041     *  <ul>
042     *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
043     *  <li><b>text</b> - The text to use, defaults to the link</li>
044     *  </ul>
045     *
046     *  @since 2.1.40.
047     */
048    public class Denounce implements WikiPlugin
049    {
050        private static Logger     log = Logger.getLogger(Denounce.class);
051    
052        /** Parameter name for setting the link.  Value is <tt>{@value}</tt>. */
053        public static final String PARAM_LINK = "link";
054        /** Parameter name for setting the text.  Value is <tt>{@value}</tt>. */
055        public static final String PARAM_TEXT = "text";
056    
057        private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
058        private static final String PROP_AGENTPATTERN   = "denounce.agentpattern.";
059        private static final String PROP_HOSTPATTERN    = "denounce.hostpattern.";
060        private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
061    
062        private static final String PROP_DENOUNCETEXT   = "denounce.denouncetext";
063    
064        private static ArrayList<Pattern> c_refererPatterns = new ArrayList<Pattern>();
065        private static ArrayList<Pattern> c_agentPatterns   = new ArrayList<Pattern>();
066        private static ArrayList<Pattern> c_hostPatterns    = new ArrayList<Pattern>();
067    
068        private static String    c_denounceText    = "";
069    
070        /**
071         *  Prepares the different patterns for later use.  Compiling is
072         *  (probably) expensive, so we do it statically at class load time.
073         */
074        static
075        {
076            try
077            {
078                PatternCompiler compiler = new GlobCompiler();
079                ClassLoader loader = Denounce.class.getClassLoader();
080    
081                InputStream in = loader.getResourceAsStream( PROPERTYFILE );
082    
083                if( in == null )
084                {
085                    throw new IOException("No property file found! (Check the installation, it should be there.)");
086                }
087    
088                Properties props = new Properties();
089                props.load( in );
090    
091                c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
092    
093                for( Enumeration e = props.propertyNames(); e.hasMoreElements(); )
094                {
095                    String name = (String) e.nextElement();
096    
097                    try 
098                    {
099                        if( name.startsWith( PROP_REFERERPATTERN ) )
100                        {
101                            c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
102                        }
103                        else if( name.startsWith( PROP_AGENTPATTERN ) )
104                        {
105                            c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
106                        }
107                        else if( name.startsWith( PROP_HOSTPATTERN ) )
108                        {
109                            c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
110                        }
111                    }
112                    catch( MalformedPatternException ex )
113                    {
114                        log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
115                    }
116                }
117    
118                log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list.");
119            }
120            catch( IOException e )
121            {
122                log.error( "Unable to load URL patterns from "+PROPERTYFILE, e );
123            }
124            catch( Exception e )
125            {
126                log.error( "Unable to initialize Denounce plugin", e );
127            }
128        }
129    
130        /**
131         *  {@inheritDoc}
132         */
133        public String execute( WikiContext context, Map<String, String> params )
134            throws PluginException
135        {
136            String link = params.get( PARAM_LINK );
137            String text = params.get( PARAM_TEXT );
138            boolean linkAllowed = true;
139    
140            if( link == null )
141            {
142                throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!");
143            }
144    
145            HttpServletRequest request = context.getHttpRequest();
146    
147            if( request != null )
148            {
149                linkAllowed = !matchHeaders( request );
150            }
151    
152            if( text == null ) text = link;
153    
154            if( linkAllowed )
155            {
156                // FIXME: Should really call TranslatorReader
157                return "<a href=\""+link+"\">"+text+"</a>";
158            }
159    
160            return c_denounceText;
161        }
162    
163        /**
164         *  Returns true, if the path is found among the referers.
165         */
166        private boolean matchPattern( List list, String path )
167        {
168            PatternMatcher matcher = new Perl5Matcher();
169    
170            for( Iterator i = list.iterator(); i.hasNext(); )
171            {
172                if( matcher.matches( path, (Pattern)i.next() ) )
173                {
174                    return true;
175                }
176            }
177    
178            return false;
179        }
180    
181        // FIXME: Should really return immediately when a match is found.
182    
183        private boolean matchHeaders( HttpServletRequest request )
184        {
185            //
186            //  User Agent
187            //
188    
189            String userAgent = request.getHeader("User-Agent");
190    
191            if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) )
192            {
193                log.debug("Matched user agent "+userAgent+" for denounce.");
194                return true;
195            }
196    
197            //
198            //  Referrer header
199            //
200    
201            String refererPath = request.getHeader("Referer");
202    
203            if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) )
204            {
205                log.debug("Matched referer "+refererPath+" for denounce.");
206                return true;
207            }
208    
209            //
210            //  Host
211            // 
212    
213            String host = request.getRemoteHost();
214    
215            if( host != null && matchPattern( c_hostPatterns, host ) )
216            {
217                log.debug("Matched host "+host+" for denounce.");
218                return true;
219            }
220    
221            return false;
222        }
223    }