001 /*
002 Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)
003
004 Licensed to the Apache Software Foundation (ASF) under one
005 or more contributor license agreements. See the NOTICE file
006 distributed with this work for additional information
007 regarding copyright ownership. The ASF licenses this file
008 to you under the Apache License, Version 2.0 (the
009 "License"); you may not use this file except in compliance
010 with the License. You may obtain a copy of the License at
011
012 http://www.apache.org/licenses/LICENSE-2.0
013
014 Unless required by applicable law or agreed to in writing,
015 software distributed under the License is distributed on an
016 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
017 KIND, either express or implied. See the License for the
018 specific language governing permissions and limitations
019 under the License.
020 */
021 package org.apache.wiki.plugin;
022
023 import java.io.IOException;
024 import java.io.InputStream;
025 import java.util.*;
026
027 import javax.servlet.http.HttpServletRequest;
028
029 import org.apache.log4j.Logger;
030 import org.apache.oro.text.GlobCompiler;
031 import org.apache.oro.text.regex.*;
032 import org.apache.wiki.WikiContext;
033 import org.apache.wiki.api.exceptions.PluginException;
034 import org.apache.wiki.api.plugin.WikiPlugin;
035
036 /**
037 * Denounces a link by removing it from any search engine.
038 * <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
039 *
040 * <p>Parameters : </p>
041 * <ul>
042 * <li><b>link</b> - The link to be denounced, this parameter is required</li>
043 * <li><b>text</b> - The text to use, defaults to the link</li>
044 * </ul>
045 *
046 * @since 2.1.40.
047 */
048 public class Denounce implements WikiPlugin
049 {
050 private static Logger log = Logger.getLogger(Denounce.class);
051
052 /** Parameter name for setting the link. Value is <tt>{@value}</tt>. */
053 public static final String PARAM_LINK = "link";
054 /** Parameter name for setting the text. Value is <tt>{@value}</tt>. */
055 public static final String PARAM_TEXT = "text";
056
057 private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
058 private static final String PROP_AGENTPATTERN = "denounce.agentpattern.";
059 private static final String PROP_HOSTPATTERN = "denounce.hostpattern.";
060 private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";
061
062 private static final String PROP_DENOUNCETEXT = "denounce.denouncetext";
063
064 private static ArrayList<Pattern> c_refererPatterns = new ArrayList<Pattern>();
065 private static ArrayList<Pattern> c_agentPatterns = new ArrayList<Pattern>();
066 private static ArrayList<Pattern> c_hostPatterns = new ArrayList<Pattern>();
067
068 private static String c_denounceText = "";
069
070 /**
071 * Prepares the different patterns for later use. Compiling is
072 * (probably) expensive, so we do it statically at class load time.
073 */
074 static
075 {
076 try
077 {
078 PatternCompiler compiler = new GlobCompiler();
079 ClassLoader loader = Denounce.class.getClassLoader();
080
081 InputStream in = loader.getResourceAsStream( PROPERTYFILE );
082
083 if( in == null )
084 {
085 throw new IOException("No property file found! (Check the installation, it should be there.)");
086 }
087
088 Properties props = new Properties();
089 props.load( in );
090
091 c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );
092
093 for( Enumeration e = props.propertyNames(); e.hasMoreElements(); )
094 {
095 String name = (String) e.nextElement();
096
097 try
098 {
099 if( name.startsWith( PROP_REFERERPATTERN ) )
100 {
101 c_refererPatterns.add( compiler.compile( props.getProperty(name) ) );
102 }
103 else if( name.startsWith( PROP_AGENTPATTERN ) )
104 {
105 c_agentPatterns.add( compiler.compile( props.getProperty(name) ) );
106 }
107 else if( name.startsWith( PROP_HOSTPATTERN ) )
108 {
109 c_hostPatterns.add( compiler.compile( props.getProperty(name) ) );
110 }
111 }
112 catch( MalformedPatternException ex )
113 {
114 log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex );
115 }
116 }
117
118 log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list.");
119 }
120 catch( IOException e )
121 {
122 log.error( "Unable to load URL patterns from "+PROPERTYFILE, e );
123 }
124 catch( Exception e )
125 {
126 log.error( "Unable to initialize Denounce plugin", e );
127 }
128 }
129
130 /**
131 * {@inheritDoc}
132 */
133 public String execute( WikiContext context, Map<String, String> params )
134 throws PluginException
135 {
136 String link = params.get( PARAM_LINK );
137 String text = params.get( PARAM_TEXT );
138 boolean linkAllowed = true;
139
140 if( link == null )
141 {
142 throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!");
143 }
144
145 HttpServletRequest request = context.getHttpRequest();
146
147 if( request != null )
148 {
149 linkAllowed = !matchHeaders( request );
150 }
151
152 if( text == null ) text = link;
153
154 if( linkAllowed )
155 {
156 // FIXME: Should really call TranslatorReader
157 return "<a href=\""+link+"\">"+text+"</a>";
158 }
159
160 return c_denounceText;
161 }
162
163 /**
164 * Returns true, if the path is found among the referers.
165 */
166 private boolean matchPattern( List list, String path )
167 {
168 PatternMatcher matcher = new Perl5Matcher();
169
170 for( Iterator i = list.iterator(); i.hasNext(); )
171 {
172 if( matcher.matches( path, (Pattern)i.next() ) )
173 {
174 return true;
175 }
176 }
177
178 return false;
179 }
180
181 // FIXME: Should really return immediately when a match is found.
182
183 private boolean matchHeaders( HttpServletRequest request )
184 {
185 //
186 // User Agent
187 //
188
189 String userAgent = request.getHeader("User-Agent");
190
191 if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) )
192 {
193 log.debug("Matched user agent "+userAgent+" for denounce.");
194 return true;
195 }
196
197 //
198 // Referrer header
199 //
200
201 String refererPath = request.getHeader("Referer");
202
203 if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) )
204 {
205 log.debug("Matched referer "+refererPath+" for denounce.");
206 return true;
207 }
208
209 //
210 // Host
211 //
212
213 String host = request.getRemoteHost();
214
215 if( host != null && matchPattern( c_hostPatterns, host ) )
216 {
217 log.debug("Matched host "+host+" for denounce.");
218 return true;
219 }
220
221 return false;
222 }
223 }