001 /* 002 Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi) 003 004 Licensed to the Apache Software Foundation (ASF) under one 005 or more contributor license agreements. See the NOTICE file 006 distributed with this work for additional information 007 regarding copyright ownership. The ASF licenses this file 008 to you under the Apache License, Version 2.0 (the 009 "License"); you may not use this file except in compliance 010 with the License. You may obtain a copy of the License at 011 012 http://www.apache.org/licenses/LICENSE-2.0 013 014 Unless required by applicable law or agreed to in writing, 015 software distributed under the License is distributed on an 016 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 KIND, either express or implied. See the License for the 018 specific language governing permissions and limitations 019 under the License. 020 */ 021 package org.apache.wiki.plugin; 022 023 import java.io.IOException; 024 import java.io.InputStream; 025 import java.util.*; 026 027 import javax.servlet.http.HttpServletRequest; 028 029 import org.apache.log4j.Logger; 030 import org.apache.oro.text.GlobCompiler; 031 import org.apache.oro.text.regex.*; 032 import org.apache.wiki.WikiContext; 033 import org.apache.wiki.api.exceptions.PluginException; 034 import org.apache.wiki.api.plugin.WikiPlugin; 035 036 /** 037 * Denounces a link by removing it from any search engine. 038 * <br> The bots are listed in org/apache/wiki/plugin/denounce.properties. 039 * 040 * <p>Parameters : </p> 041 * <ul> 042 * <li><b>link</b> - The link to be denounced, this parameter is required</li> 043 * <li><b>text</b> - The text to use, defaults to the link</li> 044 * </ul> 045 * 046 * @since 2.1.40. 047 */ 048 public class Denounce implements WikiPlugin 049 { 050 private static Logger log = Logger.getLogger(Denounce.class); 051 052 /** Parameter name for setting the link. Value is <tt>{@value}</tt>. */ 053 public static final String PARAM_LINK = "link"; 054 /** Parameter name for setting the text. Value is <tt>{@value}</tt>. */ 055 public static final String PARAM_TEXT = "text"; 056 057 private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties"; 058 private static final String PROP_AGENTPATTERN = "denounce.agentpattern."; 059 private static final String PROP_HOSTPATTERN = "denounce.hostpattern."; 060 private static final String PROP_REFERERPATTERN = "denounce.refererpattern."; 061 062 private static final String PROP_DENOUNCETEXT = "denounce.denouncetext"; 063 064 private static ArrayList<Pattern> c_refererPatterns = new ArrayList<Pattern>(); 065 private static ArrayList<Pattern> c_agentPatterns = new ArrayList<Pattern>(); 066 private static ArrayList<Pattern> c_hostPatterns = new ArrayList<Pattern>(); 067 068 private static String c_denounceText = ""; 069 070 /** 071 * Prepares the different patterns for later use. Compiling is 072 * (probably) expensive, so we do it statically at class load time. 073 */ 074 static 075 { 076 try 077 { 078 PatternCompiler compiler = new GlobCompiler(); 079 ClassLoader loader = Denounce.class.getClassLoader(); 080 081 InputStream in = loader.getResourceAsStream( PROPERTYFILE ); 082 083 if( in == null ) 084 { 085 throw new IOException("No property file found! (Check the installation, it should be there.)"); 086 } 087 088 Properties props = new Properties(); 089 props.load( in ); 090 091 c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText ); 092 093 for( Enumeration e = props.propertyNames(); e.hasMoreElements(); ) 094 { 095 String name = (String) e.nextElement(); 096 097 try 098 { 099 if( name.startsWith( PROP_REFERERPATTERN ) ) 100 { 101 c_refererPatterns.add( compiler.compile( props.getProperty(name) ) ); 102 } 103 else if( name.startsWith( PROP_AGENTPATTERN ) ) 104 { 105 c_agentPatterns.add( compiler.compile( props.getProperty(name) ) ); 106 } 107 else if( name.startsWith( PROP_HOSTPATTERN ) ) 108 { 109 c_hostPatterns.add( compiler.compile( props.getProperty(name) ) ); 110 } 111 } 112 catch( MalformedPatternException ex ) 113 { 114 log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex ); 115 } 116 } 117 118 log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list."); 119 } 120 catch( IOException e ) 121 { 122 log.error( "Unable to load URL patterns from "+PROPERTYFILE, e ); 123 } 124 catch( Exception e ) 125 { 126 log.error( "Unable to initialize Denounce plugin", e ); 127 } 128 } 129 130 /** 131 * {@inheritDoc} 132 */ 133 public String execute( WikiContext context, Map<String, String> params ) 134 throws PluginException 135 { 136 String link = params.get( PARAM_LINK ); 137 String text = params.get( PARAM_TEXT ); 138 boolean linkAllowed = true; 139 140 if( link == null ) 141 { 142 throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!"); 143 } 144 145 HttpServletRequest request = context.getHttpRequest(); 146 147 if( request != null ) 148 { 149 linkAllowed = !matchHeaders( request ); 150 } 151 152 if( text == null ) text = link; 153 154 if( linkAllowed ) 155 { 156 // FIXME: Should really call TranslatorReader 157 return "<a href=\""+link+"\">"+text+"</a>"; 158 } 159 160 return c_denounceText; 161 } 162 163 /** 164 * Returns true, if the path is found among the referers. 165 */ 166 private boolean matchPattern( List list, String path ) 167 { 168 PatternMatcher matcher = new Perl5Matcher(); 169 170 for( Iterator i = list.iterator(); i.hasNext(); ) 171 { 172 if( matcher.matches( path, (Pattern)i.next() ) ) 173 { 174 return true; 175 } 176 } 177 178 return false; 179 } 180 181 // FIXME: Should really return immediately when a match is found. 182 183 private boolean matchHeaders( HttpServletRequest request ) 184 { 185 // 186 // User Agent 187 // 188 189 String userAgent = request.getHeader("User-Agent"); 190 191 if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) ) 192 { 193 log.debug("Matched user agent "+userAgent+" for denounce."); 194 return true; 195 } 196 197 // 198 // Referrer header 199 // 200 201 String refererPath = request.getHeader("Referer"); 202 203 if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) ) 204 { 205 log.debug("Matched referer "+refererPath+" for denounce."); 206 return true; 207 } 208 209 // 210 // Host 211 // 212 213 String host = request.getRemoteHost(); 214 215 if( host != null && matchPattern( c_hostPatterns, host ) ) 216 { 217 log.debug("Matched host "+host+" for denounce."); 218 return true; 219 } 220 221 return false; 222 } 223 }