001/* 002 Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi) 003 004 Licensed to the Apache Software Foundation (ASF) under one 005 or more contributor license agreements. See the NOTICE file 006 distributed with this work for additional information 007 regarding copyright ownership. The ASF licenses this file 008 to you under the Apache License, Version 2.0 (the 009 "License"); you may not use this file except in compliance 010 with the License. You may obtain a copy of the License at 011 012 http://www.apache.org/licenses/LICENSE-2.0 013 014 Unless required by applicable law or agreed to in writing, 015 software distributed under the License is distributed on an 016 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 KIND, either express or implied. See the License for the 018 specific language governing permissions and limitations 019 under the License. 020 */ 021package org.apache.wiki.plugin; 022 023import java.io.IOException; 024import java.io.InputStream; 025import java.util.ArrayList; 026import java.util.Enumeration; 027import java.util.Iterator; 028import java.util.List; 029import java.util.Map; 030import java.util.Properties; 031 032import javax.servlet.http.HttpServletRequest; 033 034import org.apache.log4j.Logger; 035import org.apache.oro.text.GlobCompiler; 036import org.apache.oro.text.regex.MalformedPatternException; 037import org.apache.oro.text.regex.Pattern; 038import org.apache.oro.text.regex.PatternCompiler; 039import org.apache.oro.text.regex.PatternMatcher; 040import org.apache.oro.text.regex.Perl5Matcher; 041import org.apache.wiki.WikiContext; 042import org.apache.wiki.api.exceptions.PluginException; 043import org.apache.wiki.api.plugin.WikiPlugin; 044import org.apache.wiki.util.TextUtil; 045 046/** 047 * Denounces a link by removing it from any search engine. 048 * <br> The bots are listed in org/apache/wiki/plugin/denounce.properties. 049 * 050 * <p>Parameters : </p> 051 * <ul> 052 * <li><b>link</b> - The link to be denounced, this parameter is required</li> 053 * <li><b>text</b> - The text to use, defaults to the link</li> 054 * </ul> 055 * 056 * @since 2.1.40. 057 */ 058public class Denounce implements WikiPlugin 059{ 060 private static Logger log = Logger.getLogger(Denounce.class); 061 062 /** Parameter name for setting the link. Value is <tt>{@value}</tt>. */ 063 public static final String PARAM_LINK = "link"; 064 /** Parameter name for setting the text. Value is <tt>{@value}</tt>. */ 065 public static final String PARAM_TEXT = "text"; 066 067 private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties"; 068 private static final String PROP_AGENTPATTERN = "denounce.agentpattern."; 069 private static final String PROP_HOSTPATTERN = "denounce.hostpattern."; 070 private static final String PROP_REFERERPATTERN = "denounce.refererpattern."; 071 072 private static final String PROP_DENOUNCETEXT = "denounce.denouncetext"; 073 074 private static ArrayList<Pattern> c_refererPatterns = new ArrayList<>(); 075 private static ArrayList<Pattern> c_agentPatterns = new ArrayList<>(); 076 private static ArrayList<Pattern> c_hostPatterns = new ArrayList<>(); 077 078 private static String c_denounceText = ""; 079 080 /** 081 * Prepares the different patterns for later use. Compiling is 082 * (probably) expensive, so we do it statically at class load time. 083 */ 084 static 085 { 086 try 087 { 088 PatternCompiler compiler = new GlobCompiler(); 089 ClassLoader loader = Denounce.class.getClassLoader(); 090 091 InputStream in = loader.getResourceAsStream( PROPERTYFILE ); 092 093 if( in == null ) 094 { 095 throw new IOException("No property file found! (Check the installation, it should be there.)"); 096 } 097 098 Properties props = new Properties(); 099 props.load( in ); 100 101 c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText ); 102 103 for( Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); ) 104 { 105 String name = (String) e.nextElement(); 106 107 try 108 { 109 if( name.startsWith( PROP_REFERERPATTERN ) ) 110 { 111 c_refererPatterns.add( compiler.compile( props.getProperty(name) ) ); 112 } 113 else if( name.startsWith( PROP_AGENTPATTERN ) ) 114 { 115 c_agentPatterns.add( compiler.compile( props.getProperty(name) ) ); 116 } 117 else if( name.startsWith( PROP_HOSTPATTERN ) ) 118 { 119 c_hostPatterns.add( compiler.compile( props.getProperty(name) ) ); 120 } 121 } 122 catch( MalformedPatternException ex ) 123 { 124 log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex ); 125 } 126 } 127 128 log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list."); 129 } 130 catch( IOException e ) 131 { 132 log.error( "Unable to load URL patterns from "+PROPERTYFILE, e ); 133 } 134 catch( Exception e ) 135 { 136 log.error( "Unable to initialize Denounce plugin", e ); 137 } 138 } 139 140 /** 141 * {@inheritDoc} 142 */ 143 @Override 144 public String execute( WikiContext context, Map<String, String> params ) 145 throws PluginException 146 { 147 String link = params.get( PARAM_LINK ); 148 String text = params.get( PARAM_TEXT ); 149 boolean linkAllowed = true; 150 151 if( link == null ) 152 { 153 throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!"); 154 } 155 156 HttpServletRequest request = context.getHttpRequest(); 157 158 if( request != null ) 159 { 160 linkAllowed = !matchHeaders( request ); 161 } 162 163 if( text == null ) text = link; 164 165 if( linkAllowed ) 166 { 167 // FIXME: Should really call TranslatorReader 168 return "<a href=\""+link+"\">"+ TextUtil.replaceEntities(text) +"</a>"; 169 } 170 171 return c_denounceText; 172 } 173 174 /** 175 * Returns true, if the path is found among the referers. 176 */ 177 private boolean matchPattern( List< Pattern > list, String path ) 178 { 179 PatternMatcher matcher = new Perl5Matcher(); 180 181 for( Iterator< Pattern > i = list.iterator(); i.hasNext(); ) 182 { 183 if( matcher.matches( path, i.next() ) ) 184 { 185 return true; 186 } 187 } 188 189 return false; 190 } 191 192 // FIXME: Should really return immediately when a match is found. 193 194 private boolean matchHeaders( HttpServletRequest request ) 195 { 196 // 197 // User Agent 198 // 199 200 String userAgent = request.getHeader("User-Agent"); 201 202 if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) ) 203 { 204 log.debug("Matched user agent "+userAgent+" for denounce."); 205 return true; 206 } 207 208 // 209 // Referrer header 210 // 211 212 String refererPath = request.getHeader("Referer"); 213 214 if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) ) 215 { 216 log.debug("Matched referer "+refererPath+" for denounce."); 217 return true; 218 } 219 220 // 221 // Host 222 // 223 224 String host = request.getRemoteHost(); 225 226 if( host != null && matchPattern( c_hostPatterns, host ) ) 227 { 228 log.debug("Matched host "+host+" for denounce."); 229 return true; 230 } 231 232 return false; 233 } 234}