001/* 002 Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi) 003 004 Licensed to the Apache Software Foundation (ASF) under one 005 or more contributor license agreements. See the NOTICE file 006 distributed with this work for additional information 007 regarding copyright ownership. The ASF licenses this file 008 to you under the Apache License, Version 2.0 (the 009 "License"); you may not use this file except in compliance 010 with the License. You may obtain a copy of the License at 011 012 http://www.apache.org/licenses/LICENSE-2.0 013 014 Unless required by applicable law or agreed to in writing, 015 software distributed under the License is distributed on an 016 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 KIND, either express or implied. See the License for the 018 specific language governing permissions and limitations 019 under the License. 020 */ 021package org.apache.wiki.plugin; 022 023import org.apache.log4j.Logger; 024import org.apache.oro.text.GlobCompiler; 025import org.apache.oro.text.regex.MalformedPatternException; 026import org.apache.oro.text.regex.Pattern; 027import org.apache.oro.text.regex.PatternCompiler; 028import org.apache.oro.text.regex.PatternMatcher; 029import org.apache.oro.text.regex.Perl5Matcher; 030import org.apache.wiki.api.core.Context; 031import org.apache.wiki.api.exceptions.PluginException; 032import org.apache.wiki.api.plugin.Plugin; 033import org.apache.wiki.util.TextUtil; 034 035import javax.servlet.http.HttpServletRequest; 036import java.io.IOException; 037import java.io.InputStream; 038import java.util.ArrayList; 039import java.util.Enumeration; 040import java.util.List; 041import java.util.Map; 042import java.util.Properties; 043 044/** 045 * Denounces a link by removing it from any search engine. 046 * <br> The bots are listed in org/apache/wiki/plugin/denounce.properties. 047 * 048 * <p>Parameters : </p> 049 * <ul> 050 * <li><b>link</b> - The link to be denounced, this parameter is required</li> 051 * <li><b>text</b> - The text to use, defaults to the link</li> 052 * </ul> 053 * 054 * @since 2.1.40. 055 */ 056public class Denounce implements Plugin { 057 058 private static final Logger log = Logger.getLogger(Denounce.class); 059 060 /** Parameter name for setting the link. Value is <tt>{@value}</tt>. */ 061 public static final String PARAM_LINK = "link"; 062 /** Parameter name for setting the text. Value is <tt>{@value}</tt>. */ 063 public static final String PARAM_TEXT = "text"; 064 065 private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties"; 066 private static final String PROP_AGENTPATTERN = "denounce.agentpattern."; 067 private static final String PROP_HOSTPATTERN = "denounce.hostpattern."; 068 private static final String PROP_REFERERPATTERN = "denounce.refererpattern."; 069 070 private static final String PROP_DENOUNCETEXT = "denounce.denouncetext"; 071 072 private static ArrayList<Pattern> c_refererPatterns = new ArrayList<>(); 073 private static ArrayList<Pattern> c_agentPatterns = new ArrayList<>(); 074 private static ArrayList<Pattern> c_hostPatterns = new ArrayList<>(); 075 076 private static String c_denounceText = ""; 077 078 /** 079 * Prepares the different patterns for later use. Compiling is 080 * (probably) expensive, so we do it statically at class load time. 081 */ 082 static 083 { 084 try 085 { 086 final PatternCompiler compiler = new GlobCompiler(); 087 final ClassLoader loader = Denounce.class.getClassLoader(); 088 089 final InputStream in = loader.getResourceAsStream( PROPERTYFILE ); 090 091 if( in == null ) 092 { 093 throw new IOException("No property file found! (Check the installation, it should be there.)"); 094 } 095 096 final Properties props = new Properties(); 097 props.load( in ); 098 099 c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText ); 100 101 for( final Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); ) 102 { 103 final String name = (String) e.nextElement(); 104 105 try 106 { 107 if( name.startsWith( PROP_REFERERPATTERN ) ) 108 { 109 c_refererPatterns.add( compiler.compile( props.getProperty(name) ) ); 110 } 111 else if( name.startsWith( PROP_AGENTPATTERN ) ) 112 { 113 c_agentPatterns.add( compiler.compile( props.getProperty(name) ) ); 114 } 115 else if( name.startsWith( PROP_HOSTPATTERN ) ) 116 { 117 c_hostPatterns.add( compiler.compile( props.getProperty(name) ) ); 118 } 119 } 120 catch( final MalformedPatternException ex ) 121 { 122 log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex ); 123 } 124 } 125 126 log.debug("Added "+c_refererPatterns.size()+c_agentPatterns.size()+c_hostPatterns.size()+" crawlers to denounce list."); 127 } 128 catch( final IOException e ) 129 { 130 log.error( "Unable to load URL patterns from "+PROPERTYFILE, e ); 131 } 132 catch( final Exception e ) 133 { 134 log.error( "Unable to initialize Denounce plugin", e ); 135 } 136 } 137 138 /** 139 * {@inheritDoc} 140 */ 141 @Override 142 public String execute( final Context context, final Map<String, String> params ) throws PluginException { 143 final String link = params.get( PARAM_LINK ); 144 String text = params.get( PARAM_TEXT ); 145 boolean linkAllowed = true; 146 147 if( link == null ) 148 { 149 throw new PluginException("Denounce: No parameter "+PARAM_LINK+" defined!"); 150 } 151 152 final HttpServletRequest request = context.getHttpRequest(); 153 154 if( request != null ) 155 { 156 linkAllowed = !matchHeaders( request ); 157 } 158 159 if( text == null ) text = link; 160 161 if( linkAllowed ) 162 { 163 // FIXME: Should really call TranslatorReader 164 return "<a href=\""+link+"\">"+ TextUtil.replaceEntities(text) +"</a>"; 165 } 166 167 return c_denounceText; 168 } 169 170 /** 171 * Returns true, if the path is found among the referers. 172 */ 173 private boolean matchPattern( final List< Pattern > list, final String path ) { 174 final PatternMatcher matcher = new Perl5Matcher(); 175 for( final Pattern pattern : list ) { 176 if( matcher.matches( path, pattern ) ) { 177 return true; 178 } 179 } 180 181 return false; 182 } 183 184 // FIXME: Should really return immediately when a match is found. 185 186 private boolean matchHeaders( final HttpServletRequest request ) 187 { 188 // 189 // User Agent 190 // 191 192 final String userAgent = request.getHeader("User-Agent"); 193 194 if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) ) 195 { 196 log.debug("Matched user agent "+userAgent+" for denounce."); 197 return true; 198 } 199 200 // 201 // Referrer header 202 // 203 204 final String refererPath = request.getHeader("Referer"); 205 206 if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) ) 207 { 208 log.debug("Matched referer "+refererPath+" for denounce."); 209 return true; 210 } 211 212 // 213 // Host 214 // 215 216 final String host = request.getRemoteHost(); 217 218 if( host != null && matchPattern( c_hostPatterns, host ) ) 219 { 220 log.debug("Matched host "+host+" for denounce."); 221 return true; 222 } 223 224 return false; 225 } 226}