001/* 002 Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi) 003 004 Licensed to the Apache Software Foundation (ASF) under one 005 or more contributor license agreements. See the NOTICE file 006 distributed with this work for additional information 007 regarding copyright ownership. The ASF licenses this file 008 to you under the Apache License, Version 2.0 (the 009 "License"); you may not use this file except in compliance 010 with the License. You may obtain a copy of the License at 011 012 http://www.apache.org/licenses/LICENSE-2.0 013 014 Unless required by applicable law or agreed to in writing, 015 software distributed under the License is distributed on an 016 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 017 KIND, either express or implied. See the License for the 018 specific language governing permissions and limitations 019 under the License. 020 */ 021package org.apache.wiki.plugin; 022 023import org.apache.logging.log4j.LogManager; 024import org.apache.logging.log4j.Logger; 025import org.apache.oro.text.GlobCompiler; 026import org.apache.oro.text.regex.MalformedPatternException; 027import org.apache.oro.text.regex.Pattern; 028import org.apache.oro.text.regex.PatternCompiler; 029import org.apache.oro.text.regex.PatternMatcher; 030import org.apache.oro.text.regex.Perl5Matcher; 031import org.apache.wiki.api.core.Context; 032import org.apache.wiki.api.exceptions.PluginException; 033import org.apache.wiki.api.plugin.Plugin; 034import org.apache.wiki.util.TextUtil; 035 036import javax.servlet.http.HttpServletRequest; 037import java.io.IOException; 038import java.io.InputStream; 039import java.net.URL; 040import java.util.ArrayList; 041import java.util.Enumeration; 042import java.util.List; 043import java.util.Map; 044import java.util.Properties; 045 046/** 047 * Denounces a link by removing it from any search engine. 048 * <br> The bots are listed in org/apache/wiki/plugin/denounce.properties. 049 * 050 * <p>Parameters : </p> 051 * <ul> 052 * <li><b>link</b> - The link to be denounced, this parameter is required</li> 053 * <li><b>text</b> - The text to use, defaults to the link</li> 054 * </ul> 055 * 056 * @since 2.1.40. 057 */ 058public class Denounce implements Plugin { 059 060 private static final Logger log = LogManager.getLogger( Denounce.class ); 061 062 /** Parameter name for setting the link. Value is <tt>{@value}</tt>. */ 063 public static final String PARAM_LINK = "link"; 064 /** Parameter name for setting the text. Value is <tt>{@value}</tt>. */ 065 public static final String PARAM_TEXT = "text"; 066 067 private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties"; 068 private static final String PROP_AGENTPATTERN = "denounce.agentpattern."; 069 private static final String PROP_HOSTPATTERN = "denounce.hostpattern."; 070 private static final String PROP_REFERERPATTERN = "denounce.refererpattern."; 071 072 private static final String PROP_DENOUNCETEXT = "denounce.denouncetext"; 073 074 private static final ArrayList< Pattern > c_refererPatterns = new ArrayList<>(); 075 private static final ArrayList< Pattern > c_agentPatterns = new ArrayList<>(); 076 private static final ArrayList< Pattern > c_hostPatterns = new ArrayList<>(); 077 078 private static String c_denounceText = ""; 079 080 /* 081 * Prepares the different patterns for later use. Compiling is 082 * (probably) expensive, so we do it statically at class load time. 083 */ 084 static { 085 try { 086 final PatternCompiler compiler = new GlobCompiler(); 087 final ClassLoader loader = Denounce.class.getClassLoader(); 088 final InputStream in = loader.getResourceAsStream( PROPERTYFILE ); 089 if( in == null ) { 090 throw new IOException( "No property file found! (Check the installation, it should be there.)" ); 091 } 092 093 final Properties props = new Properties(); 094 props.load( in ); 095 096 c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText ); 097 098 for( final Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); ) { 099 final String name = (String) e.nextElement(); 100 101 try { 102 if( name.startsWith( PROP_REFERERPATTERN ) ) { 103 c_refererPatterns.add( compiler.compile( props.getProperty(name) ) ); 104 } else if( name.startsWith( PROP_AGENTPATTERN ) ) { 105 c_agentPatterns.add( compiler.compile( props.getProperty(name) ) ); 106 } else if( name.startsWith( PROP_HOSTPATTERN ) ) { 107 c_hostPatterns.add( compiler.compile( props.getProperty(name) ) ); 108 } 109 } catch( final MalformedPatternException ex ) { 110 log.error( "Malformed URL pattern in "+PROPERTYFILE+": "+props.getProperty(name), ex ); 111 } 112 } 113 114 log.debug( "Added " + c_refererPatterns.size() + c_agentPatterns.size() + c_hostPatterns.size() + " crawlers to denounce list." ); 115 } catch( final IOException e ) { 116 log.error( "Unable to load URL patterns from " + PROPERTYFILE, e ); 117 } catch( final Exception e ) { 118 log.error( "Unable to initialize Denounce plugin", e ); 119 } 120 } 121 122 /** 123 * {@inheritDoc} 124 */ 125 @Override 126 public String execute( final Context context, final Map<String, String> params ) throws PluginException { 127 final String link = params.get( PARAM_LINK ); 128 String text = params.get( PARAM_TEXT ); 129 boolean linkAllowed = true; 130 131 if( link == null ) { 132 throw new PluginException( "Denounce: No parameter "+PARAM_LINK+" defined!" ); 133 } 134 if( !isLinkValid( link ) ) { 135 throw new PluginException( "Denounce: Not a valid link " + link ); 136 } 137 138 final HttpServletRequest request = context.getHttpRequest(); 139 if( request != null ) { 140 linkAllowed = !matchHeaders( request ); 141 } 142 143 if( text == null ) { 144 text = link; 145 } 146 147 if( linkAllowed ) { 148 return "<a href=\"" + link + "\">" + TextUtil.replaceEntities( text ) + "</a>"; 149 } 150 151 return c_denounceText; 152 } 153 154 boolean isLinkValid( final String link ) { 155 try { 156 new URL( link ).toURI().parseServerAuthority(); 157 } catch ( final Exception e ) { 158 log.debug( "invalid link {} - {}", link, e.getMessage() ); 159 return false; 160 } 161 return true; 162 } 163 164 /** 165 * Returns true, if the path is found among the referers. 166 */ 167 private boolean matchPattern( final List< Pattern > list, final String path ) { 168 final PatternMatcher matcher = new Perl5Matcher(); 169 for( final Pattern pattern : list ) { 170 if( matcher.matches( path, pattern ) ) { 171 return true; 172 } 173 } 174 return false; 175 } 176 177 private boolean matchHeaders( final HttpServletRequest request ) { 178 // User Agent 179 final String userAgent = request.getHeader( "User-Agent" ); 180 if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) ) { 181 log.debug( "Matched user agent " + userAgent + " for denounce." ); 182 return true; 183 } 184 185 // Referrer header 186 final String refererPath = request.getHeader( "Referer" ); 187 if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) ) { 188 log.debug( "Matched referer " + refererPath + " for denounce." ); 189 return true; 190 } 191 192 // Host 193 final String host = request.getRemoteHost(); 194 if( host != null && matchPattern( c_hostPatterns, host ) ) { 195 log.debug( "Matched host " + host + " for denounce." ); 196 return true; 197 } 198 199 return false; 200 } 201 202}