/*
    Copyright (C) 2003 Janne Jalkanen (Janne.Jalkanen@iki.fi)

    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.plugin;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.oro.text.GlobCompiler;
import org.apache.oro.text.regex.MalformedPatternException;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternCompiler;
import org.apache.oro.text.regex.PatternMatcher;
import org.apache.oro.text.regex.Perl5Matcher;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.exceptions.PluginException;
import org.apache.wiki.api.plugin.Plugin;
import org.apache.wiki.util.TextUtil;

import javax.servlet.http.HttpServletRequest;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Enumeration;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 *  Denounces a link by removing it from any search engine.
 *  <br> The bots are listed in org/apache/wiki/plugin/denounce.properties.
 *
 *  <p>Parameters : </p>
 *  <ul>
 *  <li><b>link</b> - The link to be denounced, this parameter is required</li>
 *  <li><b>text</b> - The text to use, defaults to the link</li>
 *  </ul>
 *
 *  <p>When the current HTTP request matches one of the configured user agent, referer or
 *  remote host patterns, the plugin emits the configured denounce text instead of the link.</p>
 *
 *  @since 2.1.40.
 */
public class Denounce implements Plugin {

    private static final Logger LOG = LogManager.getLogger( Denounce.class );

    /** Parameter name for setting the link. Value is <tt>{@value}</tt>. */
    public static final String PARAM_LINK = "link";
    /** Parameter name for setting the text. Value is <tt>{@value}</tt>. */
    public static final String PARAM_TEXT = "text";

    private static final String PROPERTYFILE = "org/apache/wiki/plugin/denounce.properties";
    private static final String PROP_AGENTPATTERN = "denounce.agentpattern.";
    private static final String PROP_HOSTPATTERN = "denounce.hostpattern.";
    private static final String PROP_REFERERPATTERN = "denounce.refererpattern.";

    private static final String PROP_DENOUNCETEXT = "denounce.denouncetext";

    /** Compiled glob patterns, filled once by the static initializer below. */
    private static final List< Pattern > c_refererPatterns = new ArrayList<>();
    private static final List< Pattern > c_agentPatterns = new ArrayList<>();
    private static final List< Pattern > c_hostPatterns = new ArrayList<>();

    /** Text returned in place of the link for a matching request; empty unless the property file sets it. */
    private static String c_denounceText = "";

    /*
     *  Prepares the different patterns for later use.  Compiling is
     *  (probably) expensive, so we do it statically at class load time.
     *
     *  Failures are logged rather than rethrown, so the class still loads;
     *  the pattern lists then simply stay (partially) empty.
     */
    static {
        // try-with-resources makes sure the property stream is closed again; a null
        // resource is legal here and is reported through the IOException below.
        try( final InputStream in = Denounce.class.getClassLoader().getResourceAsStream( PROPERTYFILE ) ) {
            if( in == null ) {
                throw new IOException( "No property file found! (Check the installation, it should be there.)" );
            }

            final Properties props = new Properties();
            props.load( in );

            c_denounceText = props.getProperty( PROP_DENOUNCETEXT, c_denounceText );

            final PatternCompiler compiler = new GlobCompiler();
            for( final Enumeration< ? > e = props.propertyNames(); e.hasMoreElements(); ) {
                final String name = ( String )e.nextElement();
                final String value = props.getProperty( name );

                try {
                    // The property name prefix decides which list the pattern belongs to.
                    if( name.startsWith( PROP_REFERERPATTERN ) ) {
                        c_refererPatterns.add( compiler.compile( value ) );
                    } else if( name.startsWith( PROP_AGENTPATTERN ) ) {
                        c_agentPatterns.add( compiler.compile( value ) );
                    } else if( name.startsWith( PROP_HOSTPATTERN ) ) {
                        c_hostPatterns.add( compiler.compile( value ) );
                    }
                } catch( final MalformedPatternException ex ) {
                    // One broken pattern must not prevent the remaining ones from loading.
                    LOG.error( "Malformed URL pattern in " + PROPERTYFILE + ": " + value, ex );
                }
            }

            // Sum the sizes numerically; plain '+' after a String would concatenate the digits.
            final int total = c_refererPatterns.size() + c_agentPatterns.size() + c_hostPatterns.size();
            LOG.debug( "Added {} crawlers to denounce list.", total );
        } catch( final IOException e ) {
            LOG.error( "Unable to load URL patterns from " + PROPERTYFILE, e );
        } catch( final Exception e ) {
            LOG.error( "Unable to initialize Denounce plugin", e );
        }
    }

    /**
     *  {@inheritDoc}
     *
     *  <p>Renders an HTML anchor for the <tt>link</tt> parameter, or the configured denounce
     *  text when the current HTTP request matches one of the denounce patterns. When the
     *  context carries no HTTP request, the link is always rendered.</p>
     *
     *  @throws PluginException if the <tt>link</tt> parameter is missing or is not a valid link.
     */
    @Override
    public String execute( final Context context, final Map< String, String > params ) throws PluginException {
        // Check the raw parameter before escaping it, so that a missing parameter is always
        // reported as a PluginException regardless of how the escaping helper treats null.
        final String rawLink = params.get( PARAM_LINK );
        if( rawLink == null ) {
            throw new PluginException( "Denounce: No parameter " + PARAM_LINK + " defined!" );
        }

        final String link = TextUtil.replaceEntities( rawLink );
        if( !isLinkValid( link ) ) {
            throw new PluginException( "Denounce: Not a valid link " + link );
        }

        final HttpServletRequest request = context.getHttpRequest();
        final boolean linkAllowed = request == null || !matchHeaders( request );
        if( !linkAllowed ) {
            return c_denounceText;
        }

        // The link text defaults to the (already escaped) link itself.
        final String paramText = params.get( PARAM_TEXT );
        final String text = paramText != null ? paramText : link;
        return "<a href=\"" + link + "\">" + TextUtil.replaceEntities( text ) + "</a>";
    }

    /**
     *  Checks whether the given string can be parsed as a URL with a server-based authority.
     *
     *  @param link the link to check.
     *  @return {@code true} if parsing succeeds, {@code false} if any exception is raised while parsing.
     */
    boolean isLinkValid( final String link ) {
        try {
            new URL( link ).toURI().parseServerAuthority();
        } catch( final Exception e ) {
            LOG.debug( "invalid link {} - {}", link, e.getMessage() );
            return false;
        }
        return true;
    }

    /**
     *  Returns true, if the given value matches at least one of the patterns in the list.
     *
     *  @param list the compiled patterns to try.
     *  @param path the value to match (a user agent, referer or host name).
     *  @return {@code true} on the first matching pattern, {@code false} if none matches.
     */
    private boolean matchPattern( final List< Pattern > list, final String path ) {
        // A fresh matcher per call, so no matcher instance is shared between requests.
        final PatternMatcher matcher = new Perl5Matcher();
        return list.stream().anyMatch( pattern -> matcher.matches( path, pattern ) );
    }

    /**
     *  Checks the request's User-Agent header, Referer header and remote host, in that order,
     *  against the corresponding pattern lists.
     *
     *  @param request the current HTTP request.
     *  @return {@code true} as soon as one of the three values matches, {@code false} otherwise.
     */
    private boolean matchHeaders( final HttpServletRequest request ) {
        // User Agent
        final String userAgent = request.getHeader( "User-Agent" );
        if( userAgent != null && matchPattern( c_agentPatterns, userAgent ) ) {
            LOG.debug( "Matched user agent {} for denounce.", userAgent );
            return true;
        }

        // Referrer header
        final String refererPath = request.getHeader( "Referer" );
        if( refererPath != null && matchPattern( c_refererPatterns, refererPath ) ) {
            LOG.debug( "Matched referer {} for denounce.", refererPath );
            return true;
        }

        // Host
        final String host = request.getRemoteHost();
        if( host != null && matchPattern( c_hostPatterns, host ) ) {
            LOG.debug( "Matched host {} for denounce.", host );
            return true;
        }

        return false;
    }

}