001    /*
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.  
018     */
019    package org.apache.wiki.filters;
020    
021    import java.io.BufferedReader;
022    import java.io.IOException;
023    import java.io.InputStream;
024    import java.io.InputStreamReader;
025    import java.io.StringReader;
026    import java.io.StringWriter;
027    import java.util.ArrayList;
028    import java.util.Collection;
029    import java.util.Date;
030    import java.util.Iterator;
031    import java.util.Properties;
032    import java.util.Random;
033    import java.util.StringTokenizer;
034    import java.util.Vector;
035    
036    import javax.servlet.http.HttpServletRequest;
037    import javax.servlet.http.HttpServletResponse;
038    import javax.servlet.jsp.PageContext;
039    
040    import net.sf.akismet.Akismet;
041    
042    import org.apache.commons.lang.time.StopWatch;
043    import org.apache.log4j.Logger;
044    import org.apache.oro.text.regex.MalformedPatternException;
045    import org.apache.oro.text.regex.MatchResult;
046    import org.apache.oro.text.regex.Pattern;
047    import org.apache.oro.text.regex.PatternCompiler;
048    import org.apache.oro.text.regex.PatternMatcher;
049    import org.apache.oro.text.regex.Perl5Compiler;
050    import org.apache.oro.text.regex.Perl5Matcher;
051    import org.apache.wiki.InternalWikiException;
052    import org.apache.wiki.WikiContext;
053    import org.apache.wiki.WikiEngine;
054    import org.apache.wiki.WikiPage;
055    import org.apache.wiki.WikiProvider;
056    import org.apache.wiki.api.exceptions.ProviderException;
057    import org.apache.wiki.api.exceptions.RedirectException;
058    import org.apache.wiki.api.filters.BasicPageFilter;
059    import org.apache.wiki.attachment.Attachment;
060    import org.apache.wiki.auth.user.UserProfile;
061    import org.apache.wiki.ui.EditorManager;
062    import org.apache.wiki.util.FileUtil;
063    import org.apache.wiki.util.HttpUtil;
064    import org.apache.wiki.util.TextUtil;
065    import org.suigeneris.jrcs.diff.Diff;
066    import org.suigeneris.jrcs.diff.DifferentiationFailedException;
067    import org.suigeneris.jrcs.diff.Revision;
068    import org.suigeneris.jrcs.diff.delta.AddDelta;
069    import org.suigeneris.jrcs.diff.delta.ChangeDelta;
070    import org.suigeneris.jrcs.diff.delta.DeleteDelta;
071    import org.suigeneris.jrcs.diff.delta.Delta;
072    import org.suigeneris.jrcs.diff.myers.MyersDiff;
073    
074    
075    /**
076     *  This is Herb, the JSPWiki spamfilter that can also do choke modifications.
077     *
078     *  Parameters:
079     *  <ul>
 *    <li>wordlist - Page name where the regexps are found.  Use [{SET spamwords='regexp list separated with spaces'}] on
 *     that page.  Default is "SpamFilterWordList".</li>
 *    <li>blacklist - The name of an attachment containing the list of spam patterns, one per line. Default is
 *        "SpamFilterWordList/blacklist.txt"</li>
 *    <li>errorpage - The page to which the user is redirected.  Has a special variable $msg which states the reason. Default is "RejectedMessage".</li>
085     *    <li>pagechangesinminute - How many page changes are allowed/minute.  Default is 5.</li>
086     *    <li>similarchanges - How many similar page changes are allowed before the host is banned.  Default is 2.  (since 2.4.72)</li>
087     *    <li>bantime - How long an IP address stays on the temporary ban list (default is 60 for 60 minutes).</li>
 *    <li>maxurls - How many URLs can be added to the page before it is considered spam (default is 10)</li>
089     *    <li>akismet-apikey - The Akismet API key (see akismet.org)</li>
090     *    <li>ignoreauthenticated - If set to "true", all authenticated users are ignored and never caught in SpamFilter</li>
091     *    <li>captcha - Sets the captcha technology to use.  Current allowed values are "none" and "asirra".</li>
 *    <li>strategy - Sets the filtering strategy to use.  If set to "eager", will stop at the first probable
 *        match, and won't consider any other tests.  This is the default, as it's considerably lighter. If set to "score", will go through all of the tests
 *        and calculate a score for the spam, which is then compared to a filter level value.</li>
095     *  </ul>
096     *
097     *  <p>Please see the default editors/plain.jsp for examples on how the SpamFilter integrates
098     *  with the editor system.</p>
099     *  
100     *  <p>Changes by admin users are ignored in any case.</p>
101     *
102     *  @since 2.1.112
103     */
104    public class SpamFilter extends BasicPageFilter {
105        
    /** Name of the WikiContext variable under which the accumulated spam score is kept. */
    private static final String ATTR_SPAMFILTER_SCORE = "spamfilter.score";

    // Reason codes identifying which check flagged a change.  The checks in this
    // class hand them to log() as the "source" field of the spam log entry and
    // to checkStrategy() as the error code.
    private static final String REASON_REGEXP = "Regexp";
    private static final String REASON_IP_BANNED_TEMPORARILY = "IPBannedTemporarily";
    private static final String REASON_BOT_TRAP = "BotTrap";
    private static final String REASON_AKISMET = "Akismet";
    private static final String REASON_TOO_MANY_URLS = "TooManyUrls";
    private static final String REASON_SIMILAR_MODIFICATIONS = "SimilarModifications";
    private static final String REASON_TOO_MANY_MODIFICATIONS = "TooManyModifications";
    private static final String REASON_UTF8_TRAP = "UTF8Trap";

    // NOTE(review): presumably the name of the page variable that holds the spam
    // word list (cf. the "SET spamwords" example in the class documentation) --
    // confirm against the code that reads it.
    private static final String LISTVAR = "spamwords";
    
    /** The filter property name for specifying the page which contains the list of spamwords.
     *  Value is <tt>{@value}</tt>. */
    public static final String  PROP_WORDLIST              = "wordlist";
    
    /** The filter property name for the page to which you are directed if Herb rejects your
     *  edit.  Value is <tt>{@value}</tt>. */
    public static final String  PROP_ERRORPAGE             = "errorpage";
    
    /** The filter property name for specifying how many changes any given IP address
     *  is allowed to make per minute.  Value is <tt>{@value}</tt>.
     */
    public static final String  PROP_PAGECHANGES           = "pagechangesinminute";
    
    /** The filter property name for specifying how many similar changes are allowed
     *  before a host is banned.  Value is <tt>{@value}</tt>.
     */
    public static final String  PROP_SIMILARCHANGES        = "similarchanges";
    
    /** The filter property name for specifying how long a host is banned.  Value is <tt>{@value}</tt>.*/
    public static final String  PROP_BANTIME               = "bantime";
    
    /** The filter property name for the attachment containing the blacklist.  Value is <tt>{@value}</tt>.*/
    public static final String  PROP_BLACKLIST             = "blacklist";
    
    /** The filter property name for specifying how many URLs any given edit can contain.
     *  Value is <tt>{@value}</tt> */
    public static final String  PROP_MAXURLS               = "maxurls";
    
    /** The filter property name for specifying the Akismet API-key.  Value is <tt>{@value}</tt>. */
    public static final String  PROP_AKISMET_API_KEY       = "akismet-apikey";
    
    /** The filter property name for specifying whether authenticated users should be ignored. Value is <tt>{@value}</tt>. */
    public static final String  PROP_IGNORE_AUTHENTICATED  = "ignoreauthenticated";
    
    /** The filter property name for specifying which captcha technology should be used. Value is <tt>{@value}</tt>. */
    public static final String  PROP_CAPTCHA               = "captcha";
    
    /** The filter property name for specifying which filter strategy should be used.  Value is <tt>{@value}</tt>. */
    public static final String  PROP_FILTERSTRATEGY        = "strategy";

    /** The string specifying the "eager" strategy. Value is <tt>{@value}</tt>. */
    public static final String  STRATEGY_EAGER             = "eager";
    
    /** The string specifying the "score" strategy. Value is <tt>{@value}</tt>. */
    public static final String  STRATEGY_SCORE             = "score";

    /** Regular expression for http, https and mailto URLs; compiled in initialize() and used to count the URLs in a change. */
    private static final String URL_REGEXP = "(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;]+)";
165    
    // Defaults below can be overridden through the filter properties read in initialize().

    /** Name of the page holding the spam word list (property "wordlist"). */
    private String          m_forbiddenWordsPage = "SpamFilterWordList";
    /** Name of the page a rejected user is redirected to (property "errorpage"). */
    private String          m_errorPage          = "RejectedMessage";
    /** Name of the attachment holding the blacklist (property "blacklist"). */
    private String          m_blacklist          = "SpamFilterWordList/blacklist.txt";

    // Shared ORO matcher and compiler instances used for all pattern work in this filter.
    private PatternMatcher  m_matcher = new Perl5Matcher();
    private PatternCompiler m_compiler = new Perl5Compiler();

    /** Compiled spam patterns; a null or empty collection makes refreshBlacklists() request a rebuild. */
    private Collection<Pattern> m_spamPatterns = null;

    /** Compared against the word list page's last-modified time to decide whether a rebuild is needed. */
    private Date            m_lastRebuild = new Date( 0L );

    /** Dedicated logger for the spam log entries written by log(). */
    private static  Logger  c_spamlog = Logger.getLogger( "SpamLog" );
    /** Ordinary class logger. */
    private static  Logger  log = Logger.getLogger( SpamFilter.class );


    /** Hosts that are currently banned; expired entries are dropped by cleanBanList(). */
    private Vector<Host>    m_temporaryBanList = new Vector<Host>();

    /** Ban duration (property "bantime"). */
    private int             m_banTime = 60; // minutes

    /** Recent modifications; entries older than one minute are dropped in checkSinglePageChange(). */
    private Vector<Host>    m_lastModifications = new Vector<Host>();

    /**
     *  How many times a single IP address can change a page per minute?
     */
    private int             m_limitSinglePageChanges = 5;

    /**
     *  How many times can you add the exact same string to a page?
     */
    private int             m_limitSimilarChanges = 2;

    /**
     *  How many URLs can be added at maximum.
     */
    private int             m_maxUrls = 10;

    /** Compiled form of URL_REGEXP; set in initialize(). */
    private Pattern         m_urlPattern;
    /** Akismet client; created on first use in checkAkismet(). */
    private Akismet         m_akismet;

    /** Akismet API key; null means that the Akismet check is skipped. */
    private String          m_akismetAPIKey = null;

    /** True when the "captcha" property is set to "asirra". */
    private boolean         m_useCaptcha = false;

    /** The limit at which we consider something to be spam. */
    private int             m_scoreLimit = 1;

    /**
     * If set to true, will ignore anyone who is in Authenticated role.
     */
    private boolean         m_ignoreAuthenticated = false;

    /** True for the "eager" strategy: the first failed check rejects the change immediately. */
    private boolean         m_stopAtFirstMatch = true;

    // NOTE(review): presumably the current name of the hash field and the time it
    // was last regenerated (see HASH_DELAY below) -- their use is outside this view.
    private static String   c_hashName;
    private static long     c_lastUpdate;

    /** The HASH_DELAY value is the maximum amount of time that a user can keep
     *  a session open, because after the value has expired, we will invent a new
     *  hash field name.  By default this is {@value} hours, which should be ample
     *  time for someone.
     */
    private static final long HASH_DELAY = 24;
228    
229    
230        /**
231         *  {@inheritDoc}
232         */
233        @Override
234        public void initialize( WikiEngine engine, Properties properties ) {
235            m_forbiddenWordsPage = properties.getProperty( PROP_WORDLIST, m_forbiddenWordsPage );
236            m_errorPage = properties.getProperty( PROP_ERRORPAGE, m_errorPage );
237            m_limitSinglePageChanges = TextUtil.getIntegerProperty( properties,
238                                                                    PROP_PAGECHANGES,
239                                                                    m_limitSinglePageChanges );
240            
241            m_limitSimilarChanges = TextUtil.getIntegerProperty( properties,
242                                                                 PROP_SIMILARCHANGES,
243                                                                 m_limitSimilarChanges );
244    
245            m_maxUrls = TextUtil.getIntegerProperty( properties, PROP_MAXURLS, m_maxUrls );
246            m_banTime = TextUtil.getIntegerProperty( properties, PROP_BANTIME, m_banTime );
247            m_blacklist = properties.getProperty( PROP_BLACKLIST, m_blacklist );
248    
249            m_ignoreAuthenticated = TextUtil.getBooleanProperty( properties,
250                                                                 PROP_IGNORE_AUTHENTICATED,
251                                                                 m_ignoreAuthenticated );
252    
253            m_useCaptcha = properties.getProperty( PROP_CAPTCHA, "" ).equals("asirra");
254    
255            try {
256                m_urlPattern = m_compiler.compile( URL_REGEXP );
257            } catch( MalformedPatternException e ) {
258                log.fatal( "Internal error: Someone put in a faulty pattern.", e );
259                throw new InternalWikiException( "Faulty pattern." );
260            }
261    
262            m_akismetAPIKey = TextUtil.getStringProperty( properties,
263                                                          PROP_AKISMET_API_KEY,
264                                                          m_akismetAPIKey );
265    
266            m_stopAtFirstMatch = TextUtil.getStringProperty( properties,
267                                                             PROP_FILTERSTRATEGY,
268                                                             STRATEGY_EAGER ).equals( STRATEGY_EAGER );
269    
270            log.info( "# Spam filter initialized.  Temporary ban time " + m_banTime +
271                      " mins, max page changes/minute: " + m_limitSinglePageChanges );
272    
273    
274        }
275    
276        private static final int REJECT = 0;
277        private static final int ACCEPT = 1;
278        private static final int NOTE   = 2;
279    
280        private static String log( WikiContext ctx, int type, String source, String message ) {
281            message = TextUtil.replaceString( message, "\r\n", "\\r\\n" );
282            message = TextUtil.replaceString( message, "\"", "\\\"" );
283    
284            String uid = getUniqueID();
285    
286            String page   = ctx.getPage().getName();
287            String reason = "UNKNOWN";
288            String addr   = ctx.getHttpRequest() != null ? HttpUtil.getRemoteAddress( ctx.getHttpRequest() ) : "-";
289    
290            switch( type ) {
291                case REJECT:
292                    reason = "REJECTED";
293                    break;
294                case ACCEPT:
295                    reason = "ACCEPTED";
296                    break;
297                case NOTE:
298                    reason = "NOTE";
299                    break;
300                default:
301                    throw new InternalWikiException( "Illegal type " + type );
302            }
303            c_spamlog.info( reason + " " + source + " " + uid + " " + addr + " \"" + page + "\" " + message );
304    
305            return uid;
306        }
307    
308        /** {@inheritDoc} */
309        public String preSave( WikiContext context, String content ) throws RedirectException {
310            cleanBanList();
311            refreshBlacklists( context );
312            Change change = getChange( context, content );
313    
314            if( !ignoreThisUser( context ) ) {
315                checkBanList( context, change );
316                checkSinglePageChange( context, content, change );
317                checkPatternList( context, content, change );
318            }
319    
320            if( !m_stopAtFirstMatch ) {
321                Integer score = ( Integer )context.getVariable( ATTR_SPAMFILTER_SCORE );
322    
323                if( score != null && score.intValue() >= m_scoreLimit ) {
324                    throw new RedirectException( "Herb says you got too many points", getRedirectPage( context ) );
325                }
326            }
327    
328            log( context, ACCEPT, "-", change.toString() );
329            return content;
330        }
331    
332        private void checkStrategy( WikiContext context, String error, String message ) throws RedirectException {
333            if( m_stopAtFirstMatch ) {
334                throw new RedirectException( message, getRedirectPage( context ) );
335            }
336    
337            Integer score = ( Integer )context.getVariable( ATTR_SPAMFILTER_SCORE );
338            if( score != null ) {
339                score = score + 1;
340            } else {
341                score = 1;
342            }
343    
344            context.setVariable( ATTR_SPAMFILTER_SCORE, score );
345        }
346        
347        /**
348         *  Parses a list of patterns and returns a Collection of compiled Pattern
349         *  objects.
350         *
351         * @param source
352         * @param list
353         * @return A Collection of the Patterns that were found from the lists.
354         */
355        private Collection< Pattern > parseWordList( WikiPage source, String list ) {
356            ArrayList< Pattern > compiledpatterns = new ArrayList< Pattern >();
357    
358            if( list != null ) {
359                StringTokenizer tok = new StringTokenizer( list, " \t\n" );
360    
361                while( tok.hasMoreTokens() ) {
362                    String pattern = tok.nextToken();
363    
364                    try {
365                        compiledpatterns.add( m_compiler.compile( pattern ) );
366                    } catch( MalformedPatternException e ) {
367                        log.debug( "Malformed spam filter pattern " + pattern );
368                        source.setAttribute("error", "Malformed spam filter pattern " + pattern);
369                    }
370                }
371            }
372    
373            return compiledpatterns;
374        }
375    
376        /**
377         *  Takes a MT-Blacklist -formatted blacklist and returns a list of compiled Pattern objects.
378         *
379         *  @param list
380         *  @return The parsed blacklist patterns.
381         */
382        private Collection< Pattern > parseBlacklist( String list ) {
383            ArrayList< Pattern > compiledpatterns = new ArrayList< Pattern >();
384    
385            if( list != null ) {
386                try {
387                    BufferedReader in = new BufferedReader( new StringReader(list) );
388                    String line;
389                    while( (line = in.readLine() ) != null ) {
390                        line = line.trim();
391                        if( line.length() == 0 ) continue; // Empty line
392                        if( line.startsWith("#") ) continue; // It's a comment
393    
394                        int ws = line.indexOf( ' ' );
395                        if( ws == -1 ) ws = line.indexOf( '\t' );
396                        if( ws != -1 ) line = line.substring( 0, ws );
397    
398                        try {
399                            compiledpatterns.add( m_compiler.compile( line ) );
400                        } catch( MalformedPatternException e ) {
401                            log.debug( "Malformed spam filter pattern " + line );
402                        }
403                    }
404                } catch( IOException e ) {
405                    log.info( "Could not read patterns; returning what I got" , e );
406                }
407            }
408    
409            return compiledpatterns;
410        }
411    
412        /**
413         *  Takes a single page change and performs a load of tests on the content change.
414         *  An admin can modify anything.
415         *
416         *  @param context
417         *  @param content
418         *  @throws RedirectException
419         */
420        private synchronized void checkSinglePageChange( WikiContext context, String content, Change change ) 
421                throws RedirectException {
422            HttpServletRequest req = context.getHttpRequest();
423    
424            if( req != null ) {
425                String addr = HttpUtil.getRemoteAddress( req );
426                int hostCounter = 0;
427                int changeCounter = 0;
428    
429                log.debug( "Change is " + change.m_change );
430    
431                long time = System.currentTimeMillis() - 60*1000L; // 1 minute
432    
433                for( Iterator< Host > i = m_lastModifications.iterator(); i.hasNext(); ) {
434                    Host host = i.next();
435    
436                    //
437                    //  Check if this item is invalid
438                    //
439                    if( host.getAddedTime() < time ) {
440                        log.debug( "Removed host " + host.getAddress() + " from modification queue (expired)" );
441                        i.remove();
442                        continue;
443                    }
444    
445                    //
446                    // Check if this IP address has been seen before
447                    //
448    
449                    if( host.getAddress().equals( addr ) ) {
450                        hostCounter++;
451                    }
452    
453                    //
454                    //  Check, if this change has been seen before
455                    //
456    
457                    if( host.getChange() != null && host.getChange().equals( change ) ) {
458                        changeCounter++;
459                    }
460                }
461    
462                //
463                //  Now, let's check against the limits.
464                //
465                if( hostCounter >= m_limitSinglePageChanges ) {
466                    Host host = new Host( addr, null );
467                    m_temporaryBanList.add( host );
468    
469                    String uid = log( context, REJECT, REASON_TOO_MANY_MODIFICATIONS, change.m_change );
470                    log.info( "SPAM:TooManyModifications (" + uid + "). Added host " + addr + " to temporary ban list for doing too many modifications/minute" );
471                    checkStrategy( context, REASON_TOO_MANY_MODIFICATIONS, "Herb says you look like a spammer, and I trust Herb! (Incident code " + uid + ")" );
472                }
473    
474                if( changeCounter >= m_limitSimilarChanges ) {
475                    Host host = new Host( addr, null );
476                    m_temporaryBanList.add( host );
477    
478                    String uid = log( context, REJECT, REASON_SIMILAR_MODIFICATIONS, change.m_change );
479                    log.info( "SPAM:SimilarModifications (" + uid + "). Added host " + addr + " to temporary ban list for doing too many similar modifications" );
480                    checkStrategy( context, REASON_SIMILAR_MODIFICATIONS, "Herb says you look like a spammer, and I trust Herb! (Incident code "+uid+")");
481                }
482    
483                //
484                //  Calculate the number of links in the addition.
485                //
486                String tstChange  = change.toString();
487                int    urlCounter = 0;
488                while( m_matcher.contains( tstChange,m_urlPattern ) ) {
489                    MatchResult m = m_matcher.getMatch();
490                    tstChange = tstChange.substring( m.endOffset(0) );
491                    urlCounter++;
492                }
493    
494                if( urlCounter > m_maxUrls ) {
495                    Host host = new Host( addr, null );
496                    m_temporaryBanList.add( host );
497    
498                    String uid = log( context, REJECT, REASON_TOO_MANY_URLS, change.toString() );
499                    log.info( "SPAM:TooManyUrls (" + uid + "). Added host " + addr + " to temporary ban list for adding too many URLs" );
500                    checkStrategy( context, REASON_TOO_MANY_URLS, "Herb says you look like a spammer, and I trust Herb! (Incident code " + uid + ")" );
501                }
502    
503                //
504                //  Check bot trap
505                //
506                checkBotTrap( context, change );
507    
508                //
509                //  Check UTF-8 mangling
510                //
511                checkUTF8( context, change );
512    
513                //
514                //  Do Akismet check.  This is good to be the last, because this is the most
515                //  expensive operation.
516                //
517                checkAkismet( context, change );
518    
519                m_lastModifications.add( new Host( addr, change ) );
520            }
521        }
522    
523    
524        /**
525         *  Checks against the akismet system.
526         *
527         * @param context
528         * @param change
529         * @throws RedirectException
530         */
531        private void checkAkismet( WikiContext context, Change change ) throws RedirectException {
532            if( m_akismetAPIKey != null ) {
533                if( m_akismet == null ) {
534                    log.info( "Initializing Akismet spam protection." );
535                    m_akismet = new Akismet( m_akismetAPIKey, context.getEngine().getBaseURL() );
536    
537                    if( !m_akismet.verifyAPIKey() ) {
538                        log.error( "Akismet API key cannot be verified.  Please check your config." );
539                        m_akismetAPIKey = null;
540                        m_akismet = null;
541                    }
542                }
543    
544                HttpServletRequest req = context.getHttpRequest();
545    
546                //
547                //  Akismet will mark all empty statements as spam, so we'll just
548                //  ignore them.
549                //
550                if( change.m_adds == 0 && change.m_removals > 0 ) {
551                    return;
552                }
553                
554                if( req != null && m_akismet != null ) {
555                    log.debug( "Calling Akismet to check for spam..." );
556    
557                    StopWatch sw = new StopWatch();
558                    sw.start();
559    
560                    String ipAddress     = HttpUtil.getRemoteAddress( req );
561                    String userAgent     = req.getHeader( "User-Agent" );
562                    String referrer      = req.getHeader( "Referer");
563                    String permalink     = context.getViewURL( context.getPage().getName() );
564                    String commentType   = context.getRequestContext().equals( WikiContext.COMMENT ) ? "comment" : "edit";
565                    String commentAuthor = context.getCurrentUser().getName();
566                    String commentAuthorEmail = null;
567                    String commentAuthorURL   = null;
568    
569                    boolean isSpam = m_akismet.commentCheck( ipAddress,
570                                                             userAgent,
571                                                             referrer,
572                                                             permalink,
573                                                             commentType,
574                                                             commentAuthor,
575                                                             commentAuthorEmail,
576                                                             commentAuthorURL,
577                                                             change.toString(),
578                                                             null );
579    
580                    sw.stop();
581                    log.debug( "Akismet request done in: " + sw );
582    
583                    if( isSpam ) {
584                        // Host host = new Host( ipAddress, null );
585                        // m_temporaryBanList.add( host );
586    
587                        String uid = log( context, REJECT, REASON_AKISMET, change.toString() );
588                        log.info( "SPAM:Akismet (" + uid + "). Akismet thinks this change is spam; added host to temporary ban list." );
589                        checkStrategy( context, REASON_AKISMET, "Akismet tells Herb you're a spammer, Herb trusts Akismet, and I trust Herb! (Incident code " + uid + ")" );
590                    }
591                }
592            }
593        }
594    
595        /**
596         * Returns a static string which can be used to detect spambots which just wildly fill in all the fields.
597         *
598         * @return A string
599         */
600        public static String getBotFieldName() {
601            return "submit_auth";
602        }
603    
604        /**
605         * This checks whether an invisible field is available in the request, and whether it's contents are suspected spam.
606         *
607         * @param context
608         * @param change
609         * @throws RedirectException
610         */
611        private void checkBotTrap( WikiContext context, Change change ) throws RedirectException {
612            HttpServletRequest request = context.getHttpRequest();
613    
614            if( request != null ) {
615                String unspam = request.getParameter( getBotFieldName() );
616                if( unspam != null && unspam.length() > 0 ) {
617                    String uid = log( context, REJECT, REASON_BOT_TRAP, change.toString() );
618    
619                    log.info( "SPAM:BotTrap (" + uid + ").  Wildly behaving bot detected." );
620                    checkStrategy( context, REASON_BOT_TRAP, "Spamming attempt detected. (Incident code " + uid + ")" );
621                }
622            }
623        }
624    
625        private void checkUTF8( WikiContext context, Change change ) throws RedirectException {
626            HttpServletRequest request = context.getHttpRequest();
627    
628            if( request != null ) {
629                String utf8field = request.getParameter( "encodingcheck" );
630    
631                if( utf8field != null && !utf8field.equals( "\u3041" ) ) {
632                    String uid = log( context, REJECT, REASON_UTF8_TRAP, change.toString() );
633    
634                    log.info( "SPAM:UTF8Trap (" + uid + ").  Wildly posting dumb bot detected." );
635                    checkStrategy( context, REASON_UTF8_TRAP, "Spamming attempt detected. (Incident code " + uid + ")" );
636                }
637            }
638        }
639    
640        /** Goes through the ban list and cleans away any host which has expired from it. */
641        private synchronized void cleanBanList() {
642            long now = System.currentTimeMillis();
643    
644            for( Iterator< Host > i = m_temporaryBanList.iterator(); i.hasNext(); ) {
645                Host host = i.next();
646    
647                if( host.getReleaseTime() < now ) {
648                    log.debug( "Removed host " + host.getAddress() + " from temporary ban list (expired)" );
649                    i.remove();
650                }
651            }
652        }
653    
654        /**
655         *  Checks the ban list if the IP address of the changer is already on it.
656         *
657         *  @param context
658         *  @throws RedirectException
659         */
660        private void checkBanList( WikiContext context, Change change ) throws RedirectException {
661            HttpServletRequest req = context.getHttpRequest();
662    
663            if( req != null ) {
664                String remote = HttpUtil.getRemoteAddress(req);
665                long now = System.currentTimeMillis();
666    
667                for( Iterator< Host > i = m_temporaryBanList.iterator(); i.hasNext(); ) {
668                    Host host = i.next();
669    
670                    if( host.getAddress().equals( remote ) ) {
671                        long timeleft = ( host.getReleaseTime() - now ) / 1000L;
672    
673                        log( context, REJECT, REASON_IP_BANNED_TEMPORARILY, change.m_change );
674                        checkStrategy( context, REASON_IP_BANNED_TEMPORARILY, "You have been temporarily banned from modifying this wiki. (" + timeleft + " seconds of ban left)" );
675                    }
676                }
677            }
678        }
679    
680        /**
681         *  If the spam filter notices changes in the black list page, it will refresh them automatically.
682         *
683         *  @param context
684         */
685        private void refreshBlacklists( WikiContext context ) {
686            try {
687                WikiPage source = context.getEngine().getPage( m_forbiddenWordsPage );
688                Attachment att = context.getEngine().getAttachmentManager().getAttachmentInfo( context, m_blacklist );
689    
690                boolean rebuild = false;
691    
692                //
693                //  Rebuild, if the page or the attachment has changed since.
694                //
695                if( source != null ) {
696                    if( m_spamPatterns == null || m_spamPatterns.isEmpty() || source.getLastModified().after( m_lastRebuild ) ) {
697                        rebuild = true;
698                    }
699                }
700    
701                if( att != null ) {
702                    if( m_spamPatterns == null || m_spamPatterns.isEmpty() || att.getLastModified().after( m_lastRebuild ) ) {
703                        rebuild = true;
704                    }
705                }
706    
707                //
708                //  Do the actual rebuilding.  For simplicity's sake, we always rebuild the complete
709                //  filter list regardless of what changed.
710                //
711                if( rebuild ) {
712                    m_lastRebuild = new Date();
713                    m_spamPatterns = parseWordList( source,
714                                                    ( source != null ) ? ( String )source.getAttribute( LISTVAR ) : null );
715    
716                    log.info( "Spam filter reloaded - recognizing " + m_spamPatterns.size() + " patterns from page " + m_forbiddenWordsPage );
717    
718                    if( att != null ) {
719                        InputStream in = context.getEngine().getAttachmentManager().getAttachmentStream(att);
720                        StringWriter out = new StringWriter();
721                        FileUtil.copyContents( new InputStreamReader( in,"UTF-8" ), out );
722                        Collection< Pattern > blackList = parseBlacklist( out.toString() );
723                        log.info( "...recognizing additional " + blackList.size() + " patterns from blacklist " + m_blacklist );
724                        m_spamPatterns.addAll( blackList );
725                    }
726                }
727            } catch( IOException ex ) {
728                log.info( "Unable to read attachment data, continuing...", ex );
729            } catch( ProviderException ex ) {
730                log.info( "Failed to read spam filter attachment, continuing...", ex );
731            }
732        }
733    
734        /**
735         *  Does a check against a known pattern list.
736         *
737         *  @param context
738         *  @param content
739         *  @param change
740         *  @throws RedirectException
741         */
742        private void checkPatternList( WikiContext context, String content, Change change ) throws RedirectException {
743            //
744            //  If we have no spam patterns defined, or we're trying to save
745            //  the page containing the patterns, just return.
746            //
747            if( m_spamPatterns == null || context.getPage().getName().equals( m_forbiddenWordsPage ) ) {
748                return;
749            }
750    
751            String ch = change.toString();
752            if( context.getHttpRequest() != null ) {
753                ch += HttpUtil.getRemoteAddress( context.getHttpRequest() );
754            }
755    
756            for( Pattern p : m_spamPatterns ) {
757                // log.debug("Attempting to match page contents with "+p.getPattern());
758    
759                if( m_matcher.contains( ch, p ) ) {
760                    //
761                    //  Spam filter has a match.
762                    //
763                    String uid = log( context, REJECT, REASON_REGEXP + "(" + p.getPattern() + ")", ch );
764    
765                    log.info( "SPAM:Regexp (" + uid + "). Content matches the spam filter '" + p.getPattern() + "'" );
766                    checkStrategy( context, REASON_REGEXP, "Herb says '" + p.getPattern() + "' is a bad spam word and I trust Herb! (Incident code " + uid + ")" );
767                }
768            }
769        }
770    
771        private void checkPatternList( WikiContext context, String content, String change ) throws RedirectException {
772            Change c = new Change();
773            c.m_change = change;
774            checkPatternList( context, content, c );
775        }
776     
777        /**
778         *  Creates a simple text string describing the added content.
779         *
780         *  @param context
781         *  @param newText
782         *  @return Empty string, if there is no change.
783         */
784        private static Change getChange( WikiContext context, String newText ) {
785            WikiPage page = context.getPage();
786            StringBuffer change = new StringBuffer();
787            WikiEngine engine = context.getEngine();
788            // Get current page version
789    
790            Change ch = new Change();
791            
792            try {
793                String oldText = engine.getPureText( page.getName(), WikiProvider.LATEST_VERSION );
794    
795                String[] first  = Diff.stringToArray( oldText );
796                String[] second = Diff.stringToArray( newText );
797                Revision rev = Diff.diff( first, second, new MyersDiff() );
798    
799                if( rev == null || rev.size() == 0 ) {
800                    return ch;
801                }
802                
803                for( int i = 0; i < rev.size(); i++ ) {
804                    Delta d = rev.getDelta( i );
805    
806                    if( d instanceof AddDelta ) {
807                        d.getRevised().toString( change, "", "\r\n" );
808                        ch.m_adds++;
809                        
810                    } else if( d instanceof ChangeDelta ) {
811                        d.getRevised().toString( change, "", "\r\n" );
812                        ch.m_adds++;
813                        
814                    } else if( d instanceof DeleteDelta ) {
815                        ch.m_removals++;
816                    }
817                }
818            } catch( DifferentiationFailedException e ) {
819                log.error( "Diff failed", e );
820            }
821    
822            //
823            //  Don't forget to include the change note, too
824            //
825            String changeNote = ( String )page.getAttribute( WikiPage.CHANGENOTE );
826    
827            if( changeNote != null ) {
828                change.append( "\r\n" );
829                change.append( changeNote );
830            }
831    
832            //
833            //  And author as well
834            //
835            if( page.getAuthor() != null ) {
836                change.append( "\r\n" + page.getAuthor() );
837            }
838    
839            ch.m_change = change.toString();
840            return ch;
841        }
842    
843        /**
844         *  Returns true, if this user should be ignored.  For example, admin users.
845         *
846         * @param context
847         * @return True, if this users should be ignored.
848         */
849        private boolean ignoreThisUser( WikiContext context ) {
850            if( context.hasAdminPermissions() ) {
851                return true;
852            }
853    
854            if( m_ignoreAuthenticated && context.getWikiSession().isAuthenticated() ) {
855                return true;
856            }
857    
858            if( context.getVariable( "captcha" ) != null ) {
859                return true;
860            }
861    
862            return false;
863        }
864    
865        /**
866         *  Returns a random string of six uppercase characters.
867         *
868         *  @return A random string
869         */
870        private static String getUniqueID() {
871            StringBuilder sb = new StringBuilder();
872            Random rand = new Random();
873    
874            for( int i = 0; i < 6; i++ ) {
875                char x = ( char )( 'A' + rand.nextInt( 26 ) );
876                sb.append( x );
877            }
878    
879            return sb.toString();
880        }
881    
882        /**
883         *  Returns a page to which we shall redirect, based on the current value of the "captcha" parameter.
884         *
885         *  @param ctx WikiContext
886         *  @return An URL to redirect to
887         */
888        private String getRedirectPage( WikiContext ctx ) {
889            if( m_useCaptcha ) {
890                return ctx.getURL( WikiContext.NONE, "Captcha.jsp", "page="+ctx.getEngine().encodeName( ctx.getPage().getName() ) );
891            }
892    
893            return ctx.getURL( WikiContext.VIEW, m_errorPage );
894        }
895    
896        /**
897         *  Checks whether the UserProfile matches certain checks.
898         *
899         *  @param profile The profile to check
900         *  @param context The WikiContext
901         *  @return False, if this userprofile is suspect and should not be allowed to be added.
902         *  @since 2.6.1
903         */
904        public boolean isValidUserProfile( WikiContext context, UserProfile profile ) {
905            try {
906                checkPatternList( context, profile.getEmail(), profile.getEmail() );
907                checkPatternList( context, profile.getFullname(), profile.getFullname() );
908                checkPatternList( context, profile.getLoginName(), profile.getLoginName() );
909            } catch( RedirectException e ) {
910                log.info("Detected attempt to create a spammer user account (see above for rejection reason)");
911                return false;
912            }
913    
914            return true;
915        }
916    
917        /**
918         *  This method is used to calculate an unique code when submitting the page to detect edit conflicts.  
919         *  It currently incorporates the last-modified date of the page, and the IP address of the submitter.
920         *
921         *  @param page The WikiPage under edit
922         *  @param request The HTTP Request
923         *  @since 2.6
924         *  @return A hash value for this page and session
925         */
926        public static final String getSpamHash( WikiPage page, HttpServletRequest request ) {
927            long lastModified = 0;
928    
929            if( page.getLastModified() != null ) {
930                lastModified = page.getLastModified().getTime();
931            }
932            long remote = HttpUtil.getRemoteAddress( request ).hashCode();
933    
934            return Long.toString( lastModified ^ remote );
935        }
936    
937        /**
938         *  Returns the name of the hash field to be used in this request. The value is unique per session, and once 
939         *  the session has expired, you cannot edit anymore.
940         *
941         *  @param request The page request
942         *  @return The name to be used in the hash field
943         *  @since  2.6
944         */
945        public static final String getHashFieldName( HttpServletRequest request ) {
946            String hash = null;
947    
948            if( request.getSession() != null ) {
949                hash = ( String )request.getSession().getAttribute( "_hash" );
950    
951                if( hash == null ) {
952                    hash = c_hashName;
953                    request.getSession().setAttribute( "_hash", hash );
954                }
955            }
956    
957            if( c_hashName == null || c_lastUpdate < ( System.currentTimeMillis() - HASH_DELAY * 60 * 60 * 1000 ) ) {
958                c_hashName = getUniqueID().toLowerCase();
959                c_lastUpdate = System.currentTimeMillis();
960            }
961    
962            return hash != null ? hash : c_hashName;
963        }
964    
965    
966        /**
967         *  This method checks if the hash value is still valid, i.e. if it exists at all. This can occur in two cases: 
968         *  either this is a spam bot which is not adaptive, or it is someone who has been editing one page for too long, 
969         *  and their session has expired.
970         *  <p>
971         *  This method puts a redirect to the http response field to page "SessionExpired" and logs the incident in 
972         *  the spam log (it may or may not be spam, but it's rather likely that it is).
973         *
974         *  @param context The WikiContext
975         *  @param pageContext The JSP PageContext.
976         *  @return True, if hash is okay.  False, if hash is not okay, and you need to redirect.
977         *  @throws IOException If redirection fails
978         *  @since 2.6
979         */
980        public static final boolean checkHash( WikiContext context, PageContext pageContext ) throws IOException {
981            String hashName = getHashFieldName( (HttpServletRequest)pageContext.getRequest() );
982    
983            if( pageContext.getRequest().getParameter(hashName) == null ) {
984                if( pageContext.getAttribute( hashName ) == null ) {
985                    Change change = getChange( context, EditorManager.getEditedText( pageContext ) );
986                    log( context, REJECT, "MissingHash", change.m_change );
987    
988                    String redirect = context.getURL( WikiContext.VIEW,"SessionExpired" );
989                    ( ( HttpServletResponse )pageContext.getResponse() ).sendRedirect( redirect );
990                    return false;
991                }
992            }
993    
994            return true;
995        }
996    
997        /**
998         * This helper method adds all the input fields to your editor that the SpamFilter requires
999         * to check for spam.  This <i>must</i> be in your editor form if you intend to use the SpamFilter.
1000         *  
1001         * @param pageContext The PageContext
1002         * @return A HTML string which contains input fields for the SpamFilter.
1003         */
1004        public static final String insertInputFields( PageContext pageContext ) {
1005            WikiContext ctx = WikiContext.findContext( pageContext );
1006            WikiEngine engine = ctx.getEngine();
1007    
1008            StringBuilder sb = new StringBuilder();
1009            if( engine.getContentEncoding().equals( "UTF-8" ) ) {
1010                sb.append( "<input name='encodingcheck' type='hidden' value='\u3041' />\n" );
1011            }
1012    
1013            return sb.toString();
1014        }
1015        
1016        /**
1017         *  A local class for storing host information.
1018         *
1019         *  @since
1020         */
1021        private class Host {
1022            
1023            private long   m_addedTime = System.currentTimeMillis();
1024            private long   m_releaseTime;
1025            private String m_address;
1026            private Change m_change;
1027    
1028            public String getAddress() {
1029                return m_address;
1030            }
1031    
1032            public long getReleaseTime() {
1033                return m_releaseTime;
1034            }
1035    
1036            public long getAddedTime() {
1037                return m_addedTime;
1038            }
1039    
1040            public Change getChange() {
1041                return m_change;
1042            }
1043    
1044            public Host( String ipaddress, Change change ) {
1045                m_address = ipaddress;
1046                m_change  = change;
1047                m_releaseTime = System.currentTimeMillis() + m_banTime * 60 * 1000L;
1048            }
1049            
1050        }
1051        
1052        private static class Change {
1053            
1054            public String m_change;
1055            public int    m_adds;
1056            public int    m_removals;
1057            
1058            public String toString() {
1059                return m_change;
1060            }
1061            
1062            public boolean equals( Object o ) {
1063                if( o instanceof Change ) {
1064                    return m_change.equals( ( ( Change )o ).m_change );
1065                }
1066                return false;
1067            }
1068            
1069            public int hashCode() {
1070                return m_change.hashCode() + 17;
1071            }
1072            
1073        }
1074    
1075    }