001    /*
002        Licensed to the Apache Software Foundation (ASF) under one
003        or more contributor license agreements.  See the NOTICE file
004        distributed with this work for additional information
005        regarding copyright ownership.  The ASF licenses this file
006        to you under the Apache License, Version 2.0 (the
007        "License"); you may not use this file except in compliance
008        with the License.  You may obtain a copy of the License at
009    
010           http://www.apache.org/licenses/LICENSE-2.0
011    
012        Unless required by applicable law or agreed to in writing,
013        software distributed under the License is distributed on an
014        "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015        KIND, either express or implied.  See the License for the
016        specific language governing permissions and limitations
017        under the License.    
018     */
019    package org.apache.wiki;
020    
021    import java.io.BufferedInputStream;
022    import java.io.BufferedOutputStream;
023    import java.io.File;
024    import java.io.FileInputStream;
025    import java.io.FileOutputStream;
026    import java.io.IOException;
027    import java.io.ObjectInputStream;
028    import java.io.ObjectOutputStream;
029    import java.io.Serializable;
030    import java.io.UnsupportedEncodingException;
031    import java.security.MessageDigest;
032    import java.security.NoSuchAlgorithmException;
033    import java.util.ArrayList;
034    import java.util.Collection;
035    import java.util.Collections;
036    import java.util.ConcurrentModificationException;
037    import java.util.HashMap;
038    import java.util.HashSet;
039    import java.util.Iterator;
040    import java.util.Map;
041    import java.util.Set;
042    import java.util.TreeSet;
043    
044    import org.apache.commons.lang.time.StopWatch;
045    import org.apache.log4j.Logger;
046    import org.apache.wiki.api.exceptions.ProviderException;
047    import org.apache.wiki.api.filters.BasicPageFilter;
048    import org.apache.wiki.attachment.Attachment;
049    import org.apache.wiki.event.WikiEvent;
050    import org.apache.wiki.event.WikiEventListener;
051    import org.apache.wiki.event.WikiEventUtils;
052    import org.apache.wiki.event.WikiPageEvent;
053    import org.apache.wiki.modules.InternalModule;
054    import org.apache.wiki.providers.WikiPageProvider;
055    import org.apache.wiki.util.TextUtil;
056    
057    /*
058      BUGS
059    
060      - if a wikilink is added to a page, then removed, RefMan still thinks that
061        the page refers to the wikilink page. Hm.
062    
063      - if a page is deleted, gets very confused.
064    
065      - Serialization causes page attributes to be missing, when InitializablePlugins
066        are not executed properly.  Thus, serialization should really also mark whether
067        a page is serializable or not...
068     */
069    
070    
071    /*
072       A word about synchronizing:
073    
074       I expect this object to be accessed in three situations:
075       - when a WikiEngine is created and it scans its wikipages
076       - when the WE saves a page
077       - when a JSP page accesses one of the WE's ReferenceManagers
078         to display a list of (un)referenced pages.
079    
080       So, access to this class is fairly rare, and usually triggered by
081       user interaction. OTOH, the methods in this class use their storage
082       objects intensively (and, sorry to say, in an unoptimized manner =).
083       My deduction: using unsynchronized HashMaps etc and syncing methods
   or code blocks is preferable to using slow, synced storage objects.
085       We don't have iterative code here, so I'm going to use synced methods
086       for now.
087    
088       Please contact me if you notice problems with ReferenceManager, and
089       especially with synchronization, or if you have suggestions about
090       syncing.
091    
092       ebu@memecry.net
093    */
094    
095    /**
096     *  Keeps track of wikipage references:
097     *  <UL>
098     *  <LI>What pages a given page refers to
099     *  <LI>What pages refer to a given page
100     *  </UL>
101     *
102     *  This is a quick'n'dirty approach without any finesse in storage and
103     *  searching algorithms; we trust java.util.*.
104     *  <P>
105     *  This class contains two HashMaps, m_refersTo and m_referredBy. The
106     *  first is indexed by WikiPage names and contains a Collection of all
107     *  WikiPages the page refers to. (Multiple references are not counted,
108     *  naturally.) The second is indexed by WikiPage names and contains
109     *  a Set of all pages that refer to the indexing page. (Notice -
110     *  the keys of both Maps should be kept in sync.)
111     *  <P>
112     *  When a page is added or edited, its references are parsed, a Collection
113     *  is received, and we crudely replace anything previous with this new
114     *  Collection. We then check each referenced page name and make sure they
115     *  know they are referred to by the new page.
116     *  <P>
117     *  Based on this information, we can perform non-optimal searches for
118     *  e.g. unreferenced pages, top ten lists, etc.
119     *  <P>
 *  The owning class must take responsibility for filling in any pre-existing
121     *  information, probably by loading each and every WikiPage and calling this
122     *  class to update the references when created.
123     *
124     *  @since 1.6.1
125     */
126    
// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...
130    
public class ReferenceManager
    extends BasicPageFilter
    implements InternalModule, WikiEventListener
{
    /** Maps page wikiname to a Collection of pages it refers to. The Collection
     *  must contain Strings. The Collection may contain names of non-existing
     *  pages.
     */
    private Map<String,Collection<String>> m_refersTo;
    /** Read-only view of m_refersTo, handed out to external callers. */
    private Map<String,Collection<String>> m_unmutableRefersTo;

    /** Maps page wikiname to a Set of referring pages. The Set must
     *  contain Strings. Non-existing pages (a reference exists, but not a file
     *  for the page contents) may have an empty Set in m_referredBy.
     */
    private Map<String,Set<String>> m_referredBy;
    /** Read-only view of m_referredBy, handed out to external callers. */
    private Map<String,Set<String>> m_unmutableReferredBy;

    /** The WikiEngine that owns this object. */
    private WikiEngine     m_engine;

    /** If true, treats English plural forms (e.g. "PageName"/"PageNames") as the same page. */
    private boolean        m_matchEnglishPlurals = false;

    private static Logger log = Logger.getLogger(ReferenceManager.class);

    /** Name of the serialized reference-map cache file in the work directory. */
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    /** Name of the work subdirectory holding per-page attribute caches. */
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We also use this as a generic serialization id (written as the cache file format version). */
    private static final long serialVersionUID = 4L;
161    
162        /**
163         *  Builds a new ReferenceManager.
164         *
165         *  @param engine The WikiEngine to which this is managing references to.
166         */
167        public ReferenceManager( WikiEngine engine )
168        {
169            m_refersTo   = new HashMap<String,Collection<String>>();
170            m_referredBy = new HashMap<String,Set<String>>();
171            m_engine = engine;
172    
173            m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(),
174                                                                 WikiEngine.PROP_MATCHPLURALS,
175                                                                 m_matchEnglishPlurals );
176    
177            //
178            //  Create two maps that contain unmutable versions of the two basic maps.
179            //
180            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
181            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
182        }
183    
184        /**
185         *  Does a full reference update.  Does not sync; assumes that you do it afterwards.
186         */
187        private void updatePageReferences( WikiPage page ) throws ProviderException
188        {
189            String content = m_engine.getPageManager().getPageText( page.getName(),
190                                                                    WikiPageProvider.LATEST_VERSION );
191    
192            TreeSet<String> res = new TreeSet<String>();
193            Collection<String> links = m_engine.scanWikiLinks( page, content );
194    
195            res.addAll( links );
196            Collection attachments = m_engine.getAttachmentManager().listAttachments( page );
197    
198            for( Iterator atti = attachments.iterator(); atti.hasNext(); )
199            {
200                res.add( ((Attachment)(atti.next())).getName() );
201            }
202    
203            internalUpdateReferences( page.getName(), res );
204        }
205    
206        /**
207         *  Initializes the entire reference manager with the initial set of pages
208         *  from the collection.
209         *
210         *  @param pages A collection of all pages you want to be included in the reference
211         *               count.
212         *  @since 2.2
213         *  @throws ProviderException If reading of pages fail.
214         */
215        public void initialize( Collection pages )
216            throws ProviderException
217        {
218            log.debug( "Initializing new ReferenceManager with "+pages.size()+" initial pages." );
219            StopWatch sw = new StopWatch();
220            sw.start();
221            log.info( "Starting cross reference scan of WikiPages" );
222    
223            //
224            //  First, try to serialize old data from disk.  If that fails,
225            //  we'll go and update the entire reference lists (which'll take
226            //  time)
227            //
228            try
229            {
230                //
231                //  Unserialize things.  The loop below cannot be combined with
232                //  the other loop below, simply because engine.getPage() has
233                //  side effects such as loading initializing the user databases,
234                //  which in turn want all of the pages to be read already...
235                //
236                //  Yes, this is a kludge.  We know.  Will be fixed.
237                //
238                long saved = unserializeFromDisk();
239    
240                for( Iterator it = pages.iterator(); it.hasNext(); )
241                {
242                    WikiPage page = (WikiPage) it.next();
243    
244                    unserializeAttrsFromDisk( page );
245                }
246    
247                //
248                //  Now we must check if any of the pages have been changed
249                //  while we were in the electronic la-la-land, and update
250                //  the references for them.
251                //
252    
253                Iterator it = pages.iterator();
254    
255                while( it.hasNext() )
256                {
257                    WikiPage page = (WikiPage) it.next();
258    
259                    if( page instanceof Attachment )
260                    {
261                        // Skip attachments
262                    }
263                    else
264                    {
265    
266                        // Refresh with the latest copy
267                        page = m_engine.getPage( page.getName() );
268    
269                        if( page.getLastModified() == null )
270                        {
271                            log.fatal( "Provider returns null lastModified.  Please submit a bug report." );
272                        }
273                        else if( page.getLastModified().getTime() > saved )
274                        {
275                            updatePageReferences( page );
276                        }
277                    }
278                }
279    
280            }
281            catch( Exception e )
282            {
283                log.info("Unable to unserialize old refmgr information, rebuilding database: "+e.getMessage());
284                buildKeyLists( pages );
285    
286                // Scan the existing pages from disk and update references in the manager.
287                Iterator it = pages.iterator();
288                while( it.hasNext() )
289                {
290                    WikiPage page  = (WikiPage)it.next();
291    
292                    if( page instanceof Attachment )
293                    {
294                        // We cannot build a reference list from the contents
295                        // of attachments, so we skip them.
296                    }
297                    else
298                    {
299                        updatePageReferences( page );
300    
301                        serializeAttrsToDisk( page );
302                    }
303    
304                }
305    
306                serializeToDisk();
307            }
308    
309            sw.stop();
310            log.info( "Cross reference scan done in "+sw );
311    
312            WikiEventUtils.addWikiEventListener(m_engine.getPageManager(),
313                                                WikiPageEvent.PAGE_DELETED, this);
314        }
315    
316        /**
317         *  Reads the serialized data from the disk back to memory.
318         *  Returns the date when the data was last written on disk
319         */
320        @SuppressWarnings("unchecked")
321        private synchronized long unserializeFromDisk()
322            throws IOException,
323                   ClassNotFoundException
324        {
325            ObjectInputStream in = null;
326            long saved = 0L;
327    
328            try
329            {
330                StopWatch sw = new StopWatch();
331                sw.start();
332    
333                File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
334    
335                in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );
336    
337                long ver     = in.readLong();
338    
339                if( ver != serialVersionUID )
340                {
341                    throw new IOException("File format has changed; I need to recalculate references.");
342                }
343    
344                saved        = in.readLong();
345                m_refersTo   = (Map) in.readObject();
346                m_referredBy = (Map) in.readObject();
347    
348                in.close();
349    
350                m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
351                m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
352    
353                sw.stop();
354                log.debug("Read serialized data successfully in "+sw);
355            }
356            finally
357            {
358                if( in != null ) in.close();
359            }
360    
361            return saved;
362        }
363    
364        /**
365         *  Serializes hashmaps to disk.  The format is private, don't touch it.
366         */
367        private synchronized void serializeToDisk()
368        {
369            ObjectOutputStream out = null;
370    
371            try
372            {
373                StopWatch sw = new StopWatch();
374                sw.start();
375    
376                File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
377    
378                out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );
379    
380                out.writeLong( serialVersionUID );
381                out.writeLong( System.currentTimeMillis() ); // Timestamp
382                out.writeObject( m_refersTo );
383                out.writeObject( m_referredBy );
384    
385                out.close();
386    
387                sw.stop();
388    
389                log.debug("serialization done - took "+sw);
390            }
391            catch( IOException e )
392            {
393                log.error("Unable to serialize!");
394    
395                try
396                {
397                    if( out != null ) out.close();
398                }
399                catch( IOException ex ) {}
400            }
401        }
402    
403        private String getHashFileName( String pageName )
404            throws NoSuchAlgorithmException
405        {
406            MessageDigest digest = MessageDigest.getInstance("MD5");
407    
408            byte[] dig;
409            try
410            {
411                dig = digest.digest( pageName.getBytes("UTF-8") );
412            }
413            catch (UnsupportedEncodingException e)
414            {
415                throw new InternalWikiException("AAAAGH!  UTF-8 is gone!  My eyes!  It burns...!");
416            }
417    
418            return TextUtil.toHexString(dig)+".cache";
419        }
420    
421        /**
422         *  Reads the serialized data from the disk back to memory.
423         *  Returns the date when the data was last written on disk
424         */
425        private synchronized long unserializeAttrsFromDisk(WikiPage p)
426            throws IOException,
427                   ClassNotFoundException
428        {
429            ObjectInputStream in = null;
430            long saved = 0L;
431    
432            try
433            {
434                StopWatch sw = new StopWatch();
435                sw.start();
436    
437                //
438                //  Find attribute cache, and check if it exists
439                //
440                File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
441    
442                f = new File( f, getHashFileName(p.getName()) );
443    
444                if( !f.exists() )
445                {
446                    return 0L;
447                }
448    
449                log.debug("Deserializing attributes for "+p.getName());
450    
451                in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );
452    
453                long ver     = in.readLong();
454    
455                if( ver != serialVersionUID )
456                {
457                    log.debug("File format has changed; cannot deserialize.");
458                    return 0L;
459                }
460    
461                saved        = in.readLong();
462    
463                String name  = in.readUTF();
464    
465                if( !name.equals(p.getName()) )
466                {
467                    log.debug("File name does not match ("+name+"), skipping...");
468                    return 0L; // Not here
469                }
470    
471                long entries = in.readLong();
472    
473                for( int i = 0; i < entries; i++ )
474                {
475                    String key   = in.readUTF();
476                    Object value = in.readObject();
477    
478                    p.setAttribute( key, value );
479    
480                    log.debug("   attr: "+key+"="+value);
481                }
482    
483                in.close();
484    
485                sw.stop();
486                log.debug("Read serialized data for "+name+" successfully in "+sw);
487                p.setHasMetadata();
488            }
489            catch( NoSuchAlgorithmException e )
490            {
491                log.fatal("No MD5!?!");
492            }
493            finally
494            {
495                if( in != null ) in.close();
496            }
497    
498            return saved;
499        }
500    
501        /**
502         *  Serializes hashmaps to disk.  The format is private, don't touch it.
503         */
504        private synchronized void serializeAttrsToDisk( WikiPage p )
505        {
506            ObjectOutputStream out = null;
507            StopWatch sw = new StopWatch();
508            sw.start();
509    
510            try
511            {
512                File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
513    
514                if( !f.exists() ) f.mkdirs();
515    
516                //
517                //  Create a digest for the name
518                //
519                f = new File( f, getHashFileName(p.getName()) );
520    
521                // FIXME: There is a concurrency issue here...
522                Set entries = p.getAttributes().entrySet();
523    
524                if( entries.size() == 0 ) 
525                {
526                    //  Nothing to serialize, therefore we will just simply remove the
527                    //  serialization file so that the next time we boot, we don't
528                    //  deserialize old data.
529                    f.delete();
530                    return;
531                }
532    
533                out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );
534    
535                out.writeLong( serialVersionUID );
536                out.writeLong( System.currentTimeMillis() ); // Timestamp
537    
538                out.writeUTF( p.getName() );
539                out.writeLong( entries.size() );
540    
541                for( Iterator i = entries.iterator(); i.hasNext(); )
542                {
543                    Map.Entry e = (Map.Entry) i.next();
544    
545                    if( e.getValue() instanceof Serializable )
546                    {
547                        out.writeUTF( (String)e.getKey() );
548                        out.writeObject( e.getValue() );
549                    }
550                }
551    
552                out.close();
553    
554            }
555            catch( IOException e )
556            {
557                log.error("Unable to serialize!");
558    
559                try
560                {
561                    if( out != null ) out.close();
562                }
563                catch( IOException ex ) {}
564            }
565            catch( NoSuchAlgorithmException e )
566            {
567                log.fatal("No MD5 algorithm!?!");
568            }
569            finally
570            {
571                sw.stop();
572    
573                log.debug("serialization for "+p.getName()+" done - took "+sw);
574            }
575        }
576    
577        /**
578         *  After the page has been saved, updates the reference lists.
579         *  
580         *  @param context {@inheritDoc}
581         *  @param content {@inheritDoc}
582         */
583        public void postSave( WikiContext context, String content )
584        {
585            WikiPage page = context.getPage();
586    
587            updateReferences( page.getName(),
588                              context.getEngine().scanWikiLinks( page, content ) );
589    
590            serializeAttrsToDisk( page );
591        }
592    
593        /**
594         * Updates the m_referedTo and m_referredBy hashmaps when a page has been
595         * deleted.
596         * <P>
597         * Within the m_refersTo map the pagename is a key. The whole key-value-set
598         * has to be removed to keep the map clean.
599         * Within the m_referredBy map the name is stored as a value. Since a key
600         * can have more than one value we have to delete just the key-value-pair
601         * referring page:deleted page.
602         *
603         *  @param page Name of the page to remove from the maps.
604         */
605        public synchronized void pageRemoved( WikiPage page )
606        {
607            String pageName = page.getName();
608    
609            pageRemoved(pageName);
610        }
611    
    /**
     * Removes all traces of a page from the reference maps and from the
     * on-disk caches.  Not synchronized; callers hold the lock.
     */
    private void pageRemoved(String pageName)
    {
        // For every page this page referred to, remove this page from that
        // page's referred-by set.
        Collection<String> refTo = m_refersTo.get( pageName );

        if( refTo != null )
        {
            Iterator itRefTo = refTo.iterator();
            while( itRefTo.hasNext() )
            {
                String referredPageName = (String)itRefTo.next();
                Set<String> refBy = m_referredBy.get( referredPageName );

                if( refBy == null )
                    throw new InternalWikiException("Refmgr out of sync: page "+pageName+" refers to "+referredPageName+", which has null referrers.");

                refBy.remove(pageName);

                m_referredBy.remove( referredPageName );

                // We won't put it back again if it becomes empty and does not exist.  It will be added
                // later on anyway, if it becomes referenced again.
                if( !(refBy.isEmpty() && !m_engine.pageExists(referredPageName)) )
                {
                    m_referredBy.put( referredPageName, refBy );
                }
            }

            log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName ));
            m_refersTo.remove( pageName );
        }

        // Drop the page's own referred-by entry if nothing points at it anymore.
        Set<String> refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() )
        {
            m_referredBy.remove( pageName );
        }

        //
        //  Remove any traces from the disk, too
        //
        serializeToDisk();

        try
        {
            // Delete the per-page attribute cache file as well.
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );

            f = new File( f, getHashFileName(pageName) );

            if( f.exists() ) f.delete();
        }
        catch( NoSuchAlgorithmException e )
        {
            log.error("What do you mean - no such algorithm?", e);
        }
    }
667    
668        /**
669         *  Updates the referred pages of a new or edited WikiPage. If a refersTo
670         *  entry for this page already exists, it is removed and a new one is built
671         *  from scratch. Also calls updateReferredBy() for each referenced page.
672         *  <P>
673         *  This is the method to call when a new page has been created and we
674         *  want to a) set up its references and b) notify the referred pages
675         *  of the references. Use this method during run-time.
676         *
677         *  @param page Name of the page to update.
678         *  @param references A Collection of Strings, each one pointing to a page this page references.
679         */
680        public synchronized void updateReferences( String page, Collection< String > references )
681        {
682            internalUpdateReferences(page, references);
683    
684            serializeToDisk();
685        }
686    
687        /**
688         *  Updates the referred pages of a new or edited WikiPage. If a refersTo
689         *  entry for this page already exists, it is removed and a new one is built
690         *  from scratch. Also calls updateReferredBy() for each referenced page.
691         *  <p>
692         *  This method does not synchronize the database to disk.
693         *
694         *  @param page Name of the page to update.
695         *  @param references A Collection of Strings, each one pointing to a page this page references.
696         */
697    
698        private void internalUpdateReferences(String page, Collection< String > references)
699        {
700            page = getFinalPageName( page );
701    
702            //
703            // Create a new entry in m_refersTo.
704            //
705            Collection oldRefTo = m_refersTo.get( page );
706            m_refersTo.remove( page );
707    
708            TreeSet<String> cleanedRefs = new TreeSet<String>();
709            for( Iterator< String > i = references.iterator(); i.hasNext(); )
710            {
711                String ref = i.next();
712    
713                ref = getFinalPageName( ref );
714    
715                cleanedRefs.add( ref );
716            }
717    
718            m_refersTo.put( page, cleanedRefs );
719    
720            //
721            //  We know the page exists, since it's making references somewhere.
722            //  If an entry for it didn't exist previously in m_referredBy, make
723            //  sure one is added now.
724            //
725            if( !m_referredBy.containsKey( page ) )
726            {
727                m_referredBy.put( page, new TreeSet<String>() );
728            }
729    
730            //
731            //  Get all pages that used to be referred to by 'page' and
732            //  remove that reference. (We don't want to try to figure out
733            //  which particular references were removed...)
734            //
735            cleanReferredBy( page, oldRefTo, cleanedRefs );
736    
737            //
738            //  Notify all referred pages of their referinesshoodicity.
739            //
740            Iterator<String> it = cleanedRefs.iterator();
741            while( it.hasNext() )
742            {
743                String referredPageName = it.next();
744                updateReferredBy( getFinalPageName(referredPageName), page );
745            }
746        }
747    
748        /**
749         * Returns the refers-to list. For debugging.
750         * 
751         * @return The refers-to list.
752         */
753        protected Map getRefersTo()
754        {
755            return m_refersTo;
756        }
757    
758        /**
759         * Returns the referred-by list. For debugging.
760         * 
761         * @return Referred-by lists.
762         */
763        protected Map getReferredBy()
764        {
765            return m_referredBy;
766        }
767    
768        /**
769         * Cleans the 'referred by' list, removing references by 'referrer' to
770         * any other page. Called after 'referrer' is removed.
771         */
772        private void cleanReferredBy( String referrer,
773                                      Collection oldReferred,
774                                      Collection<String> newReferred )
775        {
776            // Two ways to go about this. One is to look up all pages previously
777            // referred by referrer and remove referrer from their lists, and let
778            // the update put them back in (except possibly removed ones).
779            // The other is to get the old referred to list, compare to the new,
780            // and tell the ones missing in the latter to remove referrer from
781            // their list. Hm. We'll just try the first for now. Need to come
782            // back and optimize this a bit.
783    
784            if( oldReferred == null )
785                return;
786    
787            Iterator it = oldReferred.iterator();
788            while( it.hasNext() )
789            {
790                String referredPage = (String)it.next();
791                Set oldRefBy = m_referredBy.get( referredPage );
792                if( oldRefBy != null )
793                {
794                    oldRefBy.remove( referrer );
795                }
796    
797                // If the page is referred to by no one AND it doesn't even
798                // exist, we might just as well forget about this entry.
799                // It will be added again elsewhere if new references appear.
800                if( ( ( oldRefBy == null ) || ( oldRefBy.isEmpty() ) ) &&
801                    ( m_engine.pageExists( referredPage ) == false ) )
802                {
803                    m_referredBy.remove( referredPage );
804                }
805            }
806    
807        }
808    
809    
810        /**
811         *  When initially building a ReferenceManager from scratch, call this method
812         * BEFORE calling updateReferences() with a full list of existing page names.
813         * It builds the refersTo and referredBy key lists, thus enabling
814         * updateReferences() to function correctly.
815         * <P>
816         * This method should NEVER be called after initialization. It clears all mappings
817         * from the reference tables.
818         *
819         * @param pages   a Collection containing WikiPage objects.
820         */
821        private synchronized void buildKeyLists( Collection pages )
822        {
823            m_refersTo.clear();
824            m_referredBy.clear();
825    
826            if( pages == null )
827                return;
828    
829            Iterator it = pages.iterator();
830            try
831            {
832                while( it.hasNext() )
833                {
834                    WikiPage page = (WikiPage)it.next();
835                    // We add a non-null entry to referredBy to indicate the referred page exists
836                    m_referredBy.put( page.getName(), new TreeSet<String>() );
837                    // Just add a key to refersTo; the keys need to be in sync with referredBy.
838                    m_refersTo.put( page.getName(), null );
839                }
840            }
841            catch( ClassCastException e )
842            {
843                log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
844            }
845        }
846    
847    
848        /**
849         * Marks the page as referred to by the referrer. If the page does not
850         * exist previously, nothing is done. (This means that some page, somewhere,
851         * has a link to a page that does not exist.)
852         * <P>
853         * This method is NOT synchronized. It should only be referred to from
854         * within a synchronized method, or it should be made synced if necessary.
855         */
856        private void updateReferredBy( String page, String referrer )
857        {
858            // We're not really interested in first level self-references.
859            /*
860            if( page.equals( referrer ) )
861            {
862                return;
863            }
864            */
865            // Neither are we interested if plural forms refer to each other.
866            if( m_matchEnglishPlurals )
867            {
868                String p2 = page.endsWith("s") ? page.substring(0,page.length()-1) : page+"s";
869    
870                if( referrer.equals(p2) )
871                {
872                    return;
873                }
874            }
875    
876            Set<String> referrers = m_referredBy.get( page );
877    
878            // Even if 'page' has not been created yet, it can still be referenced.
879            // This requires we don't use m_referredBy keys when looking up missing
880            // pages, of course.
881            if(referrers == null)
882            {
883                referrers = new TreeSet<String>();
884                m_referredBy.put( page, referrers );
885            }
886            referrers.add( referrer );
887        }
888    
889    
890        /**
891         * Clears the references to a certain page so it's no longer in the map.
892         *
893         * @param pagename  Name of the page to clear references for.
894         */
895        public synchronized void clearPageEntries( String pagename )
896        {
897            pagename = getFinalPageName(pagename);
898    
899            //
900            //  Remove this item from the referredBy list of any page
901            //  which this item refers to.
902            //
903            Collection<String> c = m_refersTo.get( pagename );
904    
905            if( c != null )
906            {
907                for( String key : c )
908                {
909                    Collection<?> dref = m_referredBy.get( key );
910    
911                    dref.remove( pagename );
912                }
913            }
914    
915            //
916            //  Finally, remove direct references.
917            //
918            m_referredBy.remove( pagename );
919            m_refersTo.remove( pagename );
920        }
921    
922    
923        /**
924         *  Finds all unreferenced pages. This requires a linear scan through
925         *  m_referredBy to locate keys with null or empty values.
926         *  
927         *  @return The Collection of Strings
928         */
929        public synchronized Collection findUnreferenced()
930        {
931            ArrayList<String> unref = new ArrayList<String>();
932    
933            for( String key : m_referredBy.keySet() )
934            {
935                Set<?> refs = getReferenceList( m_referredBy, key );
936                
937                if( refs == null || refs.isEmpty() )
938                {
939                    unref.add( key );
940                }
941            }
942    
943            return unref;
944        }
945    
946    
947        /**
948         * Finds all references to non-existant pages. This requires a linear
949         * scan through m_refersTo values; each value must have a corresponding
950         * key entry in the reference Maps, otherwise such a page has never
951         * been created.
952         * <P>
953         * Returns a Collection containing Strings of unreferenced page names.
954         * Each non-existant page name is shown only once - we don't return information
955         * on who referred to it.
956         * 
957         * @return A Collection of Strings
958         */
959        public synchronized Collection findUncreated()
960        {
961            TreeSet<String> uncreated = new TreeSet<String>();
962    
963            // Go through m_refersTo values and check that m_refersTo has the corresponding keys.
964            // We want to reread the code to make sure our HashMaps are in sync...
965    
966            Collection<Collection<String>> allReferences = m_refersTo.values();
967    
968            for( Collection<String> refs : allReferences )
969            {
970                if( refs != null )
971                {
972                    for( String aReference : refs )
973                    {
974                        if( m_engine.pageExists( aReference ) == false )
975                        {
976                            uncreated.add( aReference );
977                        }
978                    }
979                }
980            }
981    
982            return uncreated;
983        }
984    
985        /**
986         *  Searches for the given page in the given Map, and returns
987         *  the set of references.  This method also takes care of English plural
988         *  matching.
989         *  
990         *  @param coll The Map to search in
991         *  @param pagename The name to find.
992         *  @return The references list.
993         */
994        private <T> Set<T> getReferenceList( Map<String,Set<T>> coll, String pagename )
995        {
996            Set<T> refs = coll.get( pagename );
997    
998            if( m_matchEnglishPlurals )
999            {
1000                //
1001                //  We'll add also matches from the "other" page.
1002                //
1003                Set<T> refs2;
1004    
1005                if( pagename.endsWith("s") )
1006                {
1007                    refs2 = coll.get( pagename.substring(0,pagename.length()-1) );
1008                }
1009                else
1010                {
1011                    refs2 = coll.get( pagename+"s" );
1012                }
1013    
1014                if( refs2 != null )
1015                {
1016                    if( refs != null )
1017                        refs.addAll( refs2 );
1018                    else
1019                        refs = refs2;
1020                }
1021            }
1022            return refs;
1023        }
1024    
1025        /**
1026         * Find all pages that refer to this page. Returns null if the page
1027         * does not exist or is not referenced at all, otherwise returns a
1028         * collection containing page names (String) that refer to this one.
1029         * <p>
1030         * @param pagename The page to find referrers for.
     * @return A Collection of Strings.  (This is, in fact, a Set, and the
     *         method signature is likely to change at some point to return
     *         a Set).  May return null, if the page
     *         does not exist, or if it has no references.
1034         */
1035        // FIXME: Return a Set instead of a Collection.
1036        public synchronized Collection findReferrers( String pagename )
1037        {
1038            Set<String> refs = getReferenceList( m_referredBy, pagename );
1039    
1040            if( refs == null || refs.isEmpty() )
1041            {
1042                return null;
1043            }
1044    
1045            return refs;
1046    
1047        }
1048    
1049        /**
1050         *  Returns all pages that refer to this page.  Note that this method
1051         *  returns an unmodifiable Map, which may be abruptly changed.  So any
1052         *  access to any iterator may result in a ConcurrentModificationException.
1053         *  <p>
1054         *  The advantages of using this method over findReferrers() is that
1055         *  it is very fast, as it does not create a new object.  The disadvantages
1056         *  are that it does not do any mapping between plural names, and you
1057         *  may end up getting a ConcurrentModificationException.
1058         *
1059         * @param pageName Page name to query.
1060         * @return A Set of Strings containing the names of all the pages that refer
1061         *         to this page.  May return null, if the page does not exist or
1062         *         has not been indexed yet.
1063         * @since 2.2.33
1064         */
1065        public Set findReferredBy( String pageName )
1066        {
1067            return m_unmutableReferredBy.get( getFinalPageName(pageName) );
1068        }
1069    
1070        /**
1071         *  Returns all pages that this page refers to.  You can use this as a quick
1072         *  way of getting the links from a page, but note that it does not link any
1073         *  InterWiki, image, or external links.  It does contain attachments, though.
1074         *  <p>
1075         *  The Collection returned is unmutable, so you cannot change it.  It does reflect
1076         *  the current status and thus is a live object.  So, if you are using any
1077         *  kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
1078         *  <p>
1079         *  The returned value is a Collection, because a page may refer to another page
1080         *  multiple times.
1081         *
1082         * @param pageName Page name to query
1083         * @return A Collection of Strings containing the names of the pages that this page
1084         *         refers to. May return null, if the page does not exist or has not
1085         *         been indexed yet.
1086         * @since 2.2.33
1087         */
1088        public Collection findRefersTo( String pageName )
1089        {
1090            return m_unmutableRefersTo.get( getFinalPageName(pageName) );
1091        }
1092    
1093        /**
1094         * This 'deepHashCode' can be used to determine if there were any
1095         * modifications made to the underlying to and by maps of the
1096         * ReferenceManager. The maps of the ReferenceManager are not
1097         * synchronized, so someone could add/remove entries in them while the
1098         * hashCode is being computed.
1099         *
1100         * @return Sum of the hashCodes for the to and by maps of the
1101         *         ReferenceManager
1102         * @since 2.3.24
1103         */
1104        //
1105        //   This method traps and retries if a concurrent
1106        //   modifcaition occurs.
1107        //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only
1108        //         when the hashmaps are changed.  This is slow.
1109        //
1110        public int deepHashCode()
1111        {
1112            boolean failed = true;
1113            int signature = 0;
1114    
1115            while (failed)
1116            {
1117                signature = 0;
1118                try
1119                {
1120                    signature ^= m_referredBy.hashCode();
1121                    signature ^= m_refersTo.hashCode();
1122                    failed = false;
1123                }
1124                catch (ConcurrentModificationException e)
1125                {
1126                    Thread.yield();
1127                }
1128            }
1129    
1130            return signature;
1131        }
1132    
1133        /**
1134         *  Returns a list of all pages that the ReferenceManager knows about.
1135         *  This should be roughly equivalent to PageManager.getAllPages(), but without
1136         *  the potential disk access overhead.  Note that this method is not guaranteed
1137         *  to return a Set of really all pages (especially during startup), but it is
1138         *  very fast.
1139         *
1140         *  @return A Set of all defined page names that ReferenceManager knows about.
1141         *  @since 2.3.24
1142         */
1143        public Set findCreated()
1144        {
1145            return new HashSet<String>( m_refersTo.keySet() );
1146        }
1147    
1148        private String getFinalPageName( String orig )
1149        {
1150            try
1151            {
1152                String s = m_engine.getFinalPageName( orig );
1153    
1154                if( s == null ) s = orig;
1155    
1156                return s;
1157            }
1158            catch( ProviderException e )
1159            {
1160                log.error("Error while trying to fetch a page name; trying to cope with the situation.",e);
1161    
1162                return orig;
1163            }
1164        }
1165    
1166        /**
1167         *  {@inheritDoc}
1168         */
1169        public void actionPerformed(WikiEvent event)
1170        {
1171            if( (event instanceof WikiPageEvent) && (event.getType() == WikiPageEvent.PAGE_DELETED) )
1172            {
1173                String pageName = ((WikiPageEvent) event).getPageName();
1174    
1175                if( pageName != null )
1176                {
1177                    pageRemoved( pageName );
1178                }
1179            }
1180        }
1181    }