001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.    
018 */
019package org.apache.wiki;
020
021import java.io.BufferedInputStream;
022import java.io.BufferedOutputStream;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileOutputStream;
026import java.io.IOException;
027import java.io.ObjectInputStream;
028import java.io.ObjectOutputStream;
029import java.io.Serializable;
030import java.io.UnsupportedEncodingException;
031import java.security.MessageDigest;
032import java.security.NoSuchAlgorithmException;
033import java.util.ArrayList;
034import java.util.Collection;
035import java.util.Collections;
036import java.util.ConcurrentModificationException;
037import java.util.HashMap;
038import java.util.HashSet;
039import java.util.Iterator;
040import java.util.Map;
041import java.util.Set;
042import java.util.TreeSet;
043
044import org.apache.commons.lang.time.StopWatch;
045import org.apache.log4j.Logger;
046import org.apache.wiki.api.exceptions.ProviderException;
047import org.apache.wiki.api.filters.BasicPageFilter;
048import org.apache.wiki.attachment.Attachment;
049import org.apache.wiki.event.WikiEvent;
050import org.apache.wiki.event.WikiEventListener;
051import org.apache.wiki.event.WikiEventUtils;
052import org.apache.wiki.event.WikiPageEvent;
053import org.apache.wiki.modules.InternalModule;
054import org.apache.wiki.providers.WikiPageProvider;
055import org.apache.wiki.util.TextUtil;
056
057/*
058  BUGS
059
060  - if a wikilink is added to a page, then removed, RefMan still thinks that
061    the page refers to the wikilink page. Hm.
062
063  - if a page is deleted, gets very confused.
064
065  - Serialization causes page attributes to be missing, when InitializablePlugins
066    are not executed properly.  Thus, serialization should really also mark whether
067    a page is serializable or not...
068 */
069
070
071/*
072   A word about synchronizing:
073
074   I expect this object to be accessed in three situations:
075   - when a WikiEngine is created and it scans its wikipages
076   - when the WE saves a page
077   - when a JSP page accesses one of the WE's ReferenceManagers
078     to display a list of (un)referenced pages.
079
080   So, access to this class is fairly rare, and usually triggered by
081   user interaction. OTOH, the methods in this class use their storage
082   objects intensively (and, sorry to say, in an unoptimized manner =).
083   My deduction: using unsynchronized HashMaps etc and syncing methods
   or code blocks is preferable to using slow, synced storage objects.
085   We don't have iterative code here, so I'm going to use synced methods
086   for now.
087
088   Please contact me if you notice problems with ReferenceManager, and
089   especially with synchronization, or if you have suggestions about
090   syncing.
091
092   ebu@memecry.net
093*/
094
095/**
096 *  Keeps track of wikipage references:
097 *  <UL>
098 *  <LI>What pages a given page refers to
099 *  <LI>What pages refer to a given page
100 *  </UL>
101 *
102 *  This is a quick'n'dirty approach without any finesse in storage and
103 *  searching algorithms; we trust java.util.*.
104 *  <P>
105 *  This class contains two HashMaps, m_refersTo and m_referredBy. The
106 *  first is indexed by WikiPage names and contains a Collection of all
107 *  WikiPages the page refers to. (Multiple references are not counted,
108 *  naturally.) The second is indexed by WikiPage names and contains
109 *  a Set of all pages that refer to the indexing page. (Notice -
110 *  the keys of both Maps should be kept in sync.)
111 *  <P>
112 *  When a page is added or edited, its references are parsed, a Collection
113 *  is received, and we crudely replace anything previous with this new
114 *  Collection. We then check each referenced page name and make sure they
115 *  know they are referred to by the new page.
116 *  <P>
117 *  Based on this information, we can perform non-optimal searches for
118 *  e.g. unreferenced pages, top ten lists, etc.
119 *  <P>
120 *  The owning class must take responsibility of filling in any pre-existing
121 *  information, probably by loading each and every WikiPage and calling this
122 *  class to update the references when created.
123 *
124 *  @since 1.6.1
125 */
126
127// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
129//        of problems that occur here...
130
131public class ReferenceManager
132    extends BasicPageFilter
133    implements InternalModule, WikiEventListener
134{
    /** Maps page wikiname to a Collection of pages it refers to. The Collection
     *  must contain Strings. The Collection may contain names of non-existing
     *  pages.
     */
    private Map<String,Collection<String>> m_refersTo;

    /** Read-only live view of m_refersTo (reflects later changes to the map).
     *  Presumably exposed to callers via an accessor elsewhere — confirm. */
    private Map<String,Collection<String>> m_unmutableRefersTo;

    /** Maps page wikiname to a Set of referring pages. The Set must
     *  contain Strings. Non-existing pages (a reference exists, but not a file
     *  for the page contents) may have an empty Set in m_referredBy.
     */
    private Map<String,Set<String>> m_referredBy;

    /** Read-only live view of m_referredBy. */
    private Map<String,Set<String>> m_unmutableReferredBy;

    /** The WikiEngine that owns this object. */
    private WikiEngine     m_engine;

    /** If true, singular/plural English page names ("Page"/"Pages") are treated
     *  as referring to the same page.  Configured from wiki properties. */
    private boolean        m_matchEnglishPlurals = false;

    private static Logger log = Logger.getLogger(ReferenceManager.class);

    /** File (under the engine work dir) holding the serialized reference maps. */
    private static final String SERIALIZATION_FILE = "refmgr.ser";

    /** Directory (under the engine work dir) holding per-page attribute caches. */
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We use this also as a generic serialization format version id. */
    private static final long serialVersionUID = 4L;
161
162    /**
163     *  Builds a new ReferenceManager.
164     *
165     *  @param engine The WikiEngine to which this is managing references to.
166     */
167    public ReferenceManager( WikiEngine engine )
168    {
169        m_refersTo   = new HashMap<String,Collection<String>>();
170        m_referredBy = new HashMap<String,Set<String>>();
171        m_engine = engine;
172
173        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(),
174                                                             WikiEngine.PROP_MATCHPLURALS,
175                                                             m_matchEnglishPlurals );
176
177        //
178        //  Create two maps that contain unmutable versions of the two basic maps.
179        //
180        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
181        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
182    }
183
184    /**
185     *  Does a full reference update.  Does not sync; assumes that you do it afterwards.
186     */
187    private void updatePageReferences( WikiPage page ) throws ProviderException
188    {
189        String content = m_engine.getPageManager().getPageText( page.getName(),
190                                                                WikiPageProvider.LATEST_VERSION );
191
192        TreeSet<String> res = new TreeSet<String>();
193        Collection<String> links = m_engine.scanWikiLinks( page, content );
194
195        res.addAll( links );
196        Collection attachments = m_engine.getAttachmentManager().listAttachments( page );
197
198        for( Iterator atti = attachments.iterator(); atti.hasNext(); )
199        {
200            res.add( ((Attachment)(atti.next())).getName() );
201        }
202
203        internalUpdateReferences( page.getName(), res );
204    }
205
206    /**
207     *  Initializes the entire reference manager with the initial set of pages
208     *  from the collection.
209     *
210     *  @param pages A collection of all pages you want to be included in the reference
211     *               count.
212     *  @since 2.2
213     *  @throws ProviderException If reading of pages fail.
214     */
215    public void initialize( Collection pages )
216        throws ProviderException
217    {
218        log.debug( "Initializing new ReferenceManager with "+pages.size()+" initial pages." );
219        StopWatch sw = new StopWatch();
220        sw.start();
221        log.info( "Starting cross reference scan of WikiPages" );
222
223        //
224        //  First, try to serialize old data from disk.  If that fails,
225        //  we'll go and update the entire reference lists (which'll take
226        //  time)
227        //
228        try
229        {
230            //
231            //  Unserialize things.  The loop below cannot be combined with
232            //  the other loop below, simply because engine.getPage() has
233            //  side effects such as loading initializing the user databases,
234            //  which in turn want all of the pages to be read already...
235            //
236            //  Yes, this is a kludge.  We know.  Will be fixed.
237            //
238            long saved = unserializeFromDisk();
239
240            for( Iterator it = pages.iterator(); it.hasNext(); )
241            {
242                WikiPage page = (WikiPage) it.next();
243
244                unserializeAttrsFromDisk( page );
245            }
246
247            //
248            //  Now we must check if any of the pages have been changed
249            //  while we were in the electronic la-la-land, and update
250            //  the references for them.
251            //
252
253            Iterator it = pages.iterator();
254
255            while( it.hasNext() )
256            {
257                WikiPage page = (WikiPage) it.next();
258
259                if( page instanceof Attachment )
260                {
261                    // Skip attachments
262                }
263                else
264                {
265
266                    // Refresh with the latest copy
267                    page = m_engine.getPage( page.getName() );
268
269                    if( page.getLastModified() == null )
270                    {
271                        log.fatal( "Provider returns null lastModified.  Please submit a bug report." );
272                    }
273                    else if( page.getLastModified().getTime() > saved )
274                    {
275                        updatePageReferences( page );
276                    }
277                }
278            }
279
280        }
281        catch( Exception e )
282        {
283            log.info("Unable to unserialize old refmgr information, rebuilding database: "+e.getMessage());
284            buildKeyLists( pages );
285
286            // Scan the existing pages from disk and update references in the manager.
287            Iterator it = pages.iterator();
288            while( it.hasNext() )
289            {
290                WikiPage page  = (WikiPage)it.next();
291
292                if( page instanceof Attachment )
293                {
294                    // We cannot build a reference list from the contents
295                    // of attachments, so we skip them.
296                }
297                else
298                {
299                    updatePageReferences( page );
300
301                    serializeAttrsToDisk( page );
302                }
303
304            }
305
306            serializeToDisk();
307        }
308
309        sw.stop();
310        log.info( "Cross reference scan done in "+sw );
311
312        WikiEventUtils.addWikiEventListener(m_engine.getPageManager(),
313                                            WikiPageEvent.PAGE_DELETED, this);
314    }
315
316    /**
317     *  Reads the serialized data from the disk back to memory.
318     *  Returns the date when the data was last written on disk
319     */
320    @SuppressWarnings("unchecked")
321    private synchronized long unserializeFromDisk()
322        throws IOException,
323               ClassNotFoundException
324    {
325        ObjectInputStream in = null;
326        long saved = 0L;
327
328        try
329        {
330            StopWatch sw = new StopWatch();
331            sw.start();
332
333            File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
334
335            in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );
336
337            long ver     = in.readLong();
338
339            if( ver != serialVersionUID )
340            {
341                throw new IOException("File format has changed; I need to recalculate references.");
342            }
343
344            saved        = in.readLong();
345            m_refersTo   = (Map) in.readObject();
346            m_referredBy = (Map) in.readObject();
347
348            in.close();
349
350            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
351            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
352
353            sw.stop();
354            log.debug("Read serialized data successfully in "+sw);
355        }
356        finally
357        {
358            if( in != null ) in.close();
359        }
360
361        return saved;
362    }
363
364    /**
365     *  Serializes hashmaps to disk.  The format is private, don't touch it.
366     */
367    private synchronized void serializeToDisk()
368    {
369        ObjectOutputStream out = null;
370
371        try
372        {
373            StopWatch sw = new StopWatch();
374            sw.start();
375
376            File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
377
378            out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );
379
380            out.writeLong( serialVersionUID );
381            out.writeLong( System.currentTimeMillis() ); // Timestamp
382            out.writeObject( m_refersTo );
383            out.writeObject( m_referredBy );
384
385            out.close();
386
387            sw.stop();
388
389            log.debug("serialization done - took "+sw);
390        }
391        catch( IOException e )
392        {
393            log.error("Unable to serialize!");
394
395            try
396            {
397                if( out != null ) out.close();
398            }
399            catch( IOException ex ) {}
400        }
401    }
402
403    private String getHashFileName( String pageName )
404        throws NoSuchAlgorithmException
405    {
406        MessageDigest digest = MessageDigest.getInstance("MD5");
407
408        byte[] dig;
409        try
410        {
411            dig = digest.digest( pageName.getBytes("UTF-8") );
412        }
413        catch (UnsupportedEncodingException e)
414        {
415            throw new InternalWikiException("AAAAGH!  UTF-8 is gone!  My eyes!  It burns...!", e);
416        }
417
418        return TextUtil.toHexString(dig)+".cache";
419    }
420
421    /**
422     *  Reads the serialized data from the disk back to memory.
423     *  Returns the date when the data was last written on disk
424     */
425    private synchronized long unserializeAttrsFromDisk(WikiPage p)
426        throws IOException,
427               ClassNotFoundException
428    {
429        ObjectInputStream in = null;
430        long saved = 0L;
431
432        try
433        {
434            StopWatch sw = new StopWatch();
435            sw.start();
436
437            //
438            //  Find attribute cache, and check if it exists
439            //
440            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
441
442            f = new File( f, getHashFileName(p.getName()) );
443
444            if( !f.exists() )
445            {
446                return 0L;
447            }
448
449            log.debug("Deserializing attributes for "+p.getName());
450
451            in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );
452
453            long ver     = in.readLong();
454
455            if( ver != serialVersionUID )
456            {
457                log.debug("File format has changed; cannot deserialize.");
458                return 0L;
459            }
460
461            saved        = in.readLong();
462
463            String name  = in.readUTF();
464
465            if( !name.equals(p.getName()) )
466            {
467                log.debug("File name does not match ("+name+"), skipping...");
468                return 0L; // Not here
469            }
470
471            long entries = in.readLong();
472
473            for( int i = 0; i < entries; i++ )
474            {
475                String key   = in.readUTF();
476                Object value = in.readObject();
477
478                p.setAttribute( key, value );
479
480                log.debug("   attr: "+key+"="+value);
481            }
482
483            in.close();
484
485            sw.stop();
486            log.debug("Read serialized data for "+name+" successfully in "+sw);
487            p.setHasMetadata();
488        }
489        catch( NoSuchAlgorithmException e )
490        {
491            log.fatal("No MD5!?!");
492        }
493        finally
494        {
495            if( in != null ) in.close();
496        }
497
498        return saved;
499    }
500
501    /**
502     *  Serializes hashmaps to disk.  The format is private, don't touch it.
503     */
504    private synchronized void serializeAttrsToDisk( WikiPage p )
505    {
506        ObjectOutputStream out = null;
507        StopWatch sw = new StopWatch();
508        sw.start();
509
510        try
511        {
512            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
513
514            if( !f.exists() ) f.mkdirs();
515
516            //
517            //  Create a digest for the name
518            //
519            f = new File( f, getHashFileName(p.getName()) );
520
521            // FIXME: There is a concurrency issue here...
522            Set entries = p.getAttributes().entrySet();
523
524            if( entries.size() == 0 ) 
525            {
526                //  Nothing to serialize, therefore we will just simply remove the
527                //  serialization file so that the next time we boot, we don't
528                //  deserialize old data.
529                f.delete();
530                return;
531            }
532
533            out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );
534
535            out.writeLong( serialVersionUID );
536            out.writeLong( System.currentTimeMillis() ); // Timestamp
537
538            out.writeUTF( p.getName() );
539            out.writeLong( entries.size() );
540
541            for( Iterator i = entries.iterator(); i.hasNext(); )
542            {
543                Map.Entry e = (Map.Entry) i.next();
544
545                if( e.getValue() instanceof Serializable )
546                {
547                    out.writeUTF( (String)e.getKey() );
548                    out.writeObject( e.getValue() );
549                }
550            }
551
552            out.close();
553
554        }
555        catch( IOException e )
556        {
557            log.error("Unable to serialize!");
558
559            try
560            {
561                if( out != null ) out.close();
562            }
563            catch( IOException ex ) {}
564        }
565        catch( NoSuchAlgorithmException e )
566        {
567            log.fatal("No MD5 algorithm!?!");
568        }
569        finally
570        {
571            sw.stop();
572
573            log.debug("serialization for "+p.getName()+" done - took "+sw);
574        }
575    }
576
577    /**
578     *  After the page has been saved, updates the reference lists.
579     *  
580     *  @param context {@inheritDoc}
581     *  @param content {@inheritDoc}
582     */
583    public void postSave( WikiContext context, String content )
584    {
585        WikiPage page = context.getPage();
586
587        updateReferences( page.getName(),
588                          context.getEngine().scanWikiLinks( page, content ) );
589
590        serializeAttrsToDisk( page );
591    }
592
593    /**
594     * Updates the m_referedTo and m_referredBy hashmaps when a page has been
595     * deleted.
596     * <P>
597     * Within the m_refersTo map the pagename is a key. The whole key-value-set
598     * has to be removed to keep the map clean.
599     * Within the m_referredBy map the name is stored as a value. Since a key
600     * can have more than one value we have to delete just the key-value-pair
601     * referring page:deleted page.
602     *
603     *  @param page Name of the page to remove from the maps.
604     */
605    public synchronized void pageRemoved( WikiPage page )
606    {
607        String pageName = page.getName();
608
609        pageRemoved(pageName);
610    }
611
    /**
     *  Removes all references to and from the named page, updates the on-disk
     *  serialization, and deletes the page's attribute cache file.
     *  Not synchronized itself; callers hold the monitor (see pageRemoved(WikiPage)).
     *
     *  @param pageName Name of the page being removed.
     */
    private void pageRemoved(String pageName)
    {
        //  Step 1: every page this one referred to must forget that it was
        //  referred to by this page.
        Collection<String> refTo = m_refersTo.get( pageName );

        if( refTo != null )
        {
            Iterator itRefTo = refTo.iterator();
            while( itRefTo.hasNext() )
            {
                String referredPageName = (String)itRefTo.next();
                Set<String> refBy = m_referredBy.get( referredPageName );

                // The two maps are supposed to be kept in sync; a missing
                // referred-by entry indicates internal corruption.
                if( refBy == null )
                    throw new InternalWikiException("Refmgr out of sync: page "+pageName+" refers to "+referredPageName+", which has null referrers.");

                refBy.remove(pageName);

                m_referredBy.remove( referredPageName );

                // We won't put it back again if it becomes empty and does not exist.  It will be added
                // later on anyway, if it becomes referenced again.
                if( !(refBy.isEmpty() && !m_engine.pageExists(referredPageName)) )
                {
                    m_referredBy.put( referredPageName, refBy );
                }
            }

            log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName ));
            m_refersTo.remove( pageName );
        }

        //  Step 2: drop the page's own referred-by entry if nothing refers
        //  to it anymore.
        Set<String> refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() )
        {
            m_referredBy.remove( pageName );
        }

        //
        //  Remove any traces from the disk, too
        //
        serializeToDisk();

        try
        {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );

            f = new File( f, getHashFileName(pageName) );

            //  Delete the page's attribute cache file, if one exists.
            if( f.exists() ) f.delete();
        }
        catch( NoSuchAlgorithmException e )
        {
            log.error("What do you mean - no such algorithm?", e);
        }
    }
667
668    /**
669     *  Updates the referred pages of a new or edited WikiPage. If a refersTo
670     *  entry for this page already exists, it is removed and a new one is built
671     *  from scratch. Also calls updateReferredBy() for each referenced page.
672     *  <P>
673     *  This is the method to call when a new page has been created and we
674     *  want to a) set up its references and b) notify the referred pages
675     *  of the references. Use this method during run-time.
676     *
677     *  @param page Name of the page to update.
678     *  @param references A Collection of Strings, each one pointing to a page this page references.
679     */
680    public synchronized void updateReferences( String page, Collection< String > references )
681    {
682        internalUpdateReferences(page, references);
683
684        serializeToDisk();
685    }
686
687    /**
688     *  Updates the referred pages of a new or edited WikiPage. If a refersTo
689     *  entry for this page already exists, it is removed and a new one is built
690     *  from scratch. Also calls updateReferredBy() for each referenced page.
691     *  <p>
692     *  This method does not synchronize the database to disk.
693     *
694     *  @param page Name of the page to update.
695     *  @param references A Collection of Strings, each one pointing to a page this page references.
696     */
697
698    private void internalUpdateReferences(String page, Collection< String > references)
699    {
700        page = getFinalPageName( page );
701
702        //
703        // Create a new entry in m_refersTo.
704        //
705        Collection oldRefTo = m_refersTo.get( page );
706        m_refersTo.remove( page );
707
708        TreeSet<String> cleanedRefs = new TreeSet<String>();
709        for( Iterator< String > i = references.iterator(); i.hasNext(); )
710        {
711            String ref = i.next();
712
713            ref = getFinalPageName( ref );
714
715            cleanedRefs.add( ref );
716        }
717
718        m_refersTo.put( page, cleanedRefs );
719
720        //
721        //  We know the page exists, since it's making references somewhere.
722        //  If an entry for it didn't exist previously in m_referredBy, make
723        //  sure one is added now.
724        //
725        if( !m_referredBy.containsKey( page ) )
726        {
727            m_referredBy.put( page, new TreeSet<String>() );
728        }
729
730        //
731        //  Get all pages that used to be referred to by 'page' and
732        //  remove that reference. (We don't want to try to figure out
733        //  which particular references were removed...)
734        //
735        cleanReferredBy( page, oldRefTo, cleanedRefs );
736
737        //
738        //  Notify all referred pages of their referinesshoodicity.
739        //
740        Iterator<String> it = cleanedRefs.iterator();
741        while( it.hasNext() )
742        {
743            String referredPageName = it.next();
744            updateReferredBy( getFinalPageName(referredPageName), page );
745        }
746    }
747
748    /**
749     * Returns the refers-to list. For debugging.
750     * 
751     * @return The refers-to list.
752     */
753    protected Map getRefersTo()
754    {
755        return m_refersTo;
756    }
757
758    /**
759     * Returns the referred-by list. For debugging.
760     * 
761     * @return Referred-by lists.
762     */
763    protected Map getReferredBy()
764    {
765        return m_referredBy;
766    }
767
768    /**
769     * Cleans the 'referred by' list, removing references by 'referrer' to
770     * any other page. Called after 'referrer' is removed.
771     */
772    private void cleanReferredBy( String referrer,
773                                  Collection oldReferred,
774                                  Collection<String> newReferred )
775    {
776        // Two ways to go about this. One is to look up all pages previously
777        // referred by referrer and remove referrer from their lists, and let
778        // the update put them back in (except possibly removed ones).
779        // The other is to get the old referred to list, compare to the new,
780        // and tell the ones missing in the latter to remove referrer from
781        // their list. Hm. We'll just try the first for now. Need to come
782        // back and optimize this a bit.
783
784        if( oldReferred == null )
785            return;
786
787        Iterator it = oldReferred.iterator();
788        while( it.hasNext() )
789        {
790            String referredPage = (String)it.next();
791            Set oldRefBy = m_referredBy.get( referredPage );
792            if( oldRefBy != null )
793            {
794                oldRefBy.remove( referrer );
795            }
796
797            // If the page is referred to by no one AND it doesn't even
798            // exist, we might just as well forget about this entry.
799            // It will be added again elsewhere if new references appear.
800            if( ( ( oldRefBy == null ) || ( oldRefBy.isEmpty() ) ) &&
801                ( m_engine.pageExists( referredPage ) == false ) )
802            {
803                m_referredBy.remove( referredPage );
804            }
805        }
806
807    }
808
809
810    /**
811     *  When initially building a ReferenceManager from scratch, call this method
812     * BEFORE calling updateReferences() with a full list of existing page names.
813     * It builds the refersTo and referredBy key lists, thus enabling
814     * updateReferences() to function correctly.
815     * <P>
816     * This method should NEVER be called after initialization. It clears all mappings
817     * from the reference tables.
818     *
819     * @param pages   a Collection containing WikiPage objects.
820     */
821    private synchronized void buildKeyLists( Collection pages )
822    {
823        m_refersTo.clear();
824        m_referredBy.clear();
825
826        if( pages == null )
827            return;
828
829        Iterator it = pages.iterator();
830        try
831        {
832            while( it.hasNext() )
833            {
834                WikiPage page = (WikiPage)it.next();
835                // We add a non-null entry to referredBy to indicate the referred page exists
836                m_referredBy.put( page.getName(), new TreeSet<String>() );
837                // Just add a key to refersTo; the keys need to be in sync with referredBy.
838                m_refersTo.put( page.getName(), null );
839            }
840        }
841        catch( ClassCastException e )
842        {
843            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
844        }
845    }
846
847
848    /**
849     * Marks the page as referred to by the referrer. If the page does not
850     * exist previously, nothing is done. (This means that some page, somewhere,
851     * has a link to a page that does not exist.)
852     * <P>
853     * This method is NOT synchronized. It should only be referred to from
854     * within a synchronized method, or it should be made synced if necessary.
855     */
856    private void updateReferredBy( String page, String referrer )
857    {
858        // We're not really interested in first level self-references.
859        /*
860        if( page.equals( referrer ) )
861        {
862            return;
863        }
864        */
865        // Neither are we interested if plural forms refer to each other.
866        if( m_matchEnglishPlurals )
867        {
868            String p2 = page.endsWith("s") ? page.substring(0,page.length()-1) : page+"s";
869
870            if( referrer.equals(p2) )
871            {
872                return;
873            }
874        }
875
876        Set<String> referrers = m_referredBy.get( page );
877
878        // Even if 'page' has not been created yet, it can still be referenced.
879        // This requires we don't use m_referredBy keys when looking up missing
880        // pages, of course.
881        if(referrers == null)
882        {
883            referrers = new TreeSet<String>();
884            m_referredBy.put( page, referrers );
885        }
886        referrers.add( referrer );
887    }
888
889
890    /**
891     * Clears the references to a certain page so it's no longer in the map.
892     *
893     * @param pagename  Name of the page to clear references for.
894     */
895    public synchronized void clearPageEntries( String pagename )
896    {
897        pagename = getFinalPageName(pagename);
898
899        //
900        //  Remove this item from the referredBy list of any page
901        //  which this item refers to.
902        //
903        Collection<String> c = m_refersTo.get( pagename );
904
905        if( c != null )
906        {
907            for( String key : c )
908            {
909                Collection<?> dref = m_referredBy.get( key );
910
911                dref.remove( pagename );
912            }
913        }
914
915        //
916        //  Finally, remove direct references.
917        //
918        m_referredBy.remove( pagename );
919        m_refersTo.remove( pagename );
920    }
921
922
923    /**
924     *  Finds all unreferenced pages. This requires a linear scan through
925     *  m_referredBy to locate keys with null or empty values.
926     *  
927     *  @return The Collection of Strings
928     */
929    public synchronized Collection findUnreferenced()
930    {
931        ArrayList<String> unref = new ArrayList<String>();
932
933        for( String key : m_referredBy.keySet() )
934        {
935            Set<?> refs = getReferenceList( m_referredBy, key );
936            
937            if( refs == null || refs.isEmpty() )
938            {
939                unref.add( key );
940            }
941        }
942
943        return unref;
944    }
945
946
947    /**
948     * Finds all references to non-existant pages. This requires a linear
949     * scan through m_refersTo values; each value must have a corresponding
950     * key entry in the reference Maps, otherwise such a page has never
951     * been created.
952     * <P>
953     * Returns a Collection containing Strings of unreferenced page names.
954     * Each non-existant page name is shown only once - we don't return information
955     * on who referred to it.
956     * 
957     * @return A Collection of Strings
958     */
959    public synchronized Collection findUncreated()
960    {
961        TreeSet<String> uncreated = new TreeSet<String>();
962
963        // Go through m_refersTo values and check that m_refersTo has the corresponding keys.
964        // We want to reread the code to make sure our HashMaps are in sync...
965
966        Collection<Collection<String>> allReferences = m_refersTo.values();
967
968        for( Collection<String> refs : allReferences )
969        {
970            if( refs != null )
971            {
972                for( String aReference : refs )
973                {
974                    if( m_engine.pageExists( aReference ) == false )
975                    {
976                        uncreated.add( aReference );
977                    }
978                }
979            }
980        }
981
982        return uncreated;
983    }
984
985    /**
986     *  Searches for the given page in the given Map, and returns
987     *  the set of references.  This method also takes care of English plural
988     *  matching.
989     *  
990     *  @param coll The Map to search in
991     *  @param pagename The name to find.
992     *  @return The references list.
993     */
994    private <T> Set<T> getReferenceList( Map<String,Set<T>> coll, String pagename )
995    {
996        Set<T> refs = coll.get( pagename );
997
998        if( m_matchEnglishPlurals )
999        {
1000            //
1001            //  We'll add also matches from the "other" page.
1002            //
1003            Set<T> refs2;
1004
1005            if( pagename.endsWith("s") )
1006            {
1007                refs2 = coll.get( pagename.substring(0,pagename.length()-1) );
1008            }
1009            else
1010            {
1011                refs2 = coll.get( pagename+"s" );
1012            }
1013
1014            if( refs2 != null )
1015            {
1016                if( refs != null )
1017                    refs.addAll( refs2 );
1018                else
1019                    refs = refs2;
1020            }
1021        }
1022        return refs;
1023    }
1024
1025    /**
1026     * Find all pages that refer to this page. Returns null if the page
1027     * does not exist or is not referenced at all, otherwise returns a
1028     * collection containing page names (String) that refer to this one.
1029     * <p>
1030     * @param pagename The page to find referrers for.
1031     * @return A Collection of Strings.  (This is, in fact, a Set, and is likely
1032     *         to change at some point to a Set).  May return null, if the page
1033     *         does not exist, or if it has no references.
1034     */
1035    // FIXME: Return a Set instead of a Collection.
1036    public synchronized Collection findReferrers( String pagename )
1037    {
1038        Set<String> refs = getReferenceList( m_referredBy, pagename );
1039
1040        if( refs == null || refs.isEmpty() )
1041        {
1042            return null;
1043        }
1044
1045        return refs;
1046
1047    }
1048
1049    /**
1050     *  Returns all pages that refer to this page.  Note that this method
1051     *  returns an unmodifiable Map, which may be abruptly changed.  So any
1052     *  access to any iterator may result in a ConcurrentModificationException.
1053     *  <p>
1054     *  The advantages of using this method over findReferrers() is that
1055     *  it is very fast, as it does not create a new object.  The disadvantages
1056     *  are that it does not do any mapping between plural names, and you
1057     *  may end up getting a ConcurrentModificationException.
1058     *
1059     * @param pageName Page name to query.
1060     * @return A Set of Strings containing the names of all the pages that refer
1061     *         to this page.  May return null, if the page does not exist or
1062     *         has not been indexed yet.
1063     * @since 2.2.33
1064     */
1065    public Set findReferredBy( String pageName )
1066    {
1067        return m_unmutableReferredBy.get( getFinalPageName(pageName) );
1068    }
1069
1070    /**
1071     *  Returns all pages that this page refers to.  You can use this as a quick
1072     *  way of getting the links from a page, but note that it does not link any
1073     *  InterWiki, image, or external links.  It does contain attachments, though.
1074     *  <p>
1075     *  The Collection returned is unmutable, so you cannot change it.  It does reflect
1076     *  the current status and thus is a live object.  So, if you are using any
1077     *  kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
1078     *  <p>
1079     *  The returned value is a Collection, because a page may refer to another page
1080     *  multiple times.
1081     *
1082     * @param pageName Page name to query
1083     * @return A Collection of Strings containing the names of the pages that this page
1084     *         refers to. May return null, if the page does not exist or has not
1085     *         been indexed yet.
1086     * @since 2.2.33
1087     */
1088    public Collection findRefersTo( String pageName )
1089    {
1090        return m_unmutableRefersTo.get( getFinalPageName(pageName) );
1091    }
1092
1093    /**
1094     * This 'deepHashCode' can be used to determine if there were any
1095     * modifications made to the underlying to and by maps of the
1096     * ReferenceManager. The maps of the ReferenceManager are not
1097     * synchronized, so someone could add/remove entries in them while the
1098     * hashCode is being computed.
1099     *
1100     * @return Sum of the hashCodes for the to and by maps of the
1101     *         ReferenceManager
1102     * @since 2.3.24
1103     */
1104    //
1105    //   This method traps and retries if a concurrent
1106    //   modifcaition occurs.
1107    //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only
1108    //         when the hashmaps are changed.  This is slow.
1109    //
1110    public int deepHashCode()
1111    {
1112        boolean failed = true;
1113        int signature = 0;
1114
1115        while (failed)
1116        {
1117            signature = 0;
1118            try
1119            {
1120                signature ^= m_referredBy.hashCode();
1121                signature ^= m_refersTo.hashCode();
1122                failed = false;
1123            }
1124            catch (ConcurrentModificationException e)
1125            {
1126                Thread.yield();
1127            }
1128        }
1129
1130        return signature;
1131    }
1132
1133    /**
1134     *  Returns a list of all pages that the ReferenceManager knows about.
1135     *  This should be roughly equivalent to PageManager.getAllPages(), but without
1136     *  the potential disk access overhead.  Note that this method is not guaranteed
1137     *  to return a Set of really all pages (especially during startup), but it is
1138     *  very fast.
1139     *
1140     *  @return A Set of all defined page names that ReferenceManager knows about.
1141     *  @since 2.3.24
1142     */
1143    public Set findCreated()
1144    {
1145        return new HashSet<String>( m_refersTo.keySet() );
1146    }
1147
1148    private String getFinalPageName( String orig )
1149    {
1150        try
1151        {
1152            String s = m_engine.getFinalPageName( orig );
1153
1154            if( s == null ) s = orig;
1155
1156            return s;
1157        }
1158        catch( ProviderException e )
1159        {
1160            log.error("Error while trying to fetch a page name; trying to cope with the situation.",e);
1161
1162            return orig;
1163        }
1164    }
1165
1166    /**
1167     *  {@inheritDoc}
1168     */
1169    public void actionPerformed(WikiEvent event)
1170    {
1171        if( (event instanceof WikiPageEvent) && (event.getType() == WikiPageEvent.PAGE_DELETED) )
1172        {
1173            String pageName = ((WikiPageEvent) event).getPageName();
1174
1175            if( pageName != null )
1176            {
1177                pageRemoved( pageName );
1178            }
1179        }
1180    }
1181}