001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.    
018 */
019package org.apache.wiki;
020
021import java.io.BufferedInputStream;
022import java.io.BufferedOutputStream;
023import java.io.File;
024import java.io.FileInputStream;
025import java.io.FileOutputStream;
026import java.io.IOException;
027import java.io.ObjectInputStream;
028import java.io.ObjectOutputStream;
029import java.io.Serializable;
030import java.nio.charset.StandardCharsets;
031import java.security.MessageDigest;
032import java.security.NoSuchAlgorithmException;
033import java.util.ArrayList;
034import java.util.Collection;
035import java.util.Collections;
036import java.util.ConcurrentModificationException;
037import java.util.HashMap;
038import java.util.HashSet;
039import java.util.Iterator;
040import java.util.List;
041import java.util.Map;
042import java.util.Set;
043import java.util.TreeSet;
044
045import org.apache.commons.lang.time.StopWatch;
046import org.apache.log4j.Logger;
047import org.apache.wiki.api.exceptions.ProviderException;
048import org.apache.wiki.api.filters.BasicPageFilter;
049import org.apache.wiki.attachment.Attachment;
050import org.apache.wiki.event.WikiEvent;
051import org.apache.wiki.event.WikiEventListener;
052import org.apache.wiki.event.WikiEventUtils;
053import org.apache.wiki.event.WikiPageEvent;
054import org.apache.wiki.modules.InternalModule;
055import org.apache.wiki.providers.WikiPageProvider;
056import org.apache.wiki.util.TextUtil;
057
058/*
059  BUGS
060
061  - if a wikilink is added to a page, then removed, RefMan still thinks that
062    the page refers to the wikilink page. Hm.
063
064  - if a page is deleted, gets very confused.
065
066  - Serialization causes page attributes to be missing, when InitializablePlugins
067    are not executed properly.  Thus, serialization should really also mark whether
068    a page is serializable or not...
069 */
070
071
072/*
073   A word about synchronizing:
074
075   I expect this object to be accessed in three situations:
076   - when a WikiEngine is created and it scans its wikipages
077   - when the WE saves a page
078   - when a JSP page accesses one of the WE's ReferenceManagers
079     to display a list of (un)referenced pages.
080
081   So, access to this class is fairly rare, and usually triggered by
082   user interaction. OTOH, the methods in this class use their storage
083   objects intensively (and, sorry to say, in an unoptimized manner =).
084   My deduction: using unsynchronized HashMaps etc and syncing methods
   or code blocks is preferable to using slow, synced storage objects.
086   We don't have iterative code here, so I'm going to use synced methods
087   for now.
088
089   Please contact me if you notice problems with ReferenceManager, and
090   especially with synchronization, or if you have suggestions about
091   syncing.
092
093   ebu@memecry.net
094*/
095
096/**
097 *  Keeps track of wikipage references:
098 *  <UL>
099 *  <LI>What pages a given page refers to
100 *  <LI>What pages refer to a given page
101 *  </UL>
102 *
103 *  This is a quick'n'dirty approach without any finesse in storage and
104 *  searching algorithms; we trust java.util.*.
105 *  <P>
106 *  This class contains two HashMaps, m_refersTo and m_referredBy. The
107 *  first is indexed by WikiPage names and contains a Collection of all
108 *  WikiPages the page refers to. (Multiple references are not counted,
109 *  naturally.) The second is indexed by WikiPage names and contains
110 *  a Set of all pages that refer to the indexing page. (Notice -
111 *  the keys of both Maps should be kept in sync.)
112 *  <P>
113 *  When a page is added or edited, its references are parsed, a Collection
114 *  is received, and we crudely replace anything previous with this new
115 *  Collection. We then check each referenced page name and make sure they
116 *  know they are referred to by the new page.
117 *  <P>
118 *  Based on this information, we can perform non-optimal searches for
119 *  e.g. unreferenced pages, top ten lists, etc.
120 *  <P>
121 *  The owning class must take responsibility of filling in any pre-existing
122 *  information, probably by loading each and every WikiPage and calling this
123 *  class to update the references when created.
124 *
125 *  @since 1.6.1
126 */
127
128// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
130//        of problems that occur here...
131
132public class ReferenceManager
133    extends BasicPageFilter
134    implements InternalModule, WikiEventListener
135{
136    /** Maps page wikiname to a Collection of pages it refers to. The Collection
137     *  must contain Strings. The Collection may contain names of non-existing
138     *  pages.
139     */
140    private Map<String,Collection<String>> m_refersTo;
141    private Map<String,Collection<String>> m_unmutableRefersTo;
142
143    /** Maps page wikiname to a Set of referring pages. The Set must
144     *  contain Strings. Non-existing pages (a reference exists, but not a file
145     *  for the page contents) may have an empty Set in m_referredBy.
146     */
147    private Map<String,Set<String>> m_referredBy;
148    private Map<String,Set<String>> m_unmutableReferredBy;
149
150    /** The WikiEngine that owns this object. */
151    private WikiEngine     m_engine;
152
153    private boolean        m_matchEnglishPlurals = false;
154
155    private static Logger log = Logger.getLogger(ReferenceManager.class);
156
157    private static final String SERIALIZATION_FILE = "refmgr.ser";
158    private static final String SERIALIZATION_DIR  = "refmgr-attr";
159
160    /** We use this also a generic serialization id */
161    private static final long serialVersionUID = 4L;
162
163    /**
164     *  Builds a new ReferenceManager.
165     *
166     *  @param engine The WikiEngine to which this is managing references to.
167     */
168    public ReferenceManager( WikiEngine engine )
169    {
170        m_refersTo   = new HashMap<>();
171        m_referredBy = new HashMap<>();
172        m_engine = engine;
173
174        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(),
175                                                             WikiEngine.PROP_MATCHPLURALS,
176                                                             m_matchEnglishPlurals );
177
178        //
179        //  Create two maps that contain unmutable versions of the two basic maps.
180        //
181        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
182        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
183    }
184
185    /**
186     *  Does a full reference update.  Does not sync; assumes that you do it afterwards.
187     */
188    private void updatePageReferences( WikiPage page ) throws ProviderException
189    {
190        String content = m_engine.getPageManager().getPageText( page.getName(),
191                                                                WikiPageProvider.LATEST_VERSION );
192
193        TreeSet<String> res = new TreeSet<String>();
194        Collection<String> links = m_engine.scanWikiLinks( page, content );
195
196        res.addAll( links );
197        List< Attachment > attachments = m_engine.getAttachmentManager().listAttachments( page );
198
199        for( Iterator< Attachment > atti = attachments.iterator(); atti.hasNext(); )
200        {
201            res.add( atti.next().getName() );
202        }
203
204        internalUpdateReferences( page.getName(), res );
205    }
206
207    /**
208     *  Initializes the entire reference manager with the initial set of pages
209     *  from the collection.
210     *
211     *  @param pages A collection of all pages you want to be included in the reference
212     *               count.
213     *  @since 2.2
214     *  @throws ProviderException If reading of pages fail.
215     */
216    public void initialize( Collection< WikiPage > pages ) throws ProviderException
217    {
218        log.debug( "Initializing new ReferenceManager with "+pages.size()+" initial pages." );
219        StopWatch sw = new StopWatch();
220        sw.start();
221        log.info( "Starting cross reference scan of WikiPages" );
222
223        //
224        //  First, try to serialize old data from disk.  If that fails,
225        //  we'll go and update the entire reference lists (which'll take
226        //  time)
227        //
228        try
229        {
230            //
231            //  Unserialize things.  The loop below cannot be combined with
232            //  the other loop below, simply because engine.getPage() has
233            //  side effects such as loading initializing the user databases,
234            //  which in turn want all of the pages to be read already...
235            //
236            //  Yes, this is a kludge.  We know.  Will be fixed.
237            //
238            long saved = unserializeFromDisk();
239
240            for( Iterator< WikiPage > it = pages.iterator(); it.hasNext(); )
241            {
242                WikiPage page = it.next();
243
244                unserializeAttrsFromDisk( page );
245            }
246
247            //
248            //  Now we must check if any of the pages have been changed
249            //  while we were in the electronic la-la-land, and update
250            //  the references for them.
251            //
252
253            Iterator< WikiPage > it = pages.iterator();
254
255            while( it.hasNext() )
256            {
257                WikiPage page = it.next();
258
259                if( page instanceof Attachment )
260                {
261                    // Skip attachments
262                }
263                else
264                {
265
266                    // Refresh with the latest copy
267                    page = m_engine.getPage( page.getName() );
268
269                    if( page.getLastModified() == null )
270                    {
271                        log.fatal( "Provider returns null lastModified.  Please submit a bug report." );
272                    }
273                    else if( page.getLastModified().getTime() > saved )
274                    {
275                        updatePageReferences( page );
276                    }
277                }
278            }
279
280        }
281        catch( Exception e )
282        {
283            log.info("Unable to unserialize old refmgr information, rebuilding database: "+e.getMessage());
284            buildKeyLists( pages );
285
286            // Scan the existing pages from disk and update references in the manager.
287            Iterator< WikiPage > it = pages.iterator();
288            while( it.hasNext() )
289            {
290                WikiPage page  = it.next();
291
292                if( page instanceof Attachment )
293                {
294                    // We cannot build a reference list from the contents
295                    // of attachments, so we skip them.
296                }
297                else
298                {
299                    updatePageReferences( page );
300
301                    serializeAttrsToDisk( page );
302                }
303
304            }
305
306            serializeToDisk();
307        }
308
309        sw.stop();
310        log.info( "Cross reference scan done in "+sw );
311
312        WikiEventUtils.addWikiEventListener(m_engine.getPageManager(), WikiPageEvent.PAGE_DELETED, this);
313    }
314
315    /**
316     *  Reads the serialized data from the disk back to memory.
317     *  Returns the date when the data was last written on disk
318     */
319    @SuppressWarnings("unchecked")
320    private synchronized long unserializeFromDisk()
321        throws IOException,
322               ClassNotFoundException
323    {
324        long saved = 0L;
325
326        File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
327        try( ObjectInputStream in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) ) )
328        {
329            StopWatch sw = new StopWatch();
330            sw.start();
331
332            long ver     = in.readLong();
333
334            if( ver != serialVersionUID )
335            {
336                throw new IOException("File format has changed; I need to recalculate references.");
337            }
338
339            saved        = in.readLong();
340            m_refersTo   = ( Map< String, Collection< String > > ) in.readObject();
341            m_referredBy = ( Map< String, Set< String > > ) in.readObject();
342
343            in.close();
344
345            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
346            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
347
348            sw.stop();
349            log.debug("Read serialized data successfully in "+sw);
350        }
351
352        return saved;
353    }
354
355    /**
356     *  Serializes hashmaps to disk.  The format is private, don't touch it.
357     */
358    private synchronized void serializeToDisk()
359    {
360        File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
361        try( ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
362            StopWatch sw = new StopWatch();
363            sw.start();
364
365            out.writeLong( serialVersionUID );
366            out.writeLong( System.currentTimeMillis() ); // Timestamp
367            out.writeObject( m_refersTo );
368            out.writeObject( m_referredBy );
369
370            out.close();
371
372            sw.stop();
373
374            log.debug("serialization done - took "+sw);
375        }
376        catch( IOException ioe )
377        {
378            log.error("Unable to serialize!", ioe);
379        }
380    }
381
382    private String getHashFileName( String pageName ) {
383        try {
384            MessageDigest digest = MessageDigest.getInstance("MD5");
385            byte[] dig = digest.digest( pageName.getBytes(StandardCharsets.UTF_8) );
386
387            return TextUtil.toHexString(dig)+".cache";
388        } catch( NoSuchAlgorithmException e ) {
389            log.fatal( "What do you mean - no such algorithm?", e );
390            return null;
391        }
392    }
393
394    /**
395     *  Reads the serialized data from the disk back to memory.
396     *  Returns the date when the data was last written on disk
397     */
398    private synchronized long unserializeAttrsFromDisk(WikiPage p)
399        throws IOException,
400               ClassNotFoundException
401    {
402        long saved = 0L;
403
404        //
405        //  Find attribute cache, and check if it exists
406        //
407        String hashName = getHashFileName( p.getName() );
408        if( hashName != null ) {
409            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
410
411            f = new File( f, hashName );
412
413            if( !f.exists() )
414            {
415                return 0L;
416            }
417            try( ObjectInputStream in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) ))
418            {
419                StopWatch sw = new StopWatch();
420                sw.start();
421
422                log.debug("Deserializing attributes for "+p.getName());
423
424                long ver = in.readLong();
425
426                if( ver != serialVersionUID )
427                {
428                    log.debug("File format has changed; cannot deserialize.");
429                    return 0L;
430                }
431
432                saved        = in.readLong();
433
434                String name  = in.readUTF();
435
436                if( !name.equals(p.getName()) )
437                {
438                    log.debug("File name does not match ("+name+"), skipping...");
439                    return 0L; // Not here
440                }
441
442                long entries = in.readLong();
443
444                for( int i = 0; i < entries; i++ )
445                {
446                    String key   = in.readUTF();
447                    Object value = in.readObject();
448
449                    p.setAttribute( key, value );
450
451                    log.debug("   attr: "+key+"="+value);
452                }
453
454                in.close();
455
456                sw.stop();
457                log.debug("Read serialized data for "+name+" successfully in "+sw);
458                p.setHasMetadata();
459            }
460        }
461        
462
463        return saved;
464    }
465
466    /**
467     *  Serializes hashmaps to disk.  The format is private, don't touch it.
468     */
469    private synchronized void serializeAttrsToDisk( WikiPage p )
470    {
471        StopWatch sw = new StopWatch();
472        sw.start();
473
474        String hashName = getHashFileName( p.getName() );
475        if( hashName != null ) {
476            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
477
478            if( !f.exists() ) f.mkdirs();
479
480            //
481            //  Create a digest for the name
482            //
483            f = new File( f, hashName );
484            
485            try( ObjectOutputStream out =  new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
486                Set< Map.Entry < String, Object > > entries = new HashSet<>( p.getAttributes().entrySet() ); // new Set to avoid concurrency issue
487
488                if( entries.size() == 0 ) 
489                {
490                    //  Nothing to serialize, therefore we will just simply remove the
491                    //  serialization file so that the next time we boot, we don't
492                    //  deserialize old data.
493                    f.delete();
494                    return;
495                }
496
497                out.writeLong( serialVersionUID );
498                out.writeLong( System.currentTimeMillis() ); // Timestamp
499
500                out.writeUTF( p.getName() );
501                out.writeLong( entries.size() );
502
503                for( Iterator< Map.Entry < String, Object > > i = entries.iterator(); i.hasNext(); ) {
504                    Map.Entry< String, Object > e = i.next();
505
506                    if( e.getValue() instanceof Serializable ) {
507                        out.writeUTF( e.getKey() );
508                        out.writeObject( e.getValue() );
509                    }
510                }
511
512            } catch( IOException e ) {
513                log.error( "Unable to serialize!", e );
514            } finally {
515                sw.stop();
516                log.debug("serialization for "+p.getName()+" done - took "+sw);
517            }
518        }
519        
520    }
521
522    /**
523     *  After the page has been saved, updates the reference lists.
524     *  
525     *  @param context {@inheritDoc}
526     *  @param content {@inheritDoc}
527     */
528    @Override
529    public void postSave( WikiContext context, String content )
530    {
531        WikiPage page = context.getPage();
532
533        updateReferences( page.getName(),
534                          context.getEngine().scanWikiLinks( page, content ) );
535
536        serializeAttrsToDisk( page );
537    }
538
539    /**
 * Updates the m_refersTo and m_referredBy hashmaps when a page has been
541     * deleted.
542     * <P>
543     * Within the m_refersTo map the pagename is a key. The whole key-value-set
544     * has to be removed to keep the map clean.
545     * Within the m_referredBy map the name is stored as a value. Since a key
546     * can have more than one value we have to delete just the key-value-pair
547     * referring page:deleted page.
548     *
549     *  @param page Name of the page to remove from the maps.
550     */
551    public synchronized void pageRemoved( WikiPage page )
552    {
553        String pageName = page.getName();
554
555        pageRemoved(pageName);
556    }
557
558    private void pageRemoved(String pageName)
559    {
560        Collection<String> refTo = m_refersTo.get( pageName );
561
562        if( refTo != null )
563        {
564            Iterator< String > itRefTo = refTo.iterator();
565            while( itRefTo.hasNext() )
566            {
567                String referredPageName = itRefTo.next();
568                Set<String> refBy = m_referredBy.get( referredPageName );
569
570                if( refBy == null )
571                    throw new InternalWikiException("Refmgr out of sync: page "+pageName+" refers to "+referredPageName+", which has null referrers.");
572
573                refBy.remove(pageName);
574
575                m_referredBy.remove( referredPageName );
576
577                // We won't put it back again if it becomes empty and does not exist.  It will be added
578                // later on anyway, if it becomes referenced again.
579                if( !(refBy.isEmpty() && !m_engine.pageExists(referredPageName)) )
580                {
581                    m_referredBy.put( referredPageName, refBy );
582                }
583            }
584
585            log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName ));
586            m_refersTo.remove( pageName );
587        }
588
589        Set<String> refBy = m_referredBy.get( pageName );
590        if( refBy == null || refBy.isEmpty() )
591        {
592            m_referredBy.remove( pageName );
593        }
594
595        //
596        //  Remove any traces from the disk, too
597        //
598        serializeToDisk();
599
600        String hashName = getHashFileName( pageName );
601        if( hashName != null ) {
602            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
603
604            f = new File( f, getHashFileName(pageName) );
605
606            if( f.exists() ) f.delete();
607        }
608    }
609
610    /**
611     *  Updates the referred pages of a new or edited WikiPage. If a refersTo
612     *  entry for this page already exists, it is removed and a new one is built
613     *  from scratch. Also calls updateReferredBy() for each referenced page.
614     *  <P>
615     *  This is the method to call when a new page has been created and we
616     *  want to a) set up its references and b) notify the referred pages
617     *  of the references. Use this method during run-time.
618     *
619     *  @param page Name of the page to update.
620     *  @param references A Collection of Strings, each one pointing to a page this page references.
621     */
622    public synchronized void updateReferences( String page, Collection< String > references )
623    {
624        internalUpdateReferences(page, references);
625
626        serializeToDisk();
627    }
628
629    /**
630     *  Updates the referred pages of a new or edited WikiPage. If a refersTo
631     *  entry for this page already exists, it is removed and a new one is built
632     *  from scratch. Also calls updateReferredBy() for each referenced page.
633     *  <p>
634     *  This method does not synchronize the database to disk.
635     *
636     *  @param page Name of the page to update.
637     *  @param references A Collection of Strings, each one pointing to a page this page references.
638     */
639
640    private void internalUpdateReferences(String page, Collection< String > references)
641    {
642        page = getFinalPageName( page );
643
644        //
645        // Create a new entry in m_refersTo.
646        //
647        Collection< String > oldRefTo = m_refersTo.get( page );
648        m_refersTo.remove( page );
649
650        TreeSet<String> cleanedRefs = new TreeSet<>();
651        for( Iterator< String > i = references.iterator(); i.hasNext(); )
652        {
653            String ref = i.next();
654
655            ref = getFinalPageName( ref );
656
657            cleanedRefs.add( ref );
658        }
659
660        m_refersTo.put( page, cleanedRefs );
661
662        //
663        //  We know the page exists, since it's making references somewhere.
664        //  If an entry for it didn't exist previously in m_referredBy, make
665        //  sure one is added now.
666        //
667        if( !m_referredBy.containsKey( page ) )
668        {
669            m_referredBy.put( page, new TreeSet<String>() );
670        }
671
672        //
673        //  Get all pages that used to be referred to by 'page' and
674        //  remove that reference. (We don't want to try to figure out
675        //  which particular references were removed...)
676        //
677        cleanReferredBy( page, oldRefTo, cleanedRefs );
678
679        //
680        //  Notify all referred pages of their referinesshoodicity.
681        //
682        Iterator<String> it = cleanedRefs.iterator();
683        while( it.hasNext() )
684        {
685            String referredPageName = it.next();
686            updateReferredBy( getFinalPageName(referredPageName), page );
687        }
688    }
689
690    /**
691     * Returns the refers-to list. For debugging.
692     * 
693     * @return The refers-to list.
694     */
695    protected Map< String, Collection< String > > getRefersTo()
696    {
697        return m_refersTo;
698    }
699
700    /**
701     * Returns the referred-by list. For debugging.
702     * 
703     * @return Referred-by lists.
704     */
705    protected Map< String, Set< String > > getReferredBy()
706    {
707        return m_referredBy;
708    }
709
710    /**
711     * Cleans the 'referred by' list, removing references by 'referrer' to
712     * any other page. Called after 'referrer' is removed.
713     */
714    private void cleanReferredBy( String referrer,
715                                  Collection<String> oldReferred,
716                                  Collection<String> newReferred )
717    {
718        // Two ways to go about this. One is to look up all pages previously
719        // referred by referrer and remove referrer from their lists, and let
720        // the update put them back in (except possibly removed ones).
721        // The other is to get the old referred to list, compare to the new,
722        // and tell the ones missing in the latter to remove referrer from
723        // their list. Hm. We'll just try the first for now. Need to come
724        // back and optimize this a bit.
725
726        if( oldReferred == null )
727            return;
728
729        Iterator< String > it = oldReferred.iterator();
730        while( it.hasNext() )
731        {
732            String referredPage = it.next();
733            Set< String > oldRefBy = m_referredBy.get( referredPage );
734            if( oldRefBy != null )
735            {
736                oldRefBy.remove( referrer );
737            }
738
739            // If the page is referred to by no one AND it doesn't even
740            // exist, we might just as well forget about this entry.
741            // It will be added again elsewhere if new references appear.
742            if( ( ( oldRefBy == null ) || ( oldRefBy.isEmpty() ) ) &&
743                ( m_engine.pageExists( referredPage ) == false ) )
744            {
745                m_referredBy.remove( referredPage );
746            }
747        }
748
749    }
750
751
752    /**
753     *  When initially building a ReferenceManager from scratch, call this method
754     * BEFORE calling updateReferences() with a full list of existing page names.
755     * It builds the refersTo and referredBy key lists, thus enabling
756     * updateReferences() to function correctly.
757     * <P>
758     * This method should NEVER be called after initialization. It clears all mappings
759     * from the reference tables.
760     *
761     * @param pages   a Collection containing WikiPage objects.
762     */
763    private synchronized void buildKeyLists( Collection< WikiPage > pages )
764    {
765        m_refersTo.clear();
766        m_referredBy.clear();
767
768        if( pages == null )
769            return;
770
771        Iterator< WikiPage > it = pages.iterator();
772        try
773        {
774            while( it.hasNext() )
775            {
776                WikiPage page = it.next();
777                // We add a non-null entry to referredBy to indicate the referred page exists
778                m_referredBy.put( page.getName(), new TreeSet<String>() );
779                // Just add a key to refersTo; the keys need to be in sync with referredBy.
780                m_refersTo.put( page.getName(), null );
781            }
782        }
783        catch( ClassCastException e )
784        {
785            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
786        }
787    }
788
789
790    /**
791     * Marks the page as referred to by the referrer. If the page does not
792     * exist previously, nothing is done. (This means that some page, somewhere,
793     * has a link to a page that does not exist.)
794     * <P>
795     * This method is NOT synchronized. It should only be referred to from
796     * within a synchronized method, or it should be made synced if necessary.
797     */
798    private void updateReferredBy( String page, String referrer )
799    {
800        // We're not really interested in first level self-references.
801        /*
802        if( page.equals( referrer ) )
803        {
804            return;
805        }
806        */
807        // Neither are we interested if plural forms refer to each other.
808        if( m_matchEnglishPlurals )
809        {
810            String p2 = page.endsWith("s") ? page.substring(0,page.length()-1) : page+"s";
811
812            if( referrer.equals(p2) )
813            {
814                return;
815            }
816        }
817
818        Set<String> referrers = m_referredBy.get( page );
819
820        // Even if 'page' has not been created yet, it can still be referenced.
821        // This requires we don't use m_referredBy keys when looking up missing
822        // pages, of course.
823        if(referrers == null)
824        {
825            referrers = new TreeSet<String>();
826            m_referredBy.put( page, referrers );
827        }
828        referrers.add( referrer );
829    }
830
831
832    /**
833     * Clears the references to a certain page so it's no longer in the map.
834     *
835     * @param pagename  Name of the page to clear references for.
836     */
837    public synchronized void clearPageEntries( String pagename )
838    {
839        pagename = getFinalPageName(pagename);
840
841        //
842        //  Remove this item from the referredBy list of any page
843        //  which this item refers to.
844        //
845        Collection<String> c = m_refersTo.get( pagename );
846
847        if( c != null )
848        {
849            for( String key : c )
850            {
851                Collection<?> dref = m_referredBy.get( key );
852
853                dref.remove( pagename );
854            }
855        }
856
857        //
858        //  Finally, remove direct references.
859        //
860        m_referredBy.remove( pagename );
861        m_refersTo.remove( pagename );
862    }
863
864
865    /**
866     *  Finds all unreferenced pages. This requires a linear scan through
867     *  m_referredBy to locate keys with null or empty values.
868     *  
869     *  @return The Collection of Strings
870     */
871    public synchronized Collection< String > findUnreferenced()
872    {
873        ArrayList<String> unref = new ArrayList<String>();
874
875        for( String key : m_referredBy.keySet() )
876        {
877            Set<?> refs = getReferenceList( m_referredBy, key );
878            
879            if( refs == null || refs.isEmpty() )
880            {
881                unref.add( key );
882            }
883        }
884
885        return unref;
886    }
887
888
889    /**
890     * Finds all references to non-existant pages. This requires a linear
891     * scan through m_refersTo values; each value must have a corresponding
892     * key entry in the reference Maps, otherwise such a page has never
893     * been created.
894     * <P>
895     * Returns a Collection containing Strings of unreferenced page names.
896     * Each non-existant page name is shown only once - we don't return information
897     * on who referred to it.
898     * 
899     * @return A Collection of Strings
900     */
901    public synchronized Collection< String > findUncreated()
902    {
903        TreeSet<String> uncreated = new TreeSet<String>();
904
905        // Go through m_refersTo values and check that m_refersTo has the corresponding keys.
906        // We want to reread the code to make sure our HashMaps are in sync...
907
908        Collection<Collection<String>> allReferences = m_refersTo.values();
909
910        for( Collection<String> refs : allReferences )
911        {
912            if( refs != null )
913            {
914                for( String aReference : refs )
915                {
916                    if( m_engine.pageExists( aReference ) == false )
917                    {
918                        uncreated.add( aReference );
919                    }
920                }
921            }
922        }
923
924        return uncreated;
925    }
926
927    /**
928     *  Searches for the given page in the given Map, and returns
929     *  the set of references.  This method also takes care of English plural
930     *  matching.
931     *  
932     *  @param coll The Map to search in
933     *  @param pagename The name to find.
934     *  @return The references list.
935     */
936    private <T> Set<T> getReferenceList( Map<String,Set<T>> coll, String pagename )
937    {
938        Set<T> refs = coll.get( pagename );
939
940        if( m_matchEnglishPlurals )
941        {
942            //
943            //  We'll add also matches from the "other" page.
944            //
945            Set<T> refs2;
946
947            if( pagename.endsWith("s") )
948            {
949                refs2 = coll.get( pagename.substring(0,pagename.length()-1) );
950            }
951            else
952            {
953                refs2 = coll.get( pagename+"s" );
954            }
955
956            if( refs2 != null )
957            {
958                if( refs != null )
959                    refs.addAll( refs2 );
960                else
961                    refs = refs2;
962            }
963        }
964        return refs;
965    }
966
967    /**
968     * Find all pages that refer to this page. Returns null if the page
969     * does not exist or is not referenced at all, otherwise returns a
970     * collection containing page names (String) that refer to this one.
971     * <p>
972     * @param pagename The page to find referrers for.
973     * @return A Set of Strings.  May return null, if the page
974     *         does not exist, or if it has no references.
975     */
976    public synchronized Set< String > findReferrers( String pagename )
977    {
978        Set<String> refs = getReferenceList( m_referredBy, pagename );
979
980        if( refs == null || refs.isEmpty() )
981        {
982            return null;
983        }
984
985        return refs;
986
987    }
988
989    /**
990     *  Returns all pages that refer to this page.  Note that this method
991     *  returns an unmodifiable Map, which may be abruptly changed.  So any
992     *  access to any iterator may result in a ConcurrentModificationException.
993     *  <p>
994     *  The advantages of using this method over findReferrers() is that
995     *  it is very fast, as it does not create a new object.  The disadvantages
996     *  are that it does not do any mapping between plural names, and you
997     *  may end up getting a ConcurrentModificationException.
998     *
999     * @param pageName Page name to query.
1000     * @return A Set of Strings containing the names of all the pages that refer
1001     *         to this page.  May return null, if the page does not exist or
1002     *         has not been indexed yet.
1003     * @since 2.2.33
1004     */
1005    public Set< String > findReferredBy( String pageName )
1006    {
1007        return m_unmutableReferredBy.get( getFinalPageName(pageName) );
1008    }
1009
1010    /**
1011     *  Returns all pages that this page refers to.  You can use this as a quick
1012     *  way of getting the links from a page, but note that it does not link any
1013     *  InterWiki, image, or external links.  It does contain attachments, though.
1014     *  <p>
1015     *  The Collection returned is unmutable, so you cannot change it.  It does reflect
1016     *  the current status and thus is a live object.  So, if you are using any
1017     *  kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
1018     *  <p>
1019     *  The returned value is a Collection, because a page may refer to another page
1020     *  multiple times.
1021     *
1022     * @param pageName Page name to query
1023     * @return A Collection of Strings containing the names of the pages that this page
1024     *         refers to. May return null, if the page does not exist or has not
1025     *         been indexed yet.
1026     * @since 2.2.33
1027     */
1028    public Collection< String > findRefersTo( String pageName )
1029    {
1030        return m_unmutableRefersTo.get( getFinalPageName(pageName) );
1031    }
1032
1033    /**
1034     * This 'deepHashCode' can be used to determine if there were any
1035     * modifications made to the underlying to and by maps of the
1036     * ReferenceManager. The maps of the ReferenceManager are not
1037     * synchronized, so someone could add/remove entries in them while the
1038     * hashCode is being computed.
1039     *
1040     * @return Sum of the hashCodes for the to and by maps of the
1041     *         ReferenceManager
1042     * @since 2.3.24
1043     */
1044    //
1045    //   This method traps and retries if a concurrent
1046    //   modifcaition occurs.
1047    //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only
1048    //         when the hashmaps are changed.  This is slow.
1049    //
1050    public int deepHashCode()
1051    {
1052        boolean failed = true;
1053        int signature = 0;
1054
1055        while (failed)
1056        {
1057            signature = 0;
1058            try
1059            {
1060                signature ^= m_referredBy.hashCode();
1061                signature ^= m_refersTo.hashCode();
1062                failed = false;
1063            }
1064            catch (ConcurrentModificationException e)
1065            {
1066                Thread.yield();
1067            }
1068        }
1069
1070        return signature;
1071    }
1072
1073    /**
1074     *  Returns a list of all pages that the ReferenceManager knows about.
1075     *  This should be roughly equivalent to PageManager.getAllPages(), but without
1076     *  the potential disk access overhead.  Note that this method is not guaranteed
1077     *  to return a Set of really all pages (especially during startup), but it is
1078     *  very fast.
1079     *
1080     *  @return A Set of all defined page names that ReferenceManager knows about.
1081     *  @since 2.3.24
1082     */
1083    public Set< String > findCreated()
1084    {
1085        return new HashSet<String>( m_refersTo.keySet() );
1086    }
1087
1088    private String getFinalPageName( String orig )
1089    {
1090        try
1091        {
1092            String s = m_engine.getFinalPageName( orig );
1093
1094            if( s == null ) s = orig;
1095
1096            return s;
1097        }
1098        catch( ProviderException e )
1099        {
1100            log.error("Error while trying to fetch a page name; trying to cope with the situation.",e);
1101
1102            return orig;
1103        }
1104    }
1105
1106    /**
1107     *  {@inheritDoc}
1108     */
1109    @Override
1110    public void actionPerformed(WikiEvent event)
1111    {
1112        if( (event instanceof WikiPageEvent) && (event.getType() == WikiPageEvent.PAGE_DELETED) )
1113        {
1114            String pageName = ((WikiPageEvent) event).getPageName();
1115
1116            if( pageName != null )
1117            {
1118                pageRemoved( pageName );
1119            }
1120        }
1121    }
1122}