001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.references;
020
021import org.apache.commons.lang3.time.StopWatch;
022import org.apache.log4j.Logger;
023import org.apache.wiki.InternalWikiException;
024import org.apache.wiki.LinkCollector;
025import org.apache.wiki.api.core.Attachment;
026import org.apache.wiki.api.core.Context;
027import org.apache.wiki.api.core.Engine;
028import org.apache.wiki.api.core.Page;
029import org.apache.wiki.api.exceptions.ProviderException;
030import org.apache.wiki.api.filters.BasePageFilter;
031import org.apache.wiki.api.providers.PageProvider;
032import org.apache.wiki.api.providers.WikiProvider;
033import org.apache.wiki.api.spi.Wiki;
034import org.apache.wiki.attachment.AttachmentManager;
035import org.apache.wiki.event.WikiEvent;
036import org.apache.wiki.event.WikiEventManager;
037import org.apache.wiki.event.WikiPageEvent;
038import org.apache.wiki.pages.PageManager;
039import org.apache.wiki.render.RenderingManager;
040import org.apache.wiki.util.TextUtil;
041
042import java.io.*;
043import java.nio.charset.StandardCharsets;
044import java.security.MessageDigest;
045import java.security.NoSuchAlgorithmException;
046import java.util.*;
047import java.util.concurrent.ConcurrentHashMap;
048
049/*
050  BUGS
051
052  - if a wikilink is added to a page, then removed, RefMan still thinks that the page refers to the wikilink page. Hm.
053
054  - if a page is deleted, gets very confused.
055
056  - Serialization causes page attributes to be missing, when InitializablePlugins are not executed properly.  Thus,
057    serialization should really also mark whether a page is serializable or not...
058 */
059
060
061/*
062   A word about synchronizing:
063
064   I expect this object to be accessed in three situations:
065   - when a Engine is created and it scans its wikipages
066   - when the WE saves a page
067   - when a JSP page accesses one of the WE's ReferenceManagers to display a list of (un)referenced pages.
068
069   So, access to this class is fairly rare, and usually triggered by user interaction. OTOH, the methods in this class use their storage
070   objects intensively (and, sorry to say, in an unoptimized manner =). My deduction: using unsynchronized HashMaps etc and syncing methods
   or code blocks is preferable to using slow, synced storage objects. We don't have iterative code here, so I'm going to use synced
072   methods for now.
073
074   Please contact me if you notice problems with ReferenceManager, and especially with synchronization, or if you have suggestions about
075   syncing.
076
077   ebu@memecry.net
078*/
079
080/**
081 *  Keeps track of wikipage references:
082 *  <UL>
083 *  <LI>What pages a given page refers to
084 *  <LI>What pages refer to a given page
085 *  </UL>
086 *
087 *  This is a quick'n'dirty approach without any finesse in storage and searching algorithms; we trust java.util.*.
088 *  <P>
089 *  This class contains two HashMaps, m_refersTo and m_referredBy. The first is indexed by WikiPage names and contains a Collection of all
090 *  WikiPages the page refers to. (Multiple references are not counted, naturally.) The second is indexed by WikiPage names and contains
091 *  a Set of all pages that refer to the indexing page. (Notice - the keys of both Maps should be kept in sync.)
092 *  <P>
093 *  When a page is added or edited, its references are parsed, a Collection is received, and we crudely replace anything previous with
094 *  this new Collection. We then check each referenced page name and make sure they know they are referred to by the new page.
095 *  <P>
096 *  Based on this information, we can perform non-optimal searches for e.g. unreferenced pages, top ten lists, etc.
097 *  <P>
098 *  The owning class must take responsibility of filling in any pre-existing information, probably by loading each and every WikiPage
099 *  and calling this class to update the references when created.
100 *
101 *  @since 1.6.1 (as of 2.11.0, moved to org.apache.wiki.references)
102 */
103
// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great many
//        problems that occur here...
107public class DefaultReferenceManager extends BasePageFilter implements ReferenceManager {
108
    /**
     *  Maps page wikiname to a Collection of pages it refers to. The Collection must contain Strings. The Collection may contain
     *  names of non-existing pages.
     */
    private Map< String, Collection< String > > m_refersTo;
    /** Read-only view of {@link #m_refersTo}, created with {@code Collections.unmodifiableMap}; the value collections are not wrapped. */
    private Map< String, Collection< String > > m_unmutableRefersTo;

    /**
     *  Maps page wikiname to a Set of referring pages. The Set must contain Strings. Non-existing pages (a reference exists, but
     *  not a file for the page contents) may have an empty Set in m_referredBy.
     */
    private Map< String, Set< String > > m_referredBy;
    /** Read-only view of {@link #m_referredBy}, created with {@code Collections.unmodifiableMap}; the value sets are not wrapped. */
    private Map< String, Set< String > > m_unmutableReferredBy;

    /** Whether page names differing only by a trailing "s" are matched to each other; read from {@code Engine.PROP_MATCHPLURALS}. */
    private boolean m_matchEnglishPlurals;

    private static final Logger log = Logger.getLogger( DefaultReferenceManager.class);
    /** Name of the file, inside the engine work directory, that holds the serialized reference maps. */
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    /** Name of the directory, inside the engine work directory, that holds the per-page attribute cache files. */
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** Also used as a generic serialization id: it is written as the format version at the head of every file this class serializes. */
    private static final long serialVersionUID = 4L;
131
132    /**
133     *  Builds a new ReferenceManager.
134     *
135     *  @param engine The Engine to which this is managing references to.
136     */
137    public DefaultReferenceManager( final Engine engine ) {
138        m_refersTo = new ConcurrentHashMap<>();
139        m_referredBy = new ConcurrentHashMap<>();
140        m_engine = engine;
141        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), Engine.PROP_MATCHPLURALS, false );
142
143        //
144        //  Create two maps that contain unmutable versions of the two basic maps.
145        //
146        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
147        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
148    }
149
150    /**
151     *  Does a full reference update.  Does not sync; assumes that you do it afterwards.
152     */
153    private void updatePageReferences( final Page page ) throws ProviderException {
154        final String content = m_engine.getManager( PageManager.class ).getPageText( page.getName(), PageProvider.LATEST_VERSION );
155        final Collection< String > links = scanWikiLinks( page, content );
156        final TreeSet< String > res = new TreeSet<>( links );
157        final List< Attachment > attachments = m_engine.getManager( AttachmentManager.class ).listAttachments( page );
158        for( final Attachment att : attachments ) {
159            res.add( att.getName() );
160        }
161
162        internalUpdateReferences( page.getName(), res );
163    }
164
165    /**
166     *  Initializes the entire reference manager with the initial set of pages from the collection.
167     *
168     *  @param pages A collection of all pages you want to be included in the reference count.
169     *  @since 2.2
170     *  @throws ProviderException If reading of pages fails.
171     */
172    @Override
173    public void initialize( final Collection< Page > pages ) throws ProviderException {
174        log.debug( "Initializing new ReferenceManager with " + pages.size() + " initial pages." );
175        final StopWatch sw = new StopWatch();
176        sw.start();
177        log.info( "Starting cross reference scan of WikiPages" );
178
179        //  First, try to serialize old data from disk.  If that fails, we'll go and update the entire reference lists (which'll take time)
180        try {
181            //  Unserialize things.  The loop below cannot be combined with the other loop below, simply because
182            //  engine.getPage() has side effects such as loading initializing the user databases, which in turn want all
183            //  of the pages to be read already...
184            //
185            //  Yes, this is a kludge.  We know.  Will be fixed.
186            final long saved = unserializeFromDisk();
187
188            for( final Page page : pages ) {
189                unserializeAttrsFromDisk( page );
190            }
191
192            //  Now we must check if any of the pages have been changed  while we were in the electronic la-la-land,
193            //  and update the references for them.
194            for( final Page page : pages ) {
195                if( !( page instanceof Attachment ) ) {
196                    // Refresh with the latest copy
197                    final Page wp = m_engine.getManager( PageManager.class ).getPage( page.getName() );
198
199                    if( wp.getLastModified() == null ) {
200                        log.fatal( "Provider returns null lastModified.  Please submit a bug report." );
201                    } else if( wp.getLastModified().getTime() > saved ) {
202                        updatePageReferences( wp );
203                    }
204                }
205            }
206
207        } catch( final Exception e ) {
208            log.info( "Unable to unserialize old refmgr information, rebuilding database: " + e.getMessage() );
209            buildKeyLists( pages );
210
211            // Scan the existing pages from disk and update references in the manager.
212            for( final Page page : pages ) {
213                // We cannot build a reference list from the contents of attachments, so we skip them.
214                if( !( page instanceof Attachment ) ) {
215                    updatePageReferences( page );
216                    serializeAttrsToDisk( page );
217                }
218            }
219
220            serializeToDisk();
221        }
222
223        sw.stop();
224        log.info( "Cross reference scan done in "+sw );
225
226        WikiEventManager.addWikiEventListener( m_engine.getManager( PageManager.class ), this );
227    }
228
229    /**
230     *  Reads the serialized data from the disk back to memory. Returns the date when the data was last written on disk
231     */
232    @SuppressWarnings("unchecked")
233    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
234        final long saved;
235
236        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
237        try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
238            final StopWatch sw = new StopWatch();
239            sw.start();
240
241            final long ver = in.readLong();
242
243            if( ver != serialVersionUID ) {
244                throw new IOException("File format has changed; I need to recalculate references.");
245            }
246
247            saved        = in.readLong();
248            m_refersTo   = ( Map< String, Collection< String > > ) in.readObject();
249            m_referredBy = ( Map< String, Set< String > > ) in.readObject();
250
251            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
252            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
253
254            sw.stop();
255            log.debug("Read serialized data successfully in "+sw);
256        }
257
258        return saved;
259    }
260
261    /**
262     *  Serializes hashmaps to disk.  The format is private, don't touch it.
263     */
264    private synchronized void serializeToDisk() {
265        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
266        try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
267            final StopWatch sw = new StopWatch();
268            sw.start();
269
270            out.writeLong( serialVersionUID );
271            out.writeLong( System.currentTimeMillis() ); // Timestamp
272            out.writeObject( m_refersTo );
273            out.writeObject( m_referredBy );
274
275            sw.stop();
276
277            log.debug("serialization done - took "+sw);
278        } catch( final IOException ioe ) {
279            log.error("Unable to serialize!", ioe);
280        }
281    }
282
283    private String getHashFileName( final String pageName ) {
284        if( pageName == null ) {
285            return null;
286        }
287        try {
288            final MessageDigest digest = MessageDigest.getInstance( "MD5" );
289            final byte[] dig = digest.digest( pageName.getBytes( StandardCharsets.UTF_8 ) );
290
291            return TextUtil.toHexString( dig ) + ".cache";
292        } catch( final NoSuchAlgorithmException e ) {
293            log.fatal( "What do you mean - no such algorithm?", e );
294            return null;
295        }
296    }
297
298    /**
299     *  Reads the serialized data from the disk back to memory. Returns the date when the data was last written on disk
300     */
301    private synchronized long unserializeAttrsFromDisk( final Page p ) throws IOException, ClassNotFoundException {
302        long saved = 0L;
303
304        //  Find attribute cache, and check if it exists
305        final String hashName = getHashFileName( p.getName() );
306        if( hashName != null ) {
307            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
308            f = new File( f, hashName );
309            if( !f.exists() ) {
310                return 0L;
311            }
312
313            try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
314                final StopWatch sw = new StopWatch();
315                sw.start();
316                log.debug( "Deserializing attributes for " + p.getName() );
317
318                final long ver = in.readLong();
319                if( ver != serialVersionUID ) {
320                    log.debug("File format has changed; cannot deserialize.");
321                    return 0L;
322                }
323
324                saved = in.readLong();
325                final String name  = in.readUTF();
326                if( !name.equals( p.getName() ) ) {
327                    log.debug("File name does not match (" + name + "), skipping...");
328                    return 0L; // Not here
329                }
330
331                final long entries = in.readLong();
332                for( int i = 0; i < entries; i++ ) {
333                    final String key   = in.readUTF();
334                    final Object value = in.readObject();
335                    p.setAttribute( key, value );
336                    log.debug("   attr: "+key+"="+value);
337                }
338
339                sw.stop();
340                log.debug("Read serialized data for "+name+" successfully in "+sw);
341                p.setHasMetadata();
342            }
343        }
344
345        return saved;
346    }
347
348    /**
349     *  Serializes hashmaps to disk.  The format is private, don't touch it.
350     */
351    private synchronized void serializeAttrsToDisk( final Page p ) {
352        final StopWatch sw = new StopWatch();
353        sw.start();
354
355        final String hashName = getHashFileName( p.getName() );
356        if( hashName != null ) {
357            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
358            if( !f.exists() ) {
359                f.mkdirs();
360            }
361
362            //  Create a digest for the name
363            f = new File( f, hashName );
364
365            try( final ObjectOutputStream out =  new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
366                // new Set to avoid concurrency issues
367                final Set< Map.Entry < String, Object > > entries = new HashSet<>( p.getAttributes().entrySet() );
368
369                if( entries.size() == 0 ) {
370                    //  Nothing to serialize, therefore we will just simply remove the serialization file so that the
371                    //  next time we boot, we don't deserialize old data.
372                    f.delete();
373                    return;
374                }
375
376                out.writeLong( serialVersionUID );
377                out.writeLong( System.currentTimeMillis() ); // Timestamp
378                out.writeUTF( p.getName() );
379                out.writeLong( entries.size() );
380
381                for( final Map.Entry< String, Object > e : entries ) {
382                    if( e.getValue() instanceof Serializable ) {
383                        out.writeUTF( e.getKey() );
384                        out.writeObject( e.getValue() );
385                    }
386                }
387
388            } catch( final IOException e ) {
389                log.error( "Unable to serialize!", e );
390            } finally {
391                sw.stop();
392                log.debug( "serialization for " + p.getName() + " done - took " + sw );
393            }
394        }
395
396    }
397
398    /**
399     *  After the page has been saved, updates the reference lists.
400     *
401     *  @param context {@inheritDoc}
402     *  @param content {@inheritDoc}
403     */
404    @Override
405    public void postSave( final Context context, final String content ) {
406        final Page page = context.getPage();
407        updateReferences( page.getName(), scanWikiLinks( page, content ) );
408        serializeAttrsToDisk( page );
409    }
410
411    /**
412     *  Reads a WikiPageful of data from a String and returns all links internal to this Wiki in a Collection.
413     *
414     *  @param page The WikiPage to scan
415     *  @param pagedata The page contents
416     *  @return a Collection of Strings
417     */
418    @Override
419    public Collection< String > scanWikiLinks( final Page page, final String pagedata ) {
420        final LinkCollector localCollector = new LinkCollector();
421        m_engine.getManager( RenderingManager.class ).textToHTML( Wiki.context().create( m_engine, page ),
422                                                                  pagedata,
423                                                                  localCollector,
424                                                                  null,
425                                                                  localCollector,
426                                                                  false,
427                                                                  true );
428
429        return localCollector.getLinks();
430    }
431
432    /**
433     * Updates the m_referedTo and m_referredBy hashmaps when a page has been deleted.
434     * <P>
435     * Within the m_refersTo map the pagename is a key. The whole key-value-set has to be removed to keep the map clean.
436     * Within the m_referredBy map the name is stored as a value. Since a key can have more than one value we have to
437     * delete just the key-value-pair referring page:deleted page.
438     *
439     *  @param page Name of the page to remove from the maps.
440     */
441    @Override
442    public synchronized void pageRemoved( final Page page ) {
443        pageRemoved( page.getName() );
444    }
445
446    private void pageRemoved( final String pageName ) {
447        final Collection< String > refTo = m_refersTo.get( pageName );
448        if( refTo != null ) {
449            for( final String referredPageName : refTo ) {
450                final Set< String > refBy = m_referredBy.get( referredPageName );
451                if( refBy == null ) {
452                    throw new InternalWikiException( "Refmgr out of sync: page " + pageName +
453                                                     " refers to " + referredPageName + ", which has null referrers." );
454                }
455
456                refBy.remove( pageName );
457                m_referredBy.remove( referredPageName );
458
459                // We won't put it back again if it becomes empty and does not exist.  It will be added
460                // later on anyway, if it becomes referenced again.
461                if( !( refBy.isEmpty() && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPageName ) ) ) {
462                    m_referredBy.put( referredPageName, refBy );
463                }
464            }
465
466            log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName ));
467            m_refersTo.remove( pageName );
468        }
469
470        final Set< String > refBy = m_referredBy.get( pageName );
471        if( refBy == null || refBy.isEmpty() ) {
472            m_referredBy.remove( pageName );
473        }
474
475        //  Remove any traces from the disk, too
476        serializeToDisk();
477
478        final String hashName = getHashFileName( pageName );
479        if( hashName != null ) {
480            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
481            f = new File( f, getHashFileName( pageName ) );
482            if( f.exists() ) {
483                f.delete();
484            }
485        }
486    }
487
488    /**
489     *  Updates all references for the given page.
490     *
491     *  @param page wiki page for which references should be updated
492     */
493    @Override
494    public void updateReferences( final Page page ) {
495        final String pageData = m_engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
496        updateReferences( page.getName(), scanWikiLinks( page, pageData ) );
497    }
498
499    /**
500     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is removed
501     *  and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
502     *  <P>
503     *  This is the method to call when a new page has been created and we want to a) set up its references and b) notify the
504     *  referred pages of the references. Use this method during run-time.
505     *
506     *  @param page Name of the page to update.
507     *  @param references A Collection of Strings, each one pointing to a page this page references.
508     */
509    @Override
510    public synchronized void updateReferences( final String page, final Collection< String > references ) {
511        internalUpdateReferences( page, references );
512        serializeToDisk();
513    }
514
515    /**
516     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is
517     *  removed and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
518     *  <p>
519     *  This method does not synchronize the database to disk.
520     *
521     *  @param page Name of the page to update.
522     *  @param references A Collection of Strings, each one pointing to a page this page references.
523     */
524    private void internalUpdateReferences( String page, final Collection< String > references) {
525        page = getFinalPageName( page );
526
527        // Create a new entry in m_refersTo.
528        final Collection< String > oldRefTo = m_refersTo.get( page );
529        m_refersTo.remove( page );
530
531        final TreeSet< String > cleanedRefs = new TreeSet<>();
532        for( final String ref : references ) {
533            final String reference = getFinalPageName( ref );
534            cleanedRefs.add( reference );
535        }
536
537        m_refersTo.put( page, cleanedRefs );
538
539        //  We know the page exists, since it's making references somewhere. If an entry for it didn't exist previously
540        //  in m_referredBy, make sure one is added now.
541        if( !m_referredBy.containsKey( page ) ) {
542            m_referredBy.put( page, new TreeSet<>() );
543        }
544
545        //  Get all pages that used to be referred to by 'page' and remove that reference. (We don't want to try to figure out
546        //  which particular references were removed...)
547        cleanReferredBy( page, oldRefTo, cleanedRefs );
548
549        //  Notify all referred pages of their referinesshoodicity.
550        for( final String referredPageName : cleanedRefs ) {
551            updateReferredBy( getFinalPageName( referredPageName ), page );
552        }
553    }
554
555    /**
556     * Returns the refers-to list. For debugging.
557     *
558     * @return The refers-to list.
559     */
560    protected Map< String, Collection< String > > getRefersTo() {
561        return m_refersTo;
562    }
563
564    /**
565     * Returns the referred-by list. For debugging.
566     *
567     * @return Referred-by lists.
568     */
569    protected Map< String, Set< String > > getReferredBy() {
570        return m_referredBy;
571    }
572
573    /**
574     * Cleans the 'referred by' list, removing references by 'referrer' to any other page. Called after 'referrer' is removed.
575     *
576     * Two ways to go about this. One is to look up all pages previously referred by referrer and remove referrer
577     * from their lists, and let the update put them back in (except possibly removed ones).
578     *
579     * The other is to get the old referred-to list, compare to the new, and tell the ones missing in the latter to remove referrer from
580     * their list.
581     *
582     * We'll just try the first for now. Need to come back and optimize this a bit.
583     */
584    private void cleanReferredBy( final String referrer,
585                                  final Collection< String > oldReferred,
586                                  final Collection< String > newReferred ) {
587        if( oldReferred == null ) {
588            return;
589        }
590
591        for( final String referredPage : oldReferred ) {
592            final Set< String > oldRefBy = m_referredBy.get( referredPage );
593            if( oldRefBy != null ) {
594                oldRefBy.remove( referrer );
595            }
596
597            // If the page is referred to by no one AND it doesn't even exist, we might just as well forget about this
598            // entry. It will be added again elsewhere if new references appear.
599            if( ( oldRefBy == null || oldRefBy.isEmpty() ) && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPage ) ) {
600                m_referredBy.remove( referredPage );
601            }
602        }
603    }
604
605    /**
606     * When initially building a ReferenceManager from scratch, call this method BEFORE calling updateReferences() with
607     * a full list of existing page names. It builds the refersTo and referredBy key lists, thus enabling updateReferences()
608     * to function correctly.
609     * <P>
610     * This method should NEVER be called after initialization. It clears all mappings from the reference tables.
611     *
612     * @param pages a Collection containing WikiPage objects.
613     */
614    private synchronized void buildKeyLists( final Collection< Page > pages ) {
615        m_refersTo.clear();
616        m_referredBy.clear();
617        if( pages == null ) {
618            return;
619        }
620
621        try {
622            for( final Page page : pages ) {
623                // We add a non-null entry to referredBy to indicate the referred page exists
624                m_referredBy.put( page.getName(), new TreeSet<>() );
625                // Just add a key to refersTo; the keys need to be in sync with referredBy.
626                m_refersTo.put( page.getName(), new TreeSet<>() );
627            }
628        } catch( final ClassCastException e ) {
629            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
630        }
631    }
632
633
634    /**
635     * Marks the page as referred to by the referrer. If the page does not exist previously, nothing is done. (This means
636     * that some page, somewhere, has a link to a page that does not exist.)
637     * <P>
638     * This method is NOT synchronized. It should only be referred to from within a synchronized method, or it should be
639     * made synced if necessary.
640     */
641    private void updateReferredBy( final String page, final String referrer ) {
642        // We're not really interested in first level self-references.
643        /*
644        if( page.equals( referrer ) )
645        {
646            return;
647        }
648        */
649        // Neither are we interested if plural forms refer to each other.
650        if( m_matchEnglishPlurals ) {
651            final String p2 = page.endsWith( "s" ) ? page.substring( 0, page.length() - 1 ) : page + "s";
652            if( referrer.equals( p2 ) ) {
653                return;
654            }
655        }
656
657        // Even if 'page' has not been created yet, it can still be referenced. This requires we don't use m_referredBy
658        // keys when looking up missing pages, of course.
659        final Set< String > referrers = m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );
660        referrers.add( referrer );
661    }
662
663
664    /**
665     * Clears the references to a certain page so it's no longer in the map.
666     *
667     * @param pagename  Name of the page to clear references for.
668     */
669    @Override
670    public synchronized void clearPageEntries( String pagename ) {
671        pagename = getFinalPageName( pagename );
672
673        //  Remove this item from the referredBy list of any page which this item refers to.
674        final Collection< String > c = m_refersTo.get( pagename );
675        if( c != null ) {
676            for( final String key : c ) {
677                final Collection< ? > dref = m_referredBy.get( key );
678                dref.remove( pagename );
679            }
680        }
681
682        //  Finally, remove direct references.
683        m_referredBy.remove( pagename );
684        m_refersTo.remove( pagename );
685    }
686
687
688    /**
689     *  Finds all unreferenced pages. This requires a linear scan through m_referredBy to locate keys with null or empty values.
690     *
691     *  @return The Collection of Strings
692     */
693    @Override
694    public synchronized Collection< String > findUnreferenced() {
695        final ArrayList< String > unref = new ArrayList<>();
696        for( final String key : m_referredBy.keySet() ) {
697            final Set< ? > refs = getReferenceList( m_referredBy, key );
698            if( refs == null || refs.isEmpty() ) {
699                unref.add( key );
700            }
701        }
702
703        return unref;
704    }
705
706
707    /**
708     * Finds all references to non-existant pages. This requires a linear scan through m_refersTo values; each value
709     * must have a corresponding key entry in the reference Maps, otherwise such a page has never been created.
710     * <P>
711     * Returns a Collection containing Strings of unreferenced page names. Each non-existant page name is shown only
712     * once - we don't return information on who referred to it.
713     *
714     * @return A Collection of Strings
715     */
716    @Override
717    public synchronized Collection< String > findUncreated() {
718        final TreeSet< String > uncreated = new TreeSet<>();
719
720        // Go through m_refersTo values and check that m_refersTo has the corresponding keys.
721        // We want to reread the code to make sure our HashMaps are in sync...
722        final Collection< Collection< String > > allReferences = m_refersTo.values();
723        for( final Collection<String> refs : allReferences ) {
724            if( refs != null ) {
725                for( final String aReference : refs ) {
726                    if( !m_engine.getManager( PageManager.class ).wikiPageExists( aReference ) ) {
727                        uncreated.add( aReference );
728                    }
729                }
730            }
731        }
732
733        return uncreated;
734    }
735
736    /**
737     *  Searches for the given page in the given Map, and returns the set of references. This method also takes care of
738     *  English plural matching.
739     *
740     *  @param coll The Map to search in
741     *  @param pagename The name to find.
742     *  @return The references list.
743     */
744    private < T > Set< T > getReferenceList( final Map< String, Set< T > > coll, final String pagename ) {
745        Set< T > refs = coll.get( pagename );
746
747        if( m_matchEnglishPlurals ) {
748            //  We'll add also matches from the "other" page.
749            final Set< T > refs2;
750
751            if( pagename.endsWith( "s" ) ) {
752                refs2 = coll.get( pagename.substring( 0, pagename.length() - 1 ) );
753            } else {
754                refs2 = coll.get( pagename + "s" );
755            }
756
757            if( refs2 != null ) {
758                if( refs != null ) {
759                    refs.addAll( refs2 );
760                } else {
761                    refs = refs2;
762                }
763            }
764        }
765        return refs;
766    }
767
768    /**
769     * Find all pages that refer to this page. Returns null if the page does not exist or is not referenced at all,
770     * otherwise returns a collection containing page names (String) that refer to this one.
771     * <p>
772     * @param pagename The page to find referrers for.
773     * @return A Set of Strings.  May return null, if the page does not exist, or if it has no references.
774     */
775    @Override
776    public synchronized Set< String > findReferrers( final String pagename ) {
777        final Set< String > refs = getReferenceList( m_referredBy, pagename );
778        if( refs == null || refs.isEmpty() ) {
779            return null;
780        }
781
782        return refs;
783    }
784
785    /**
786     *  Returns all pages that refer to this page.  Note that this method returns an unmodifiable Map, which may be abruptly changed.
787     *  So any access to any iterator may result in a ConcurrentModificationException.
788     *  <p>
789     *  The advantages of using this method over findReferrers() is that it is very fast, as it does not create a new object.
790     *  The disadvantages are that it does not do any mapping between plural names, and you may end up getting a
791     *  ConcurrentModificationException.
792     *
793     * @param pageName Page name to query.
794     * @return A Set of Strings containing the names of all the pages that refer to this page.  May return null, if the page does
795     *         not exist or has not been indexed yet.
796     * @since 2.2.33
797     */
798    @Override
799    public Set< String > findReferredBy( final String pageName ) {
800        return m_unmutableReferredBy.get( getFinalPageName(pageName) );
801    }
802
803    /**
804     *  Returns all pages that this page refers to.  You can use this as a quick way of getting the links from a page, but note
805     *  that it does not link any InterWiki, image, or external links.  It does contain attachments, though.
806     *  <p>
807     *  The Collection returned is unmutable, so you cannot change it.  It does reflect the current status and thus is a live
808     *  object.  So, if you are using any kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
809     *  <p>
810     *  The returned value is a Collection, because a page may refer to another page multiple times.
811     *
812     * @param pageName Page name to query
813     * @return A Collection of Strings containing the names of the pages that this page refers to. May return null, if the page
814     *         does not exist or has not been indexed yet.
815     * @since 2.2.33
816     */
817    @Override
818    public Collection< String > findRefersTo( final String pageName ) {
819        return m_unmutableRefersTo.get( getFinalPageName( pageName ) );
820    }
821
822    /**
823     * This 'deepHashCode' can be used to determine if there were any modifications made to the underlying to and by maps of the
824     * ReferenceManager. The maps of the ReferenceManager are not synchronized, so someone could add/remove entries in them while the
825     * hashCode is being computed.
826     *
827     * This method traps and retries if a concurrent modification occurs.
828     *
829     * @return Sum of the hashCodes for the to and by maps of the ReferenceManager
830     * @since 2.3.24
831     */
832    //
833    //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only when the hashmaps are changed.  This is slow.
834    //
835    public int deepHashCode() {
836        boolean failed = true;
837        int signature = 0;
838
839        while( failed ) {
840            signature = 0;
841            try {
842                signature ^= m_referredBy.hashCode();
843                signature ^= m_refersTo.hashCode();
844                failed = false;
845            } catch ( final ConcurrentModificationException e) {
846                Thread.yield();
847            }
848        }
849
850        return signature;
851    }
852
853    /**
854     *  Returns a list of all pages that the ReferenceManager knows about. This should be roughly equivalent to
855     *  PageManager.getAllPages(), but without the potential disk access overhead.  Note that this method is not guaranteed
856     *  to return a Set of really all pages (especially during startup), but it is very fast.
857     *
858     *  @return A Set of all defined page names that ReferenceManager knows about.
859     *  @since 2.3.24
860     */
861    @Override
862    public Set< String > findCreated() {
863        return new HashSet<>( m_refersTo.keySet() );
864    }
865
866    private String getFinalPageName( final String orig ) {
867        try {
868            final String s = m_engine.getFinalPageName( orig );
869            return s != null ? s : orig;
870        } catch( final ProviderException e ) {
871            log.error("Error while trying to fetch a page name; trying to cope with the situation.",e);
872            return orig;
873        }
874    }
875
876    /**
877     *  {@inheritDoc}
878     */
879    @Override
880    public void actionPerformed( final WikiEvent event ) {
881        if( event instanceof WikiPageEvent && event.getType() == WikiPageEvent.PAGE_DELETED ) {
882            final String pageName = ( ( WikiPageEvent ) event ).getPageName();
883            if( pageName != null ) {
884                pageRemoved( pageName );
885            }
886        }
887    }
888
889}