/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.references;

import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.InternalWikiException;
import org.apache.wiki.LinkCollector;
import org.apache.wiki.api.core.Attachment;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.api.core.Page;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.api.filters.BasePageFilter;
import org.apache.wiki.api.providers.PageProvider;
import org.apache.wiki.api.providers.WikiProvider;
import org.apache.wiki.api.spi.Wiki;
import org.apache.wiki.attachment.AttachmentManager;
import org.apache.wiki.event.WikiEvent;
import org.apache.wiki.event.WikiEventManager;
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.pages.PageManager;
import org.apache.wiki.render.RenderingManager;
import org.apache.wiki.util.TextUtil;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

/*
  BUGS

  - if a wikilink is added to a page and then removed, RefMan still thinks that the page refers to the wikilink page.

  - if a page is deleted, the manager gets very confused.

  - Serialization causes page attributes to be missing when InitializablePlugins are not executed properly.  Thus,
    serialization should really also mark whether a page is serializable or not...
 */


/*
   A word about synchronizing:

   I expect this object to be accessed in three situations:
   - when an Engine is created, and it scans its wikipages
   - when the Engine saves a page
   - when a JSP page accesses one of the Engine's ReferenceManagers to display a list of (un)referenced pages.

   So, access to this class is fairly rare, and usually triggered by user interaction. OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =). My deduction: using unsynchronized HashMaps etc. and syncing methods
   or code blocks is preferable to using slow, synced storage objects. We don't have iterative code here, so I'm going to use synced
   methods for now.

   Please contact me if you notice problems with ReferenceManager, and especially with synchronization, or if you have suggestions about
   syncing.

   ebu@memecry.net
*/

/**
 *  Keeps track of wikipage references:
 *  <UL>
 *  <LI>What pages a given page refers to
 *  <LI>What pages refer to a given page
 *  </UL>
 *
 *  This is a quick'n'dirty approach without any finesse in storage and searching algorithms; we trust java.util.*.
 *  <P>
 *  This class contains two HashMaps, m_refersTo and m_referredBy. The first is indexed by WikiPage names and contains a Collection of all
 *  WikiPages the page refers to. (Multiple references are not counted, naturally.) The second is indexed by WikiPage names and contains
 *  a Set of all pages that refer to the indexing page. (Notice - the keys of both Maps should be kept in sync.)
 *  <P>
 *  When a page is added or edited, its references are parsed, a Collection is received, and we crudely replace anything previous with
 *  this new Collection. We then check each referenced page name and make sure they know they are referred to by the new page.
 *  <P>
 *  Based on this information, we can perform non-optimal searches for e.g. unreferenced pages, top ten lists, etc.
 *  <P>
 *  The owning class must take responsibility for filling in any pre-existing information, probably by loading each and every WikiPage
 *  and calling this class to update the references when created.
 *
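 *  <P>
 *  For illustration, a rough usage sketch (the page name is made up, and it assumes an already initialized
 *  {@code Engine} named {@code engine}):
 *  <pre>{@code
 *  ReferenceManager refMgr = engine.getManager( ReferenceManager.class );
 *  Set< String > referrers = refMgr.findReferrers( "Main" );      // pages linking to "Main", or null if none
 *  Collection< String > links = refMgr.findRefersTo( "Main" );    // pages "Main" links to, or null if unknown
 *  Collection< String > orphans = refMgr.findUnreferenced();      // pages that nothing links to
 *  }</pre>
 *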
 *  @since 1.6.1 (as of 2.11.0, moved to org.apache.wiki.references)
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...
public class DefaultReferenceManager extends BasePageFilter implements ReferenceManager, Serializable {

    /**
     *  Maps page wikiname to a Collection of pages it refers to. The Collection must contain Strings. The Collection may contain
     *  names of non-existing pages.
     */
    private Map< String, Collection< String > > m_refersTo;
    private Map< String, Collection< String > > m_unmutableRefersTo;

    /**
     *  Maps page wikiname to a Set of referring pages. The Set must contain Strings. Non-existing pages (a reference exists, but
     *  not a file for the page contents) may have an empty Set in m_referredBy.
     */
    private Map< String, Set< String > > m_referredBy;
    private Map< String, Set< String > > m_unmutableReferredBy;

    private final boolean m_matchEnglishPlurals;

    private static final Logger LOG = LogManager.getLogger( DefaultReferenceManager.class );
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We also use this as a generic serialization id. */
    private static final long serialVersionUID = 4L;

    /**
     *  Builds a new ReferenceManager.
     *
     *  @param engine The Engine whose page references this instance manages.
     */
    public DefaultReferenceManager( final Engine engine ) {
        m_refersTo = new ConcurrentHashMap<>();
        m_referredBy = new ConcurrentHashMap<>();
        m_engine = engine;
        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), Engine.PROP_MATCHPLURALS, false );

        //
        //  Create two maps that contain immutable (read-only) views of the two basic maps.
        //
        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
    }

    /**
     *  Does a full reference update.  Does not sync; assumes that you do it afterwards.
     */
    private void updatePageReferences( final Page page ) throws ProviderException {
        final String content = m_engine.getManager( PageManager.class ).getPageText( page.getName(), PageProvider.LATEST_VERSION );
        final Collection< String > links = scanWikiLinks( page, content );
        final TreeSet< String > res = new TreeSet<>( links );
        final List< Attachment > attachments = m_engine.getManager( AttachmentManager.class ).listAttachments( page );
        for( final Attachment att : attachments ) {
            res.add( att.getName() );
        }

        internalUpdateReferences( page.getName(), res );
    }

    /**
     *  Initializes the entire reference manager with the initial set of pages from the collection.
     *
     *  @param pages A collection of all pages you want to be included in the reference count.
     *  @since 2.2
     *  @throws ProviderException If reading of pages fails.
     */
    @Override
    public void initialize( final Collection< Page > pages ) throws ProviderException {
        LOG.debug( "Initializing new ReferenceManager with {} initial pages.", pages.size() );
        final StopWatch sw = new StopWatch();
        sw.start();
        LOG.info( "Starting cross reference scan of WikiPages" );

        //  First, try to unserialize old data from disk.  If that fails, we'll go and update the entire reference lists (which'll take time)
        try {
            //  Unserialize things.  The first loop below cannot be combined with the second one, simply because
            //  engine.getPage() has side effects such as loading and initializing the user databases, which in turn want all
            //  the pages to be read already...
            //
            //  Yes, this is a kludge.  We know.  Will be fixed.
            final long saved = unserializeFromDisk();

            for( final Page page : pages ) {
                unserializeAttrsFromDisk( page );
            }

            //  Now we must check if any of the pages have been changed while we were in the electronic la-la-land,
            //  and update the references for them.
            for( final Page page : pages ) {
                if( !( page instanceof Attachment ) ) {
                    // Refresh with the latest copy
                    final Page wp = m_engine.getManager( PageManager.class ).getPage( page.getName() );

                    if( wp.getLastModified() == null ) {
                        LOG.fatal( "Provider returns null lastModified.  Please submit a bug report." );
                    } else if( wp.getLastModified().getTime() > saved ) {
                        updatePageReferences( wp );
                    }
                }
            }

        } catch( final Exception e ) {
            LOG.info( "Unable to unserialize old refmgr information, rebuilding database: {}", e.getMessage() );
            buildKeyLists( pages );

            // Scan the existing pages from disk and update references in the manager.
            for( final Page page : pages ) {
                // We cannot build a reference list from the contents of attachments, so we skip them.
                if( !( page instanceof Attachment ) ) {
                    updatePageReferences( page );
                    serializeAttrsToDisk( page );
                }
            }

            serializeToDisk();
        }

        sw.stop();
        LOG.info( "Cross reference scan done in {}", sw );

        WikiEventManager.addWikiEventListener( m_engine.getManager( PageManager.class ), this );
    }

    /**
     *  Reads the serialized data from the disk back into memory. Returns the timestamp of when the data was last written to disk.
     */
    @SuppressWarnings("unchecked")
    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
        final long saved;

        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( Files.newInputStream( f.toPath() ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            final long ver = in.readLong();

            if( ver != serialVersionUID ) {
                throw new IOException( "File format has changed; I need to recalculate references." );
            }

            saved        = in.readLong();
            m_refersTo   = ( Map< String, Collection< String > > ) in.readObject();
            m_referredBy = ( Map< String, Set< String > > ) in.readObject();

            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );

            sw.stop();
            LOG.debug( "Read serialized data successfully in {}", sw );
        }

        return saved;
    }

    /**
     *  Serializes hashmaps to disk.  The format is private, don't touch it.
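     *  <p>
     *  For reference, the record layout written here is: the serialization id (a long), a timestamp (a long), and then
     *  the m_refersTo and m_referredBy maps as serialized objects.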
     */
    private synchronized void serializeToDisk() {
        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream( f.toPath() ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp
            out.writeObject( m_refersTo );
            out.writeObject( m_referredBy );

            sw.stop();

            LOG.debug( "serialization done - took {}", sw );
        } catch( final IOException ioe ) {
            LOG.error( "Unable to serialize!", ioe );
        }
    }

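    /**
     *  Returns the name of the per-page attribute cache file: an MD5 hex digest of the page name with a {@code .cache}
     *  suffix, or {@code null} if the page name is {@code null} or the MD5 algorithm is unavailable.
     */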
    private String getHashFileName( final String pageName ) {
        if( pageName == null ) {
            return null;
        }
        try {
            final MessageDigest digest = MessageDigest.getInstance( "MD5" );
            final byte[] dig = digest.digest( pageName.getBytes( StandardCharsets.UTF_8 ) );

            return TextUtil.toHexString( dig ) + ".cache";
        } catch( final NoSuchAlgorithmException e ) {
            LOG.fatal( "What do you mean - no such algorithm?", e );
            return null;
        }
    }

    /**
     *  Reads the serialized attribute data from the disk back into memory. Returns the timestamp of when the data was last written to disk.
     */
    private synchronized long unserializeAttrsFromDisk( final Page p ) throws IOException, ClassNotFoundException {
        long saved = 0L;

        //  Find attribute cache, and check if it exists
        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( !f.exists() ) {
                return 0L;
            }

            try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( Files.newInputStream( f.toPath() ) ) ) ) {
                final StopWatch sw = new StopWatch();
                sw.start();
                LOG.debug( "Deserializing attributes for {}", p.getName() );

                final long ver = in.readLong();
                if( ver != serialVersionUID ) {
                    LOG.debug( "File format has changed; cannot deserialize." );
                    return 0L;
                }

                saved = in.readLong();
                final String name  = in.readUTF();
                if( !name.equals( p.getName() ) ) {
                    LOG.debug( "File name does not match ({}), skipping...", name );
                    return 0L; // Not here
                }

                final long entries = in.readLong();
                for( int i = 0; i < entries; i++ ) {
                    final String key   = in.readUTF();
                    final Object value = in.readObject();
                    p.setAttribute( key, value );
                    LOG.debug( "   attr: {}={}", key, value );
                }

                sw.stop();
                LOG.debug( "Read serialized data for {} successfully in {}", name, sw );
                p.setHasMetadata();
            }
        }

        return saved;
    }

    /**
     *  Serializes the page attributes to disk.  The format is private, don't touch it.
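     *  <p>
     *  For reference, the record layout written here is: the serialization id (a long), a timestamp (a long), the page
     *  name (UTF), the number of attribute entries (a long), and then each Serializable attribute as a key (UTF) /
     *  value (object) pair.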
     */
    private synchronized void serializeAttrsToDisk( final Page p ) {
        final StopWatch sw = new StopWatch();
        sw.start();

        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            if( !f.exists() ) {
                f.mkdirs();
            }

            //  Create a digest for the name
            f = new File( f, hashName );

            try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream( f.toPath() ) ) ) ) {
                // Copy into a new Set to avoid concurrency issues, and keep only the Serializable values
                // so that the entry count written below matches the number of entries actually written.
                final Set< Map.Entry< String, Object > > entries = new HashSet<>( p.getAttributes().entrySet() );
                entries.removeIf( e -> !( e.getValue() instanceof Serializable ) );

                if( entries.isEmpty() ) {
                    //  Nothing to serialize, therefore we just remove the serialization file so that the
                    //  next time we boot, we don't deserialize stale data.
                    f.delete();
                    return;
                }

                out.writeLong( serialVersionUID );
                out.writeLong( System.currentTimeMillis() ); // Timestamp
                out.writeUTF( p.getName() );
                out.writeLong( entries.size() );

                for( final Map.Entry< String, Object > e : entries ) {
                    out.writeUTF( e.getKey() );
                    out.writeObject( e.getValue() );
                }

            } catch( final IOException e ) {
                LOG.error( "Unable to serialize!", e );
            } finally {
                sw.stop();
                LOG.debug( "serialization for {} done - took {}", p.getName(), sw );
            }
        }

    }

    /**
     *  After the page has been saved, updates the reference lists.
     *
     *  @param context {@inheritDoc}
     *  @param content {@inheritDoc}
     */
    @Override
    public void postSave( final Context context, final String content ) {
        final Page page = context.getPage();
        updateReferences( page.getName(), scanWikiLinks( page, content ) );
        serializeAttrsToDisk( page );
    }

    /**
     *  Reads a WikiPageful of data from a String and returns all links internal to this Wiki in a Collection.
     *
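     *  A minimal sketch of a typical call (it mirrors what {@link #updateReferences(Page)} does; {@code engine},
     *  {@code page} and a {@code referenceManager} reference are assumed to be at hand):
     *  <pre>{@code
     *  String text = engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
     *  Collection< String > links = referenceManager.scanWikiLinks( page, text );
     *  }</pre>
     *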
     *  @param page The WikiPage to scan
     *  @param pagedata The page contents
     *  @return a Collection of Strings
     */
    @Override
    public Collection< String > scanWikiLinks( final Page page, final String pagedata ) {
        final LinkCollector localCollector = new LinkCollector();
        m_engine.getManager( RenderingManager.class ).textToHTML( Wiki.context().create( m_engine, page ),
                                                                  pagedata,
                                                                  localCollector,
                                                                  null,
                                                                  localCollector,
                                                                  false,
                                                                  true );

        return localCollector.getLinks();
    }

    /**
     * Updates the m_refersTo and m_referredBy hashmaps when a page has been deleted.
     * <P>
     * Within the m_refersTo map the page name is a key, so the whole entry has to be removed to keep the map clean.
     * Within the m_referredBy map the name is stored as a value. Since a key can have more than one value, we only
     * delete the key-value pair (referring page : deleted page).
     *
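     * For example (illustrative page names): if {@code PageA} refers to {@code PageB} and {@code PageA} is deleted,
     * the key {@code PageA} is dropped from m_refersTo, and {@code PageA} is removed from the referrer set of
     * {@code PageB} in m_referredBy; {@code PageB} itself stays in the maps as long as it still exists as a page.
     *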
     *  @param page Name of the page to remove from the maps.
     */
    @Override
    public void pageRemoved( final Page page ) {
        pageRemoved( page.getName() );
    }

    private void pageRemoved( final String pageName ) {
        final Collection< String > refTo = m_refersTo.get( pageName );
        if( refTo != null ) {
            for( final String referredPageName : refTo ) {
                final Set< String > refBy = m_referredBy.get( referredPageName );
                if( refBy == null ) {
                    throw new InternalWikiException( "Refmgr out of sync: page " + pageName +
                                                     " refers to " + referredPageName + ", which has null referrers." );
                }

                refBy.remove( pageName );
                m_referredBy.remove( referredPageName );

                // We won't put it back again if it becomes empty and does not exist.  It will be added
                // later on anyway, if it becomes referenced again.
                if( !( refBy.isEmpty() && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPageName ) ) ) {
                    m_referredBy.put( referredPageName, refBy );
                }
            }

            LOG.debug( "Removing from m_refersTo HashMap key:value {}:{}", pageName, m_refersTo.get( pageName ) );
            m_refersTo.remove( pageName );
        }

        final Set< String > refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() ) {
            m_referredBy.remove( pageName );
        }

        //  Remove any traces from the disk, too
        serializeToDisk();

        final String hashName = getHashFileName( pageName );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( f.exists() ) {
                f.delete();
            }
        }
    }

    /**
     *  Updates all references for the given page.
     *
     *  @param page wiki page for which references should be updated
     */
    @Override
    public void updateReferences( final Page page ) {
        final String pageData = m_engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
        updateReferences( page.getName(), scanWikiLinks( page, pageData ) );
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is removed
     *  and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <P>
     *  This is the method to call when a new page has been created, and we want to a) set up its references and b) notify the
     *  referred pages of the references. Use this method during run-time.
     *
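     *  A short sketch of the typical call made after a page has been saved (it mirrors what
     *  {@link #postSave(Context, String)} does; {@code page}, {@code pageText} and a {@code referenceManager}
     *  reference are assumed to be at hand):
     *  <pre>{@code
     *  Collection< String > links = referenceManager.scanWikiLinks( page, pageText );
     *  referenceManager.updateReferences( page.getName(), links );
     *  }</pre>
     *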
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    @Override
    public void updateReferences( final String page, final Collection< String > references ) {
        internalUpdateReferences( page, references );
        serializeToDisk();
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is
     *  removed and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <p>
     *  This method does not synchronize the database to disk.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences( String page, final Collection< String > references ) {
        page = getFinalPageName( page );

        // Create a new entry in m_refersTo.
        final Collection< String > oldRefTo = m_refersTo.get( page );
        m_refersTo.remove( page );

        final TreeSet< String > cleanedRefs = references.stream()
                                                        .map( this::getFinalPageName )
                                                        .collect( Collectors.toCollection( TreeSet::new ) );

        m_refersTo.put( page, cleanedRefs );

        //  We know the page exists, since it's making references somewhere. If an entry for it didn't exist previously
        //  in m_referredBy, make sure one is added now.
        if( !m_referredBy.containsKey( page ) ) {
            m_referredBy.put( page, new TreeSet<>() );
        }

        //  Get all pages that used to be referred to by 'page' and remove that reference. (We don't want to try to figure out
        //  which particular references were removed...)
        cleanReferredBy( page, oldRefTo );

        //  Notify all referenced pages that they are now referred to by 'page'.
        for( final String referredPageName : cleanedRefs ) {
            updateReferredBy( getFinalPageName( referredPageName ), page );
        }
    }

    /**
     * Returns the refers-to list. For debugging.
     *
     * @return The refers-to list.
     */
    protected Map< String, Collection< String > > getRefersTo() {
        return m_refersTo;
    }

    /**
     * Returns the referred-by list. For debugging.
     *
     * @return Referred-by lists.
     */
    protected Map< String, Set< String > > getReferredBy() {
        return m_referredBy;
    }

    /**
     * Cleans the 'referred by' lists, removing 'referrer' from the referrer set of every page it used to refer to.
     * Called when the reference list of 'referrer' is rebuilt.
     *
     * There are two ways to go about this. One is to look up all pages previously referred to by 'referrer', remove 'referrer'
     * from their lists, and let the update put them back in (except possibly removed ones).
     *
     * The other is to get the old referred-to list, compare it to the new one, and tell the pages missing from the latter to
     * remove 'referrer' from their list.
     *
     * We'll just try the first for now. Need to come back and optimize this a bit.
     */
    private void cleanReferredBy( final String referrer,
                                  final Collection< String > oldReferred ) {
        if( oldReferred == null ) {
            return;
        }

        for( final String referredPage : oldReferred ) {
            final Set< String > oldRefBy = m_referredBy.get( referredPage );
            if( oldRefBy != null ) {
                oldRefBy.remove( referrer );
            }

            // If the page is referred to by no one AND it doesn't even exist, we might just as well forget about this
            // entry. It will be added again elsewhere if new references appear.
            if( ( oldRefBy == null || oldRefBy.isEmpty() ) && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPage ) ) {
                m_referredBy.remove( referredPage );
            }
        }
    }

    /**
     * When initially building a ReferenceManager from scratch, call this method BEFORE calling updateReferences() with
     * a full list of existing page names. It builds the refersTo and referredBy key lists, thus enabling updateReferences()
     * to function correctly.
     * <P>
     * This method should NEVER be called after initialization. It clears all mappings from the reference tables.
     *
     * @param pages a Collection containing WikiPage objects.
     */
    private void buildKeyLists( final Collection< Page > pages ) {
        m_refersTo.clear();
        m_referredBy.clear();
        if( pages == null ) {
            return;
        }

        try {
            for( final Page page : pages ) {
                // We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put( page.getName(), new TreeSet<>() );
                // Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put( page.getName(), new TreeSet<>() );
            }
        } catch( final ClassCastException e ) {
            LOG.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
        }
    }


    /**
     * Marks the page as referred to by the referrer. The page does not need to exist yet; a referred-by entry is created
     * for it anyway, which simply means that some page, somewhere, has a link to a page that has not been created.
     */
    private void updateReferredBy( final String page, final String referrer ) {
        // We're not really interested in first level self-references.
        /*
        if( page.equals( referrer ) )
        {
            return;
        }
        */
        // Neither are we interested if plural forms refer to each other.
        if( m_matchEnglishPlurals ) {
            final String p2 = page.endsWith( "s" ) ? page.substring( 0, page.length() - 1 ) : page + "s";
            if( referrer.equals( p2 ) ) {
                return;
            }
        }

        // Even if 'page' has not been created yet, it can still be referenced. This requires we don't use m_referredBy
        // keys when looking up missing pages, of course.
        final Set< String > referrers = m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );
        referrers.add( referrer );
    }


    /**
     * Clears the references to a certain page, so it's no longer in the map.
     *
     * @param pagename  Name of the page to clear references for.
     */
    @Override
    public void clearPageEntries( String pagename ) {
        pagename = getFinalPageName( pagename );

        //  Remove this item from the referredBy list of any page which this item refers to.
        final Collection< String > c = m_refersTo.get( pagename );
        if( c != null ) {
            for( final String key : c ) {
                // The referred page may lack a referredBy entry (e.g. plural-matched self references), so guard against null.
                final Collection< ? > dref = m_referredBy.get( key );
                if( dref != null ) {
                    dref.remove( pagename );
                }
            }
        }

        //  Finally, remove direct references.
        m_referredBy.remove( pagename );
        m_refersTo.remove( pagename );
    }


    /**
     *  Finds all unreferenced pages. This requires a linear scan through m_referredBy to locate keys with null or empty values.
     *
     *  @return The Collection of Strings
     */
    @Override
    public Collection< String > findUnreferenced() {
        final ArrayList< String > unref = new ArrayList<>();
        for( final String key : m_referredBy.keySet() ) {
            final Set< ? > refs = getReferenceList( m_referredBy, key );
            if( refs == null || refs.isEmpty() ) {
                unref.add( key );
            }
        }

        return unref;
    }


    /**
     * Finds all references to non-existent pages. This requires a linear scan through the m_refersTo values; each value
     * must have a corresponding key entry in the reference Maps, otherwise such a page has never been created.
     * <P>
     * Returns a Collection containing Strings of uncreated page names. Each non-existent page name is shown only
     * once - we don't return information on who referred to it.
     *
     * @return A Collection of Strings
     */
    @Override
    public Collection< String > findUncreated() {
        final TreeSet< String > uncreated;

        // Go through the m_refersTo values and collect every referenced page that does not actually exist.
        final Collection< Collection< String > > allReferences = m_refersTo.values();
        uncreated = allReferences.stream()
                                 .filter( Objects::nonNull )
                                 .flatMap( Collection::stream )
                                 .filter( aReference -> !m_engine.getManager( PageManager.class ).wikiPageExists( aReference ) )
                                 .collect( Collectors.toCollection( TreeSet::new ) );

        return uncreated;
    }

    /**
     *  Searches for the given page in the given Map, and returns the set of references. This method also takes care of
     *  English plural matching.
     *
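     *  For example (illustrative names): with plural matching enabled, a lookup for {@code "WikiPage"} also merges in
     *  any references recorded under {@code "WikiPages"}, and vice versa.
     *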
     *  @param coll The Map to search in
     *  @param pagename The name to find.
     *  @return The references list.
     */
    private < T > Set< T > getReferenceList( final Map< String, Set< T > > coll, final String pagename ) {
        Set< T > refs = coll.get( pagename );

        if( m_matchEnglishPlurals ) {
            //  We'll also add matches from the "other" (singular or plural) form of the page name.
            final Set< T > refs2;

            if( pagename.endsWith( "s" ) ) {
                refs2 = coll.get( pagename.substring( 0, pagename.length() - 1 ) );
            } else {
                refs2 = coll.get( pagename + "s" );
            }

            if( refs2 != null ) {
                if( refs != null ) {
                    //  Merge into a new set so that we don't accidentally modify the set stored in the map.
                    final Set< T > merged = new TreeSet<>( refs );
                    merged.addAll( refs2 );
                    refs = merged;
                } else {
                    refs = refs2;
                }
            }
        }
        return refs;
    }

    /**
     * Find all pages that refer to this page. Returns null if the page does not exist or is not referenced at all,
     * otherwise returns a collection containing page names (String) that refer to this one.
     * <p>
     * @param pagename The page to find referrers for.
     * @return A Set of Strings.  May return null, if the page does not exist, or if it has no references.
     */
    @Override
    public Set< String > findReferrers( final String pagename ) {
        final Set< String > refs = getReferenceList( m_referredBy, pagename );
        if( refs == null || refs.isEmpty() ) {
            return null;
        }

        return refs;
    }

    /**
     *  Returns all pages that refer to this page.  Note that this method returns a live Set backed by the internal map,
     *  which may change abruptly, so any access through an iterator may result in a ConcurrentModificationException.
     *  <p>
     *  The advantage of using this method over findReferrers() is that it is very fast, as it does not create a new object.
     *  The disadvantages are that it does not do any mapping between plural names, and you may end up getting a
     *  ConcurrentModificationException.
     *
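     *  A short sketch of the trade-off (illustrative page name; a {@code referenceManager} reference is assumed, and
     *  the returned sets must not be modified by the caller):
     *  <pre>{@code
     *  Set< String > fast = referenceManager.findReferredBy( "Main" );  // live view, no plural matching, may be null
     *  Set< String > safe = referenceManager.findReferrers( "Main" );   // plural-aware, null when unreferenced
     *  }</pre>
     *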
     * @param pageName Page name to query.
     * @return A Set of Strings containing the names of all the pages that refer to this page.  May return null, if the page does
     *         not exist or has not been indexed yet.
     * @since 2.2.33
     */
    @Override
    public Set< String > findReferredBy( final String pageName ) {
        return m_unmutableReferredBy.get( getFinalPageName( pageName ) );
    }

    /**
     *  Returns all pages that this page refers to.  You can use this as a quick way of getting the links from a page, but note
     *  that it does not include any InterWiki, image, or external links.  It does contain attachments, though.
     *  <p>
     *  The Collection returned reflects the live internal state, so you must not modify it.  Since it is a live object,
     *  be prepared for ConcurrentModificationExceptions if you are using any kind of an iterator on it.
     *  <p>
     *  The returned value is a Collection, because a page may refer to another page multiple times.
     *
     * @param pageName Page name to query
     * @return A Collection of Strings containing the names of the pages that this page refers to. May return null, if the page
     *         does not exist or has not been indexed yet.
     * @since 2.2.33
     */
    @Override
    public Collection< String > findRefersTo( final String pageName ) {
        return m_unmutableRefersTo.get( getFinalPageName( pageName ) );
    }

    /**
     * This 'deepHashCode' can be used to determine if there were any modifications made to the underlying to and by maps of the
     * ReferenceManager. The maps of the ReferenceManager are not synchronized, so someone could add/remove entries in them while the
     * hashCode is being computed.
     *
     * This method traps and retries if a concurrent modification occurs.
     *
     * @return XOR of the hashCodes for the to and by maps of the ReferenceManager
     * @since 2.3.24
     */
    //
    //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only when the hashmaps are changed.  This is slow.
    //
    public int deepHashCode() {
        boolean failed = true;
        int signature = 0;

        while( failed ) {
            signature = 0;
            try {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            } catch( final ConcurrentModificationException e ) {
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     *  Returns a list of all pages that the ReferenceManager knows about. This should be roughly equivalent to
     *  PageManager.getAllPages(), but without the potential disk access overhead.  Note that this method is not guaranteed
     *  to return a Set of really all pages (especially during startup), but it is very fast.
     *
     *  @return A Set of all defined page names that ReferenceManager knows about.
     *  @since 2.3.24
     */
    @Override
    public Set< String > findCreated() {
        return new HashSet<>( m_refersTo.keySet() );
    }

    private String getFinalPageName( final String orig ) {
        try {
            final String s = m_engine.getFinalPageName( orig );
            return s != null ? s : orig;
        } catch( final ProviderException e ) {
            LOG.error( "Error while trying to fetch a page name; trying to cope with the situation.", e );
            return orig;
        }
    }

    /**
     *  {@inheritDoc}
     */
    @Override
    public void actionPerformed( final WikiEvent event ) {
        if( event instanceof WikiPageEvent && event.getType() == WikiPageEvent.PAGE_DELETED ) {
            final String pageName = ( ( WikiPageEvent ) event ).getPageName();
            if( pageName != null ) {
                pageRemoved( pageName );
            }
        }
    }

}