001/*
002    Licensed to the Apache Software Foundation (ASF) under one
003    or more contributor license agreements.  See the NOTICE file
004    distributed with this work for additional information
005    regarding copyright ownership.  The ASF licenses this file
006    to you under the Apache License, Version 2.0 (the
007    "License"); you may not use this file except in compliance
008    with the License.  You may obtain a copy of the License at
009
010       http://www.apache.org/licenses/LICENSE-2.0
011
012    Unless required by applicable law or agreed to in writing,
013    software distributed under the License is distributed on an
014    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015    KIND, either express or implied.  See the License for the
016    specific language governing permissions and limitations
017    under the License.
018 */
019package org.apache.wiki.references;
020
021import org.apache.commons.lang3.time.StopWatch;
022import org.apache.log4j.Logger;
023import org.apache.wiki.InternalWikiException;
024import org.apache.wiki.LinkCollector;
025import org.apache.wiki.api.core.Attachment;
026import org.apache.wiki.api.core.Context;
027import org.apache.wiki.api.core.Engine;
028import org.apache.wiki.api.core.Page;
029import org.apache.wiki.api.exceptions.ProviderException;
030import org.apache.wiki.api.filters.BasePageFilter;
031import org.apache.wiki.api.providers.PageProvider;
032import org.apache.wiki.api.providers.WikiProvider;
033import org.apache.wiki.api.spi.Wiki;
034import org.apache.wiki.attachment.AttachmentManager;
035import org.apache.wiki.event.WikiEvent;
036import org.apache.wiki.event.WikiEventManager;
037import org.apache.wiki.event.WikiPageEvent;
038import org.apache.wiki.pages.PageManager;
039import org.apache.wiki.render.RenderingManager;
040import org.apache.wiki.util.TextUtil;
041
042import java.io.BufferedInputStream;
043import java.io.BufferedOutputStream;
044import java.io.File;
045import java.io.FileInputStream;
046import java.io.FileOutputStream;
047import java.io.IOException;
048import java.io.ObjectInputStream;
049import java.io.ObjectOutputStream;
050import java.io.Serializable;
051import java.nio.charset.StandardCharsets;
052import java.security.MessageDigest;
053import java.security.NoSuchAlgorithmException;
054import java.util.ArrayList;
055import java.util.Collection;
056import java.util.Collections;
057import java.util.ConcurrentModificationException;
058import java.util.HashMap;
059import java.util.HashSet;
060import java.util.List;
061import java.util.Map;
062import java.util.Set;
063import java.util.TreeSet;
064
065/*
066  BUGS
067
068  - if a wikilink is added to a page, then removed, RefMan still thinks that the page refers to the wikilink page. Hm.
069
070  - if a page is deleted, gets very confused.
071
072  - Serialization causes page attributes to be missing, when InitializablePlugins are not executed properly.  Thus,
073    serialization should really also mark whether a page is serializable or not...
074 */
075
076
077/*
078   A word about synchronizing:
079
080   I expect this object to be accessed in three situations:
   - when an Engine is created and it scans its wikipages
082   - when the WE saves a page
083   - when a JSP page accesses one of the WE's ReferenceManagers to display a list of (un)referenced pages.
084
   So, access to this class is fairly rare, and usually triggered by user interaction. OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =). My deduction: using unsynchronized HashMaps etc. and syncing methods
   or code blocks is preferable to using slow, synced storage objects. We don't have iterative code here, so I'm going to use synced
   methods for now.
089
090   Please contact me if you notice problems with ReferenceManager, and especially with synchronization, or if you have suggestions about
091   syncing.
092
093   ebu@memecry.net
094*/
095
096/**
097 *  Keeps track of wikipage references:
098 *  <UL>
099 *  <LI>What pages a given page refers to
100 *  <LI>What pages refer to a given page
101 *  </UL>
102 *
103 *  This is a quick'n'dirty approach without any finesse in storage and searching algorithms; we trust java.util.*.
104 *  <P>
105 *  This class contains two HashMaps, m_refersTo and m_referredBy. The first is indexed by WikiPage names and contains a Collection of all
106 *  WikiPages the page refers to. (Multiple references are not counted, naturally.) The second is indexed by WikiPage names and contains
107 *  a Set of all pages that refer to the indexing page. (Notice - the keys of both Maps should be kept in sync.)
108 *  <P>
109 *  When a page is added or edited, its references are parsed, a Collection is received, and we crudely replace anything previous with
110 *  this new Collection. We then check each referenced page name and make sure they know they are referred to by the new page.
111 *  <P>
112 *  Based on this information, we can perform non-optimal searches for e.g. unreferenced pages, top ten lists, etc.
113 *  <P>
114 *  The owning class must take responsibility of filling in any pre-existing information, probably by loading each and every WikiPage
115 *  and calling this class to update the references when created.
116 *
117 *  @since 1.6.1 (as of 2.11.0, moved to org.apache.wiki.references)
118 */
119
// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...
123public class DefaultReferenceManager extends BasePageFilter implements ReferenceManager {
124
    /**
     *  Maps page wikiname to a Collection of pages it refers to. The Collection must contain Strings. The Collection may contain
     *  names of non-existing pages.
     */
    private Map< String, Collection< String > > m_refersTo;
    /** Read-only view over {@link #m_refersTo}; rebuilt whenever that map instance is replaced. */
    private Map< String, Collection< String > > m_unmutableRefersTo;

    /**
     *  Maps page wikiname to a Set of referring pages. The Set must contain Strings. Non-existing pages (a reference exists, but
     *  not a file for the page contents) may have an empty Set in m_referredBy.
     */
    private Map< String, Set< String > > m_referredBy;
    /** Read-only view over {@link #m_referredBy}; rebuilt whenever that map instance is replaced. */
    private Map< String, Set< String > > m_unmutableReferredBy;

    /** Whether English plural matching is enabled; read from {@code Engine.PROP_MATCHPLURALS} (default false) in the constructor. */
    private boolean m_matchEnglishPlurals;

    private static final Logger log = Logger.getLogger( DefaultReferenceManager.class);
    /** Name of the file, inside the engine's work directory, that stores the serialized reference maps. */
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    /** Name of the directory, inside the engine's work directory, that stores the per-page attribute caches. */
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** Also used as a generic serialization id: it is written as the first long of every serialization file. */
    private static final long serialVersionUID = 4L;
147
148    /**
149     *  Builds a new ReferenceManager.
150     *
151     *  @param engine The Engine to which this is managing references to.
152     */
153    public DefaultReferenceManager( final Engine engine ) {
154        m_refersTo = new HashMap<>();
155        m_referredBy = new HashMap<>();
156        m_engine = engine;
157        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), Engine.PROP_MATCHPLURALS, false );
158
159        //
160        //  Create two maps that contain unmutable versions of the two basic maps.
161        //
162        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
163        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
164    }
165
166    /**
167     *  Does a full reference update.  Does not sync; assumes that you do it afterwards.
168     */
169    private void updatePageReferences( final Page page ) throws ProviderException {
170        final String content = m_engine.getManager( PageManager.class ).getPageText( page.getName(), PageProvider.LATEST_VERSION );
171        final Collection< String > links = scanWikiLinks( page, content );
172        final TreeSet< String > res = new TreeSet<>( links );
173        final List< Attachment > attachments = m_engine.getManager( AttachmentManager.class ).listAttachments( page );
174        for( final Attachment att : attachments ) {
175            res.add( att.getName() );
176        }
177
178        internalUpdateReferences( page.getName(), res );
179    }
180
181    /**
182     *  Initializes the entire reference manager with the initial set of pages from the collection.
183     *
184     *  @param pages A collection of all pages you want to be included in the reference count.
185     *  @since 2.2
186     *  @throws ProviderException If reading of pages fails.
187     */
188    @Override
189    public void initialize( final Collection< Page > pages ) throws ProviderException {
190        log.debug( "Initializing new ReferenceManager with " + pages.size() + " initial pages." );
191        final StopWatch sw = new StopWatch();
192        sw.start();
193        log.info( "Starting cross reference scan of WikiPages" );
194
195        //  First, try to serialize old data from disk.  If that fails, we'll go and update the entire reference lists (which'll take time)
196        try {
197            //  Unserialize things.  The loop below cannot be combined with the other loop below, simply because
198            //  engine.getPage() has side effects such as loading initializing the user databases, which in turn want all
199            //  of the pages to be read already...
200            //
201            //  Yes, this is a kludge.  We know.  Will be fixed.
202            final long saved = unserializeFromDisk();
203
204            for( final Page page : pages ) {
205                unserializeAttrsFromDisk( page );
206            }
207
208            //  Now we must check if any of the pages have been changed  while we were in the electronic la-la-land,
209            //  and update the references for them.
210            for( final Page page : pages ) {
211                if( !( page instanceof Attachment ) ) {
212                    // Refresh with the latest copy
213                    final Page wp = m_engine.getManager( PageManager.class ).getPage( page.getName() );
214
215                    if( wp.getLastModified() == null ) {
216                        log.fatal( "Provider returns null lastModified.  Please submit a bug report." );
217                    } else if( wp.getLastModified().getTime() > saved ) {
218                        updatePageReferences( wp );
219                    }
220                }
221            }
222
223        } catch( final Exception e ) {
224            log.info( "Unable to unserialize old refmgr information, rebuilding database: " + e.getMessage() );
225            buildKeyLists( pages );
226
227            // Scan the existing pages from disk and update references in the manager.
228            for( final Page page : pages ) {
229                // We cannot build a reference list from the contents of attachments, so we skip them.
230                if( !( page instanceof Attachment ) ) {
231                    updatePageReferences( page );
232                    serializeAttrsToDisk( page );
233                }
234            }
235
236            serializeToDisk();
237        }
238
239        sw.stop();
240        log.info( "Cross reference scan done in "+sw );
241
242        WikiEventManager.addWikiEventListener( m_engine.getManager( PageManager.class ), this );
243    }
244
245    /**
246     *  Reads the serialized data from the disk back to memory. Returns the date when the data was last written on disk
247     */
248    @SuppressWarnings("unchecked")
249    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
250        final long saved;
251
252        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
253        try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
254            final StopWatch sw = new StopWatch();
255            sw.start();
256
257            final long ver = in.readLong();
258
259            if( ver != serialVersionUID ) {
260                throw new IOException("File format has changed; I need to recalculate references.");
261            }
262
263            saved        = in.readLong();
264            m_refersTo   = ( Map< String, Collection< String > > ) in.readObject();
265            m_referredBy = ( Map< String, Set< String > > ) in.readObject();
266
267            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
268            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
269
270            sw.stop();
271            log.debug("Read serialized data successfully in "+sw);
272        }
273
274        return saved;
275    }
276
277    /**
278     *  Serializes hashmaps to disk.  The format is private, don't touch it.
279     */
280    private synchronized void serializeToDisk() {
281        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
282        try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
283            final StopWatch sw = new StopWatch();
284            sw.start();
285
286            out.writeLong( serialVersionUID );
287            out.writeLong( System.currentTimeMillis() ); // Timestamp
288            out.writeObject( m_refersTo );
289            out.writeObject( m_referredBy );
290
291            sw.stop();
292
293            log.debug("serialization done - took "+sw);
294        } catch( final IOException ioe ) {
295            log.error("Unable to serialize!", ioe);
296        }
297    }
298
299    private String getHashFileName( final String pageName ) {
300        if( pageName == null ) {
301            return null;
302        }
303        try {
304            final MessageDigest digest = MessageDigest.getInstance( "MD5" );
305            final byte[] dig = digest.digest( pageName.getBytes( StandardCharsets.UTF_8 ) );
306
307            return TextUtil.toHexString( dig ) + ".cache";
308        } catch( final NoSuchAlgorithmException e ) {
309            log.fatal( "What do you mean - no such algorithm?", e );
310            return null;
311        }
312    }
313
314    /**
315     *  Reads the serialized data from the disk back to memory. Returns the date when the data was last written on disk
316     */
317    private synchronized long unserializeAttrsFromDisk( final Page p ) throws IOException, ClassNotFoundException {
318        long saved = 0L;
319
320        //  Find attribute cache, and check if it exists
321        final String hashName = getHashFileName( p.getName() );
322        if( hashName != null ) {
323            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
324            f = new File( f, hashName );
325            if( !f.exists() ) {
326                return 0L;
327            }
328
329            try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
330                final StopWatch sw = new StopWatch();
331                sw.start();
332                log.debug( "Deserializing attributes for " + p.getName() );
333
334                final long ver = in.readLong();
335                if( ver != serialVersionUID ) {
336                    log.debug("File format has changed; cannot deserialize.");
337                    return 0L;
338                }
339
340                saved = in.readLong();
341                final String name  = in.readUTF();
342                if( !name.equals( p.getName() ) ) {
343                    log.debug("File name does not match (" + name + "), skipping...");
344                    return 0L; // Not here
345                }
346
347                final long entries = in.readLong();
348                for( int i = 0; i < entries; i++ ) {
349                    final String key   = in.readUTF();
350                    final Object value = in.readObject();
351                    p.setAttribute( key, value );
352                    log.debug("   attr: "+key+"="+value);
353                }
354
355                sw.stop();
356                log.debug("Read serialized data for "+name+" successfully in "+sw);
357                p.setHasMetadata();
358            }
359        }
360
361        return saved;
362    }
363
364    /**
365     *  Serializes hashmaps to disk.  The format is private, don't touch it.
366     */
367    private synchronized void serializeAttrsToDisk( final Page p ) {
368        final StopWatch sw = new StopWatch();
369        sw.start();
370
371        final String hashName = getHashFileName( p.getName() );
372        if( hashName != null ) {
373            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
374            if( !f.exists() ) {
375                f.mkdirs();
376            }
377
378            //  Create a digest for the name
379            f = new File( f, hashName );
380
381            try( final ObjectOutputStream out =  new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
382                // new Set to avoid concurrency issues
383                final Set< Map.Entry < String, Object > > entries = new HashSet<>( p.getAttributes().entrySet() );
384
385                if( entries.size() == 0 ) {
386                    //  Nothing to serialize, therefore we will just simply remove the serialization file so that the
387                    //  next time we boot, we don't deserialize old data.
388                    f.delete();
389                    return;
390                }
391
392                out.writeLong( serialVersionUID );
393                out.writeLong( System.currentTimeMillis() ); // Timestamp
394                out.writeUTF( p.getName() );
395                out.writeLong( entries.size() );
396
397                for( final Map.Entry< String, Object > e : entries ) {
398                    if( e.getValue() instanceof Serializable ) {
399                        out.writeUTF( e.getKey() );
400                        out.writeObject( e.getValue() );
401                    }
402                }
403
404            } catch( final IOException e ) {
405                log.error( "Unable to serialize!", e );
406            } finally {
407                sw.stop();
408                log.debug( "serialization for " + p.getName() + " done - took " + sw );
409            }
410        }
411
412    }
413
414    /**
415     *  After the page has been saved, updates the reference lists.
416     *
417     *  @param context {@inheritDoc}
418     *  @param content {@inheritDoc}
419     */
420    @Override
421    public void postSave( final Context context, final String content ) {
422        final Page page = context.getPage();
423        updateReferences( page.getName(), scanWikiLinks( page, content ) );
424        serializeAttrsToDisk( page );
425    }
426
427    /**
428     *  Reads a WikiPageful of data from a String and returns all links internal to this Wiki in a Collection.
429     *
430     *  @param page The WikiPage to scan
431     *  @param pagedata The page contents
432     *  @return a Collection of Strings
433     */
434    @Override
435    public Collection< String > scanWikiLinks( final Page page, final String pagedata ) {
436        final LinkCollector localCollector = new LinkCollector();
437        m_engine.getManager( RenderingManager.class ).textToHTML( Wiki.context().create( m_engine, page ),
438                                                                  pagedata,
439                                                                  localCollector,
440                                                                  null,
441                                                                  localCollector,
442                                                                  false,
443                                                                  true );
444
445        return localCollector.getLinks();
446    }
447
448    /**
449     * Updates the m_referedTo and m_referredBy hashmaps when a page has been deleted.
450     * <P>
451     * Within the m_refersTo map the pagename is a key. The whole key-value-set has to be removed to keep the map clean.
452     * Within the m_referredBy map the name is stored as a value. Since a key can have more than one value we have to
453     * delete just the key-value-pair referring page:deleted page.
454     *
455     *  @param page Name of the page to remove from the maps.
456     */
457    @Override
458    public synchronized void pageRemoved( final Page page ) {
459        pageRemoved( page.getName() );
460    }
461
462    private void pageRemoved( final String pageName ) {
463        final Collection< String > refTo = m_refersTo.get( pageName );
464        if( refTo != null ) {
465            for( final String referredPageName : refTo ) {
466                final Set< String > refBy = m_referredBy.get( referredPageName );
467                if( refBy == null ) {
468                    throw new InternalWikiException( "Refmgr out of sync: page " + pageName +
469                                                     " refers to " + referredPageName + ", which has null referrers." );
470                }
471
472                refBy.remove( pageName );
473                m_referredBy.remove( referredPageName );
474
475                // We won't put it back again if it becomes empty and does not exist.  It will be added
476                // later on anyway, if it becomes referenced again.
477                if( !( refBy.isEmpty() && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPageName ) ) ) {
478                    m_referredBy.put( referredPageName, refBy );
479                }
480            }
481
482            log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName ));
483            m_refersTo.remove( pageName );
484        }
485
486        final Set< String > refBy = m_referredBy.get( pageName );
487        if( refBy == null || refBy.isEmpty() ) {
488            m_referredBy.remove( pageName );
489        }
490
491        //  Remove any traces from the disk, too
492        serializeToDisk();
493
494        final String hashName = getHashFileName( pageName );
495        if( hashName != null ) {
496            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
497            f = new File( f, getHashFileName( pageName ) );
498            if( f.exists() ) {
499                f.delete();
500            }
501        }
502    }
503
504    /**
505     *  Updates all references for the given page.
506     *
507     *  @param page wiki page for which references should be updated
508     */
509    @Override
510    public void updateReferences( final Page page ) {
511        final String pageData = m_engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
512        updateReferences( page.getName(), scanWikiLinks( page, pageData ) );
513    }
514
    /**
     *  Updates the referred pages of a new or edited WikiPage and then writes the reference tables to disk. If a refersTo
     *  entry for this page already exists, it is replaced by one built from scratch, and every referenced page is told
     *  about its new referrer.
     *  <P>
     *  This is the method to call when a new page has been created and we want to a) set up its references and b) notify the
     *  referred pages of the references. Use this method during run-time.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    @Override
    public synchronized void updateReferences( final String page, final Collection< String > references ) {
        // In-memory update first, then persist both reference maps.
        internalUpdateReferences( page, references );
        serializeToDisk();
    }
530
531    /**
532     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is
533     *  removed and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
534     *  <p>
535     *  This method does not synchronize the database to disk.
536     *
537     *  @param page Name of the page to update.
538     *  @param references A Collection of Strings, each one pointing to a page this page references.
539     */
540    private void internalUpdateReferences( String page, final Collection< String > references) {
541        page = getFinalPageName( page );
542
543        // Create a new entry in m_refersTo.
544        final Collection< String > oldRefTo = m_refersTo.get( page );
545        m_refersTo.remove( page );
546
547        final TreeSet< String > cleanedRefs = new TreeSet<>();
548        for( final String ref : references ) {
549            final String reference = getFinalPageName( ref );
550            cleanedRefs.add( reference );
551        }
552
553        m_refersTo.put( page, cleanedRefs );
554
555        //  We know the page exists, since it's making references somewhere. If an entry for it didn't exist previously
556        //  in m_referredBy, make sure one is added now.
557        if( !m_referredBy.containsKey( page ) ) {
558            m_referredBy.put( page, new TreeSet<>() );
559        }
560
561        //  Get all pages that used to be referred to by 'page' and remove that reference. (We don't want to try to figure out
562        //  which particular references were removed...)
563        cleanReferredBy( page, oldRefTo, cleanedRefs );
564
565        //  Notify all referred pages of their referinesshoodicity.
566        for( final String referredPageName : cleanedRefs ) {
567            updateReferredBy( getFinalPageName( referredPageName ), page );
568        }
569    }
570
    /**
     * Returns the refers-to map itself (the live, mutable backing map, not the read-only view). For debugging.
     *
     * @return The map from page name to the names of the pages it refers to.
     */
    protected Map< String, Collection< String > > getRefersTo() {
        return m_refersTo;
    }
579
    /**
     * Returns the referred-by map itself (the live, mutable backing map, not the read-only view). For debugging.
     *
     * @return The map from page name to the names of the pages referring to it.
     */
    protected Map< String, Set< String > > getReferredBy() {
        return m_referredBy;
    }
588
589    /**
590     * Cleans the 'referred by' list, removing references by 'referrer' to any other page. Called after 'referrer' is removed.
591     *
592     * Two ways to go about this. One is to look up all pages previously referred by referrer and remove referrer
593     * from their lists, and let the update put them back in (except possibly removed ones).
594     *
595     * The other is to get the old referred-to list, compare to the new, and tell the ones missing in the latter to remove referrer from
596     * their list.
597     *
598     * We'll just try the first for now. Need to come back and optimize this a bit.
599     */
600    private void cleanReferredBy( final String referrer,
601                                  final Collection< String > oldReferred,
602                                  final Collection< String > newReferred ) {
603        if( oldReferred == null ) {
604            return;
605        }
606
607        for( final String referredPage : oldReferred ) {
608            final Set< String > oldRefBy = m_referredBy.get( referredPage );
609            if( oldRefBy != null ) {
610                oldRefBy.remove( referrer );
611            }
612
613            // If the page is referred to by no one AND it doesn't even exist, we might just as well forget about this
614            // entry. It will be added again elsewhere if new references appear.
615            if( ( oldRefBy == null || oldRefBy.isEmpty() ) && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPage ) ) {
616                m_referredBy.remove( referredPage );
617            }
618        }
619    }
620
621    /**
622     * When initially building a ReferenceManager from scratch, call this method BEFORE calling updateReferences() with
623     * a full list of existing page names. It builds the refersTo and referredBy key lists, thus enabling updateReferences()
624     * to function correctly.
625     * <P>
626     * This method should NEVER be called after initialization. It clears all mappings from the reference tables.
627     *
628     * @param pages a Collection containing WikiPage objects.
629     */
630    private synchronized void buildKeyLists( final Collection< Page > pages ) {
631        m_refersTo.clear();
632        m_referredBy.clear();
633        if( pages == null ) {
634            return;
635        }
636
637        try {
638            for( final Page page : pages ) {
639                // We add a non-null entry to referredBy to indicate the referred page exists
640                m_referredBy.put( page.getName(), new TreeSet<>() );
641                // Just add a key to refersTo; the keys need to be in sync with referredBy.
642                m_refersTo.put( page.getName(), null );
643            }
644        } catch( final ClassCastException e ) {
645            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
646        }
647    }
648
649
650    /**
651     * Marks the page as referred to by the referrer. If the page does not exist previously, nothing is done. (This means
652     * that some page, somewhere, has a link to a page that does not exist.)
653     * <P>
654     * This method is NOT synchronized. It should only be referred to from within a synchronized method, or it should be
655     * made synced if necessary.
656     */
657    private void updateReferredBy( final String page, final String referrer ) {
658        // We're not really interested in first level self-references.
659        /*
660        if( page.equals( referrer ) )
661        {
662            return;
663        }
664        */
665        // Neither are we interested if plural forms refer to each other.
666        if( m_matchEnglishPlurals ) {
667            final String p2 = page.endsWith( "s" ) ? page.substring( 0, page.length() - 1 ) : page + "s";
668            if( referrer.equals( p2 ) ) {
669                return;
670            }
671        }
672
673        // Even if 'page' has not been created yet, it can still be referenced. This requires we don't use m_referredBy
674        // keys when looking up missing pages, of course.
675        final Set< String > referrers = m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );
676        referrers.add( referrer );
677    }
678
679
680    /**
681     * Clears the references to a certain page so it's no longer in the map.
682     *
683     * @param pagename  Name of the page to clear references for.
684     */
685    @Override public synchronized void clearPageEntries( String pagename ) {
686        pagename = getFinalPageName( pagename );
687
688        //  Remove this item from the referredBy list of any page which this item refers to.
689        final Collection< String > c = m_refersTo.get( pagename );
690        if( c != null ) {
691            for( final String key : c ) {
692                final Collection< ? > dref = m_referredBy.get( key );
693                dref.remove( pagename );
694            }
695        }
696
697        //  Finally, remove direct references.
698        m_referredBy.remove( pagename );
699        m_refersTo.remove( pagename );
700    }
701
702
703    /**
704     *  Finds all unreferenced pages. This requires a linear scan through m_referredBy to locate keys with null or empty values.
705     *
706     *  @return The Collection of Strings
707     */
708    @Override public synchronized Collection< String > findUnreferenced() {
709        final ArrayList< String > unref = new ArrayList<>();
710        for( final String key : m_referredBy.keySet() ) {
711            final Set< ? > refs = getReferenceList( m_referredBy, key );
712            if( refs == null || refs.isEmpty() ) {
713                unref.add( key );
714            }
715        }
716
717        return unref;
718    }
719
720
721    /**
722     * Finds all references to non-existant pages. This requires a linear scan through m_refersTo values; each value
723     * must have a corresponding key entry in the reference Maps, otherwise such a page has never been created.
724     * <P>
725     * Returns a Collection containing Strings of unreferenced page names. Each non-existant page name is shown only
726     * once - we don't return information on who referred to it.
727     *
728     * @return A Collection of Strings
729     */
730    @Override public synchronized Collection< String > findUncreated() {
731        final TreeSet< String > uncreated = new TreeSet<>();
732
733        // Go through m_refersTo values and check that m_refersTo has the corresponding keys.
734        // We want to reread the code to make sure our HashMaps are in sync...
735        final Collection< Collection< String > > allReferences = m_refersTo.values();
736        for( final Collection<String> refs : allReferences ) {
737            if( refs != null ) {
738                for( final String aReference : refs ) {
739                    if( !m_engine.getManager( PageManager.class ).wikiPageExists( aReference ) ) {
740                        uncreated.add( aReference );
741                    }
742                }
743            }
744        }
745
746        return uncreated;
747    }
748
749    /**
750     *  Searches for the given page in the given Map, and returns the set of references. This method also takes care of
751     *  English plural matching.
752     *
753     *  @param coll The Map to search in
754     *  @param pagename The name to find.
755     *  @return The references list.
756     */
757    private < T > Set< T > getReferenceList( final Map< String, Set< T > > coll, final String pagename ) {
758        Set< T > refs = coll.get( pagename );
759
760        if( m_matchEnglishPlurals ) {
761            //  We'll add also matches from the "other" page.
762            final Set< T > refs2;
763
764            if( pagename.endsWith( "s" ) ) {
765                refs2 = coll.get( pagename.substring( 0, pagename.length() - 1 ) );
766            } else {
767                refs2 = coll.get( pagename + "s" );
768            }
769
770            if( refs2 != null ) {
771                if( refs != null ) {
772                    refs.addAll( refs2 );
773                } else {
774                    refs = refs2;
775                }
776            }
777        }
778        return refs;
779    }
780
781    /**
782     * Find all pages that refer to this page. Returns null if the page does not exist or is not referenced at all,
783     * otherwise returns a collection containing page names (String) that refer to this one.
784     * <p>
785     * @param pagename The page to find referrers for.
786     * @return A Set of Strings.  May return null, if the page does not exist, or if it has no references.
787     */
788    @Override public synchronized Set< String > findReferrers( final String pagename ) {
789        final Set< String > refs = getReferenceList( m_referredBy, pagename );
790        if( refs == null || refs.isEmpty() ) {
791            return null;
792        }
793
794        return refs;
795    }
796
797    /**
798     *  Returns all pages that refer to this page.  Note that this method returns an unmodifiable Map, which may be abruptly changed.
799     *  So any access to any iterator may result in a ConcurrentModificationException.
800     *  <p>
801     *  The advantages of using this method over findReferrers() is that it is very fast, as it does not create a new object.
802     *  The disadvantages are that it does not do any mapping between plural names, and you may end up getting a
803     *  ConcurrentModificationException.
804     *
805     * @param pageName Page name to query.
806     * @return A Set of Strings containing the names of all the pages that refer to this page.  May return null, if the page does
807     *         not exist or has not been indexed yet.
808     * @since 2.2.33
809     */
810    @Override public Set< String > findReferredBy( final String pageName ) {
811        return m_unmutableReferredBy.get( getFinalPageName(pageName) );
812    }
813
814    /**
815     *  Returns all pages that this page refers to.  You can use this as a quick way of getting the links from a page, but note
816     *  that it does not link any InterWiki, image, or external links.  It does contain attachments, though.
817     *  <p>
818     *  The Collection returned is unmutable, so you cannot change it.  It does reflect the current status and thus is a live
819     *  object.  So, if you are using any kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
820     *  <p>
821     *  The returned value is a Collection, because a page may refer to another page multiple times.
822     *
823     * @param pageName Page name to query
824     * @return A Collection of Strings containing the names of the pages that this page refers to. May return null, if the page
825     *         does not exist or has not been indexed yet.
826     * @since 2.2.33
827     */
828    @Override public Collection< String > findRefersTo( final String pageName ) {
829        return m_unmutableRefersTo.get( getFinalPageName( pageName ) );
830    }
831
832    /**
833     * This 'deepHashCode' can be used to determine if there were any modifications made to the underlying to and by maps of the
834     * ReferenceManager. The maps of the ReferenceManager are not synchronized, so someone could add/remove entries in them while the
835     * hashCode is being computed.
836     *
837     * This method traps and retries if a concurrent modification occurs.
838     *
839     * @return Sum of the hashCodes for the to and by maps of the ReferenceManager
840     * @since 2.3.24
841     */
842    //
843    //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only when the hashmaps are changed.  This is slow.
844    //
845    public int deepHashCode() {
846        boolean failed = true;
847        int signature = 0;
848
849        while( failed ) {
850            signature = 0;
851            try {
852                signature ^= m_referredBy.hashCode();
853                signature ^= m_refersTo.hashCode();
854                failed = false;
855            } catch ( final ConcurrentModificationException e) {
856                Thread.yield();
857            }
858        }
859
860        return signature;
861    }
862
863    /**
864     *  Returns a list of all pages that the ReferenceManager knows about. This should be roughly equivalent to
865     *  PageManager.getAllPages(), but without the potential disk access overhead.  Note that this method is not guaranteed
866     *  to return a Set of really all pages (especially during startup), but it is very fast.
867     *
868     *  @return A Set of all defined page names that ReferenceManager knows about.
869     *  @since 2.3.24
870     */
871    @Override public Set< String > findCreated() {
872        return new HashSet<>( m_refersTo.keySet() );
873    }
874
875    private String getFinalPageName( final String orig ) {
876        try {
877            final String s = m_engine.getFinalPageName( orig );
878            return s != null ? s : orig;
879        } catch( final ProviderException e ) {
880            log.error("Error while trying to fetch a page name; trying to cope with the situation.",e);
881            return orig;
882        }
883    }
884
885    /**
886     *  {@inheritDoc}
887     */
888    @Override
889    public void actionPerformed( final WikiEvent event ) {
890        if( event instanceof WikiPageEvent && event.getType() == WikiPageEvent.PAGE_DELETED ) {
891            final String pageName = ( ( WikiPageEvent ) event ).getPageName();
892            if( pageName != null ) {
893                pageRemoved( pageName );
894            }
895        }
896    }
897
898}