/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements. See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership. The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License. You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied. See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.lang.time.StopWatch;
import org.apache.log4j.Logger;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.api.filters.BasicPageFilter;
import org.apache.wiki.attachment.Attachment;
import org.apache.wiki.event.WikiEvent;
import org.apache.wiki.event.WikiEventListener;
import org.apache.wiki.event.WikiEventUtils;
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.modules.InternalModule;
import org.apache.wiki.providers.WikiPageProvider;
import org.apache.wiki.util.TextUtil;

/*
   BUGS

   - If a wikilink is added to a page and then removed, RefMan still thinks
     that the page refers to the wikilink page.  Hm.

   - If a page is deleted, RefMan gets very confused.

   - Serialization causes page attributes to go missing when InitializablePlugins
     are not executed properly.  Thus, serialization should really also mark
     whether a page is serializable or not...
*/


/*
   A word about synchronizing:

   I expect this object to be accessed in three situations:
   - when a WikiEngine is created and it scans its wikipages
   - when the WE saves a page
   - when a JSP page accesses one of the WE's ReferenceManagers
     to display a list of (un)referenced pages.

   So, access to this class is fairly rare, and usually triggered by
   user interaction.  OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =).
   My deduction: using unsynchronized HashMaps etc. and synchronizing
   methods or code blocks is preferable to using slow, synchronized
   storage objects.  We don't have iterative code here, so I'm going to
   use synchronized methods for now.

   Please contact me if you notice problems with ReferenceManager, and
   especially with synchronization, or if you have suggestions about
   syncing.

   ebu@memecry.net
*/

/**
 *  Keeps track of wikipage references:
 *  <UL>
 *  <LI>What pages a given page refers to
 *  <LI>What pages refer to a given page
 *  </UL>
 *
 *  This is a quick'n'dirty approach without any finesse in storage and
 *  searching algorithms; we trust java.util.*.
 *  <P>
 *  This class contains two HashMaps, m_refersTo and m_referredBy.  The
 *  first is indexed by WikiPage names and contains a Collection of all
 *  WikiPages the page refers to.  (Multiple references are not counted,
 *  naturally.)  The second is indexed by WikiPage names and contains
 *  a Set of all pages that refer to the indexing page.  (Note that
 *  the keys of both Maps should be kept in sync.)
 *  <P>
 *  When a page is added or edited, its references are parsed, a Collection
 *  is received, and we crudely replace anything previous with this new
 *  Collection.  We then check each referenced page name and make sure it
 *  knows it is referred to by the new page.
 *  <P>
 *  Based on this information, we can perform non-optimal searches for
 *  e.g. unreferenced pages, top ten lists, etc.
 *  <P>
 *  The owning class must take responsibility for filling in any pre-existing
 *  information, probably by loading each and every WikiPage and calling this
 *  class to update the references when created.
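 *  <p>
 *  A minimal usage sketch (assumes an initialized WikiEngine named
 *  {@code engine}; the page name is hypothetical):
 *  <pre>
 *    ReferenceManager mgr = engine.getReferenceManager();
 *    Collection referrers = mgr.findReferrers( "Main" );  // pages linking to "Main"
 *    Collection uncreated = mgr.findUncreated();          // dangling links
 *  </pre>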
 *
 *  @since 1.6.1
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...

public class ReferenceManager
    extends BasicPageFilter
    implements InternalModule, WikiEventListener
{
    /** Maps page wikiname to a Collection of pages it refers to.  The Collection
     *  must contain Strings.  The Collection may contain names of non-existing
     *  pages.
     */
    private Map<String,Collection<String>> m_refersTo;
    private Map<String,Collection<String>> m_unmutableRefersTo;

    /** Maps page wikiname to a Set of referring pages.  The Set must
     *  contain Strings.  Non-existing pages (a reference exists, but not a file
     *  for the page contents) may have an empty Set in m_referredBy.
     */
    private Map<String,Set<String>> m_referredBy;
    private Map<String,Set<String>> m_unmutableReferredBy;

    /** The WikiEngine that owns this object. */
    private WikiEngine m_engine;

    private boolean m_matchEnglishPlurals = false;

    private static Logger log = Logger.getLogger(ReferenceManager.class);

    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We use this also as a generic serialization id. */
    private static final long serialVersionUID = 4L;
    /**
     *  Builds a new ReferenceManager.
     *
     *  @param engine The WikiEngine whose references this instance manages.
     */
    public ReferenceManager( WikiEngine engine )
    {
        m_refersTo   = new HashMap<String,Collection<String>>();
        m_referredBy = new HashMap<String,Set<String>>();
        m_engine = engine;

        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(),
                                                             WikiEngine.PROP_MATCHPLURALS,
                                                             m_matchEnglishPlurals );

        //
        //  Create two maps that contain unmodifiable views of the two basic maps.
        //
        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
    }

    /**
     *  Does a full reference update.  Does not synchronize; the caller is
     *  expected to do that afterwards.
     */
    private void updatePageReferences( WikiPage page ) throws ProviderException
    {
        String content = m_engine.getPageManager().getPageText( page.getName(),
                                                                WikiPageProvider.LATEST_VERSION );

        TreeSet<String> res = new TreeSet<String>();
        Collection<String> links = m_engine.scanWikiLinks( page, content );

        res.addAll( links );
        Collection attachments = m_engine.getAttachmentManager().listAttachments( page );

        for( Iterator atti = attachments.iterator(); atti.hasNext(); )
        {
            res.add( ((Attachment)(atti.next())).getName() );
        }

        internalUpdateReferences( page.getName(), res );
    }

    /**
     *  Initializes the entire reference manager with the initial set of pages
     *  from the collection.
     *
     *  @param pages A collection of all pages you want to be included in the reference
     *               count.
     *  @since 2.2
     *  @throws ProviderException If reading of pages fails.
     */
    public void initialize( Collection pages )
        throws ProviderException
    {
        log.debug( "Initializing new ReferenceManager with "+pages.size()+" initial pages." );
        StopWatch sw = new StopWatch();
        sw.start();
        log.info( "Starting cross reference scan of WikiPages" );

        //
        //  First, try to unserialize the old data from disk.  If that fails,
        //  we'll go and update the entire reference lists (which will take
        //  time).
        //
        try
        {
            //
            //  Unserialize things.  The loop below cannot be combined with
            //  the other loop below, simply because engine.getPage() has
            //  side effects such as initializing the user databases,
            //  which in turn want all of the pages to be read already...
            //
            //  Yes, this is a kludge.  We know.  Will be fixed.
            //
            long saved = unserializeFromDisk();

            for( Iterator it = pages.iterator(); it.hasNext(); )
            {
                WikiPage page = (WikiPage) it.next();

                unserializeAttrsFromDisk( page );
            }

            //
            //  Now we must check if any of the pages have been changed
            //  while we were in the electronic la-la-land, and update
            //  the references for them.
            //

            Iterator it = pages.iterator();

            while( it.hasNext() )
            {
                WikiPage page = (WikiPage) it.next();

                if( page instanceof Attachment )
                {
                    // Skip attachments
                }
                else
                {
                    // Refresh with the latest copy
                    page = m_engine.getPage( page.getName() );

                    if( page.getLastModified() == null )
                    {
                        log.fatal( "Provider returns null lastModified.  Please submit a bug report." );
                    }
                    else if( page.getLastModified().getTime() > saved )
                    {
                        updatePageReferences( page );
                    }
                }
            }
        }
        catch( Exception e )
        {
            log.info( "Unable to unserialize old refmgr information, rebuilding database: "+e.getMessage() );
            buildKeyLists( pages );

            // Scan the existing pages from disk and update references in the manager.
            Iterator it = pages.iterator();
            while( it.hasNext() )
            {
                WikiPage page = (WikiPage)it.next();

                if( page instanceof Attachment )
                {
                    // We cannot build a reference list from the contents
                    // of attachments, so we skip them.
                }
                else
                {
                    updatePageReferences( page );

                    serializeAttrsToDisk( page );
                }
            }

            serializeToDisk();
        }

        sw.stop();
        log.info( "Cross reference scan done in "+sw );

        WikiEventUtils.addWikiEventListener( m_engine.getPageManager(),
                                             WikiPageEvent.PAGE_DELETED, this );
    }

    /**
     *  Reads the serialized data from disk back into memory.
     *  Returns the timestamp of when the data was last written to disk.
     */
    @SuppressWarnings("unchecked")
    private synchronized long unserializeFromDisk()
        throws IOException,
               ClassNotFoundException
    {
        ObjectInputStream in = null;
        long saved = 0L;

        try
        {
            StopWatch sw = new StopWatch();
            sw.start();

            File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );

            in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );

            long ver = in.readLong();

            if( ver != serialVersionUID )
            {
                throw new IOException("File format has changed; I need to recalculate references.");
            }

            saved        = in.readLong();
            m_refersTo   = (Map) in.readObject();
            m_referredBy = (Map) in.readObject();

            in.close();

            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );

            sw.stop();
            log.debug("Read serialized data successfully in "+sw);
        }
        finally
        {
            if( in != null ) in.close();
        }

        return saved;
    }

    /**
     *  Serializes hashmaps to disk.  The format is private, don't touch it.
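     *  For reference, the current layout (an observation of the code below,
     *  not a stable contract) is: serialVersionUID (long), timestamp (long),
     *  m_refersTo (Map), m_referredBy (Map), written in that order.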
     */
    private synchronized void serializeToDisk()
    {
        ObjectOutputStream out = null;

        try
        {
            StopWatch sw = new StopWatch();
            sw.start();

            File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );

            out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp
            out.writeObject( m_refersTo );
            out.writeObject( m_referredBy );

            out.close();

            sw.stop();

            log.debug("serialization done - took "+sw);
        }
        catch( IOException e )
        {
            log.error("Unable to serialize!");

            try
            {
                if( out != null ) out.close();
            }
            catch( IOException ex ) {}
        }
    }

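    /**
     *  Returns the name of the attribute cache file for the given page: an MD5
     *  hash of the UTF-8 encoded page name, plus a ".cache" suffix.  Hashing
     *  keeps the file name filesystem-safe no matter what characters the page
     *  name contains.
     */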
    private String getHashFileName( String pageName )
        throws NoSuchAlgorithmException
    {
        MessageDigest digest = MessageDigest.getInstance("MD5");

        byte[] dig;
        try
        {
            dig = digest.digest( pageName.getBytes("UTF-8") );
        }
        catch( UnsupportedEncodingException e )
        {
            throw new InternalWikiException("AAAAGH!  UTF-8 is gone!  My eyes!  It burns...!");
        }

        return TextUtil.toHexString(dig)+".cache";
    }

    /**
     *  Reads a single page's serialized attribute data from disk back into memory.
     *  Returns the timestamp of when the data was last written to disk, or 0 if
     *  there was no valid cache for this page.
     */
    private synchronized long unserializeAttrsFromDisk( WikiPage p )
        throws IOException,
               ClassNotFoundException
    {
        ObjectInputStream in = null;
        long saved = 0L;

        try
        {
            StopWatch sw = new StopWatch();
            sw.start();

            //
            //  Find the attribute cache, and check if it exists
            //
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );

            f = new File( f, getHashFileName(p.getName()) );

            if( !f.exists() )
            {
                return 0L;
            }

            log.debug("Deserializing attributes for "+p.getName());

            in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );

            long ver = in.readLong();

            if( ver != serialVersionUID )
            {
                log.debug("File format has changed; cannot deserialize.");
                return 0L;
            }

            saved = in.readLong();

            String name = in.readUTF();

            if( !name.equals(p.getName()) )
            {
                log.debug("File name does not match ("+name+"), skipping...");
                return 0L; // Not here
            }

            long entries = in.readLong();

            for( int i = 0; i < entries; i++ )
            {
                String key   = in.readUTF();
                Object value = in.readObject();

                p.setAttribute( key, value );

                log.debug("   attr: "+key+"="+value);
            }

            in.close();

            sw.stop();
            log.debug("Read serialized data for "+name+" successfully in "+sw);
            p.setHasMetadata();
        }
        catch( NoSuchAlgorithmException e )
        {
            log.fatal("No MD5!?!");
        }
        finally
        {
            if( in != null ) in.close();
        }

        return saved;
    }

    /**
     *  Serializes a single page's attributes to disk.  The format is private,
     *  don't touch it.  Only attribute values that implement Serializable
     *  are stored.
     */
    private synchronized void serializeAttrsToDisk( WikiPage p )
    {
        ObjectOutputStream out = null;
        StopWatch sw = new StopWatch();
        sw.start();

        try
        {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );

            if( !f.exists() ) f.mkdirs();

            //
            //  Create a digest for the name
            //
            f = new File( f, getHashFileName(p.getName()) );

            // FIXME: There is a concurrency issue here...
            Set entries = p.getAttributes().entrySet();

            if( entries.size() == 0 )
            {
                //  Nothing to serialize, therefore we will just simply remove the
                //  serialization file so that the next time we boot, we don't
                //  deserialize old data.
                f.delete();
                return;
            }

            out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp

            out.writeUTF( p.getName() );

            //  Count only the attributes that will actually be written below,
            //  so that the entry count matches what the reader expects.
            long serializableEntries = 0;
            for( Iterator i = entries.iterator(); i.hasNext(); )
            {
                Map.Entry e = (Map.Entry) i.next();
                if( e.getValue() instanceof Serializable ) serializableEntries++;
            }
            out.writeLong( serializableEntries );

            for( Iterator i = entries.iterator(); i.hasNext(); )
            {
                Map.Entry e = (Map.Entry) i.next();

                if( e.getValue() instanceof Serializable )
                {
                    out.writeUTF( (String)e.getKey() );
                    out.writeObject( e.getValue() );
                }
            }

            out.close();
        }
        catch( IOException e )
        {
            log.error("Unable to serialize!");

            try
            {
                if( out != null ) out.close();
            }
            catch( IOException ex ) {}
        }
        catch( NoSuchAlgorithmException e )
        {
            log.fatal("No MD5 algorithm!?!");
        }
        finally
        {
            sw.stop();

            log.debug("serialization for "+p.getName()+" done - took "+sw);
        }
    }

    /**
     *  After the page has been saved, updates the reference lists.
     *
     *  @param context {@inheritDoc}
     *  @param content {@inheritDoc}
     */
    public void postSave( WikiContext context, String content )
    {
        WikiPage page = context.getPage();

        updateReferences( page.getName(),
                          context.getEngine().scanWikiLinks( page, content ) );

        serializeAttrsToDisk( page );
    }

    /**
     *  Updates the m_refersTo and m_referredBy HashMaps when a page has been
     *  deleted.
     *  <P>
     *  Within the m_refersTo map the pagename is a key.  The whole key-value
     *  pair has to be removed to keep the map clean.
     *  Within the m_referredBy map the name is stored as a value.  Since a key
     *  can have more than one value, we have to remove just the deleted page
     *  from the Set of each page it referred to.
     *
     *  @param page Name of the page to remove from the maps.
     */
    public synchronized void pageRemoved( WikiPage page )
    {
        String pageName = page.getName();

        pageRemoved( pageName );
    }

    private void pageRemoved( String pageName )
    {
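        //  Worked example (hypothetical pages): if "A" refers to "B" and "A"
        //  is deleted, we must drop the A -> {B} entry from m_refersTo, and
        //  remove "A" from B's referred-by set, dropping B's entry entirely
        //  if B neither exists nor has any other referrers left.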
        Collection<String> refTo = m_refersTo.get( pageName );

        if( refTo != null )
        {
            Iterator itRefTo = refTo.iterator();
            while( itRefTo.hasNext() )
            {
                String referredPageName = (String)itRefTo.next();
                Set<String> refBy = m_referredBy.get( referredPageName );

                if( refBy == null )
                    throw new InternalWikiException("Refmgr out of sync: page "+pageName+
                                                    " refers to "+referredPageName+", which has null referrers.");

                refBy.remove( pageName );

                m_referredBy.remove( referredPageName );

                // We won't put it back again if it becomes empty and does not exist.  It will be
                // added later on anyway, if it becomes referenced again.
                if( !(refBy.isEmpty() && !m_engine.pageExists(referredPageName)) )
                {
                    m_referredBy.put( referredPageName, refBy );
                }
            }

            log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName ));
            m_refersTo.remove( pageName );
        }

        Set<String> refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() )
        {
            m_referredBy.remove( pageName );
        }

        //
        //  Persist the updated maps, and remove the attribute cache file
        //  from the disk, too.
        //
        serializeToDisk();

        try
        {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );

            f = new File( f, getHashFileName(pageName) );

            if( f.exists() ) f.delete();
        }
        catch( NoSuchAlgorithmException e )
        {
            log.error("What do you mean - no such algorithm?", e);
        }
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage.  If a refersTo
     *  entry for this page already exists, it is removed and a new one is built
     *  from scratch.  Also calls updateReferredBy() for each referenced page.
     *  <P>
     *  This is the method to call when a new page has been created and we
     *  want to a) set up its references and b) notify the referred pages
     *  of the references.  Use this method during run-time.
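     *  <p>
     *  For example (a sketch; the page names are hypothetical):
     *  <pre>
     *    referenceManager.updateReferences( "FooPage",
     *                                       Arrays.asList( "BarPage", "BazPage" ) );
     *  </pre>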
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    public synchronized void updateReferences( String page, Collection<String> references )
    {
        internalUpdateReferences( page, references );

        serializeToDisk();
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage.  If a refersTo
     *  entry for this page already exists, it is removed and a new one is built
     *  from scratch.  Also calls updateReferredBy() for each referenced page.
     *  <p>
     *  This method does not synchronize the database to disk.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences( String page, Collection<String> references )
    {
        page = getFinalPageName( page );

        //
        //  Create a new entry in m_refersTo.
        //
        Collection<String> oldRefTo = m_refersTo.get( page );
        m_refersTo.remove( page );

        TreeSet<String> cleanedRefs = new TreeSet<String>();
        for( Iterator<String> i = references.iterator(); i.hasNext(); )
        {
            String ref = i.next();

            ref = getFinalPageName( ref );

            cleanedRefs.add( ref );
        }

        m_refersTo.put( page, cleanedRefs );

        //
        //  We know the page exists, since it's making references somewhere.
        //  If an entry for it didn't exist previously in m_referredBy, make
        //  sure one is added now.
        //
        if( !m_referredBy.containsKey( page ) )
        {
            m_referredBy.put( page, new TreeSet<String>() );
        }

        //
        //  Get all pages that used to be referred to by 'page' and
        //  remove that reference.  (We don't want to try to figure out
        //  which particular references were removed...)
        //
        cleanReferredBy( page, oldRefTo, cleanedRefs );

        //
        //  Notify all referred pages of their referinesshoodicity.
        //
        Iterator<String> it = cleanedRefs.iterator();
        while( it.hasNext() )
        {
            String referredPageName = it.next();
            updateReferredBy( getFinalPageName(referredPageName), page );
        }
    }

    /**
     *  Returns the refers-to list.  For debugging.
     *
     *  @return The refers-to list.
     */
    protected Map getRefersTo()
    {
        return m_refersTo;
    }

    /**
     *  Returns the referred-by list.  For debugging.
     *
     *  @return Referred-by lists.
     */
    protected Map getReferredBy()
    {
        return m_referredBy;
    }

    /**
     *  Cleans the 'referred by' list, removing references by 'referrer' to
     *  any other page.  Called whenever 'referrer' is updated or removed.
     */
    private void cleanReferredBy( String referrer,
                                  Collection oldReferred,
                                  Collection<String> newReferred )
    {
        //  Two ways to go about this.  One is to look up all pages previously
        //  referred to by referrer and remove referrer from their lists, and let
        //  the update put them back in (except possibly removed ones).
        //  The other is to get the old referred-to list, compare it to the new
        //  one, and tell the pages missing from the latter to remove referrer
        //  from their list.  Hm.  We'll just try the first for now.  Need to
        //  come back and optimize this a bit.
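        //  (Note: because of that choice, the newReferred parameter is
        //  currently unused here; the new list is applied separately in
        //  internalUpdateReferences().)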

        if( oldReferred == null )
            return;

        Iterator it = oldReferred.iterator();
        while( it.hasNext() )
        {
            String referredPage = (String)it.next();
            Set<String> oldRefBy = m_referredBy.get( referredPage );
            if( oldRefBy != null )
            {
                oldRefBy.remove( referrer );
            }

            // If the page is referred to by no one AND it doesn't even
            // exist, we might just as well forget about this entry.
            // It will be added again elsewhere if new references appear.
            if( ( ( oldRefBy == null ) || ( oldRefBy.isEmpty() ) ) &&
                ( m_engine.pageExists( referredPage ) == false ) )
            {
                m_referredBy.remove( referredPage );
            }
        }
    }

    /**
     *  When initially building a ReferenceManager from scratch, call this method
     *  BEFORE calling updateReferences() with a full list of existing page names.
     *  It builds the refersTo and referredBy key lists, thus enabling
     *  updateReferences() to function correctly.
     *  <P>
     *  This method should NEVER be called after initialization.  It clears all mappings
     *  from the reference tables.
     *
     *  @param pages a Collection containing WikiPage objects.
     */
    private synchronized void buildKeyLists( Collection pages )
    {
        m_refersTo.clear();
        m_referredBy.clear();

        if( pages == null )
            return;

        Iterator it = pages.iterator();
        try
        {
            while( it.hasNext() )
            {
                WikiPage page = (WikiPage)it.next();
                // We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put( page.getName(), new TreeSet<String>() );
                // Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put( page.getName(), null );
            }
        }
        catch( ClassCastException e )
        {
            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
        }
    }


    /**
     *  Marks the page as referred to by the referrer.  If the page does not
     *  exist previously, nothing is done.  (This means that some page, somewhere,
     *  has a link to a page that does not exist.)
     *  <P>
     *  This method is NOT synchronized.  It should only be called from within
     *  a synchronized method, or it should be made synchronized if necessary.
     */
    private void updateReferredBy( String page, String referrer )
    {
        // We're not really interested in first level self-references.
        /*
        if( page.equals( referrer ) )
        {
            return;
        }
        */
        // Neither are we interested if plural forms refer to each other.
        if( m_matchEnglishPlurals )
        {
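            // E.g. for page "Friend", p2 is "Friends" (and for "Friends",
            // "Friend"): if the referrer is just the plural/singular form
            // of the page itself, we don't record the reference.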
            String p2 = page.endsWith("s") ? page.substring(0,page.length()-1) : page+"s";

            if( referrer.equals(p2) )
            {
                return;
            }
        }

        Set<String> referrers = m_referredBy.get( page );

        // Even if 'page' has not been created yet, it can still be referenced.
        // This requires we don't use m_referredBy keys when looking up missing
        // pages, of course.
        if( referrers == null )
        {
            referrers = new TreeSet<String>();
            m_referredBy.put( page, referrers );
        }
        referrers.add( referrer );
    }


    /**
     *  Clears the references to a certain page so it's no longer in the map.
     *
     *  @param pagename Name of the page to clear references for.
     */
    public synchronized void clearPageEntries( String pagename )
    {
        pagename = getFinalPageName( pagename );

        //
        //  Remove this item from the referredBy list of any page
        //  which this item refers to.
        //
        Collection<String> c = m_refersTo.get( pagename );

        if( c != null )
        {
            for( String key : c )
            {
                Collection<?> dref = m_referredBy.get( key );

                // The entry may already be gone (e.g. removed by pageRemoved()),
                // so guard against a null reference list.
                if( dref != null ) dref.remove( pagename );
            }
        }

        //
        //  Finally, remove direct references.
        //
        m_referredBy.remove( pagename );
        m_refersTo.remove( pagename );
    }


    /**
     *  Finds all unreferenced pages.  This requires a linear scan through
     *  m_referredBy to locate keys with null or empty values.
     *
     *  @return A Collection of page names (Strings) that no other page refers to.
     */
    public synchronized Collection findUnreferenced()
    {
        ArrayList<String> unref = new ArrayList<String>();

        for( String key : m_referredBy.keySet() )
        {
            Set<?> refs = getReferenceList( m_referredBy, key );

            if( refs == null || refs.isEmpty() )
            {
                unref.add( key );
            }
        }

        return unref;
    }


    /**
     *  Finds all references to non-existent pages.  This requires a linear
     *  scan through m_refersTo values; each value must have a corresponding
     *  key entry in the reference Maps, otherwise such a page has never
     *  been created.
     *  <P>
     *  Returns a Collection containing Strings of uncreated page names.
     *  Each non-existent page name is shown only once - we don't return
     *  information on who referred to it.
     *
     *  @return A Collection of Strings
     */
    public synchronized Collection findUncreated()
    {
        TreeSet<String> uncreated = new TreeSet<String>();

        //  Go through the values of m_refersTo and check whether each
        //  referenced page actually exists in the wiki.

        Collection<Collection<String>> allReferences = m_refersTo.values();

        for( Collection<String> refs : allReferences )
        {
            if( refs != null )
            {
                for( String aReference : refs )
                {
                    if( m_engine.pageExists( aReference ) == false )
                    {
                        uncreated.add( aReference );
                    }
                }
            }
        }

        return uncreated;
    }

    /**
     *  Searches for the given page in the given Map, and returns
     *  the set of references.  This method also takes care of English plural
     *  matching.
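     *  <p>
     *  For example, with plural matching on, a lookup of "Page" also merges
     *  in any references recorded under "Pages" (and vice versa).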
     *
     *  @param coll The Map to search in
     *  @param pagename The name to find.
     *  @return The references list.
     */
    private <T> Set<T> getReferenceList( Map<String,Set<T>> coll, String pagename )
    {
        Set<T> refs = coll.get( pagename );

        if( m_matchEnglishPlurals )
        {
            //
            //  We'll also add matches from the "other" page.
            //
            Set<T> refs2;

            if( pagename.endsWith("s") )
            {
                refs2 = coll.get( pagename.substring(0,pagename.length()-1) );
            }
            else
            {
                refs2 = coll.get( pagename+"s" );
            }

            if( refs2 != null )
            {
                if( refs != null )
                {
                    //  Merge into a fresh set so that we don't accidentally
                    //  modify the stored reference list itself.
                    Set<T> merged = new TreeSet<T>( refs );
                    merged.addAll( refs2 );
                    refs = merged;
                }
                else
                {
                    refs = refs2;
                }
            }
        }
        return refs;
    }

    /**
     *  Find all pages that refer to this page.  Returns null if the page
     *  does not exist or is not referenced at all, otherwise returns a
     *  collection containing page names (String) that refer to this one.
     *  <p>
     *  @param pagename The page to find referrers for.
     *  @return A Collection of Strings.  (This is, in fact, a Set, and the
     *          return type is likely to change to Set at some point.)  May
     *          return null, if the page does not exist, or if it has no
     *          references.
     */
    // FIXME: Return a Set instead of a Collection.
    public synchronized Collection findReferrers( String pagename )
    {
        Set<String> refs = getReferenceList( m_referredBy, pagename );

        if( refs == null || refs.isEmpty() )
        {
            return null;
        }

        return refs;
    }

    /**
     *  Returns all pages that refer to this page.  Note that this method
     *  returns an unmodifiable view of a live Map, so its contents may change
     *  at any time; any access through an iterator may result in a
     *  ConcurrentModificationException.
     *  <p>
     *  The advantage of using this method over findReferrers() is that
     *  it is very fast, as it does not create a new object.  The disadvantages
     *  are that it does not do any mapping between plural names, and you
     *  may end up getting a ConcurrentModificationException.
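     *  <p>
     *  Because of that, callers usually take a defensive copy (a sketch; the
     *  page name is hypothetical):
     *  <pre>
     *    Set refs = referenceManager.findReferredBy( "Main" );  // may be null
     *    Set copy = ( refs != null ) ? new HashSet( refs ) : Collections.EMPTY_SET;
     *  </pre>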
     *
     *  @param pageName Page name to query.
     *  @return A Set of Strings containing the names of all the pages that refer
     *          to this page.  May return null, if the page does not exist or
     *          has not been indexed yet.
     *  @since 2.2.33
     */
    public Set findReferredBy( String pageName )
    {
        return m_unmutableReferredBy.get( getFinalPageName(pageName) );
    }

    /**
     *  Returns all pages that this page refers to.  You can use this as a quick
     *  way of getting the links from a page, but note that it does not include
     *  InterWiki, image, or external links.  It does contain attachments, though.
     *  <p>
     *  The Collection returned is immutable, so you cannot change it.  It does reflect
     *  the current status and thus is a live object.  So, if you are using any
     *  kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
     *  <p>
     *  The return type is Collection for historical reasons; the references are
     *  in practice stored in a Set, so duplicate references are not preserved.
     *
     *  @param pageName Page name to query
     *  @return A Collection of Strings containing the names of the pages that this page
     *          refers to.  May return null, if the page does not exist or has not
     *          been indexed yet.
     *  @since 2.2.33
     */
    public Collection findRefersTo( String pageName )
    {
        return m_unmutableRefersTo.get( getFinalPageName(pageName) );
    }

    /**
     *  This 'deepHashCode' can be used to determine if there were any
     *  modifications made to the underlying to and by maps of the
     *  ReferenceManager.  The maps of the ReferenceManager are not
     *  synchronized, so someone could add/remove entries in them while the
     *  hashCode is being computed.
     *  <p>
     *  A changed value implies the maps were modified; note that the converse
     *  does not necessarily hold, since different contents can hash to the
     *  same value.
     *
     *  @return The XOR of the hashCodes for the to and by maps of the
     *          ReferenceManager
     *  @since 2.3.24
     */
    //
    //  This method traps and retries if a concurrent
    //  modification occurs.
    //  TODO: It is unnecessary to calculate the hashcode; it should be calculated only
    //        when the hashmaps are changed.  This is slow.
    //
    public int deepHashCode()
    {
        boolean failed = true;
        int signature = 0;

        while( failed )
        {
            signature = 0;
            try
            {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            }
            catch( ConcurrentModificationException e )
            {
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     *  Returns a list of all pages that the ReferenceManager knows about.
     *  This should be roughly equivalent to PageManager.getAllPages(), but without
     *  the potential disk access overhead.  Note that this method is not guaranteed
     *  to return a Set of really all pages (especially during startup), but it is
     *  very fast.
     *
     *  @return A Set of all defined page names that ReferenceManager knows about.
     *  @since 2.3.24
     */
    public Set findCreated()
    {
        return new HashSet<String>( m_refersTo.keySet() );
    }

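    /**
     *  Resolves a page name through the engine's final-name lookup (e.g.
     *  singular/plural resolution).  Falls back to the original name if the
     *  lookup fails or returns null.
     */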
    private String getFinalPageName( String orig )
    {
        try
        {
            String s = m_engine.getFinalPageName( orig );

            if( s == null ) s = orig;

            return s;
        }
        catch( ProviderException e )
        {
            log.error("Error while trying to fetch a page name; trying to cope with the situation.",e);

            return orig;
        }
    }

    /**
     *  {@inheritDoc}
     */
    public void actionPerformed( WikiEvent event )
    {
        if( (event instanceof WikiPageEvent) && (event.getType() == WikiPageEvent.PAGE_DELETED) )
        {
            String pageName = ((WikiPageEvent) event).getPageName();

            if( pageName != null )
            {
                pageRemoved( pageName );
            }
        }
    }
}