001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki; 020 021import org.apache.commons.lang3.time.StopWatch; 022import org.apache.log4j.Logger; 023import org.apache.wiki.api.exceptions.ProviderException; 024import org.apache.wiki.api.filters.BasicPageFilter; 025import org.apache.wiki.attachment.Attachment; 026import org.apache.wiki.event.WikiEvent; 027import org.apache.wiki.event.WikiEventListener; 028import org.apache.wiki.event.WikiEventUtils; 029import org.apache.wiki.event.WikiPageEvent; 030import org.apache.wiki.modules.InternalModule; 031import org.apache.wiki.providers.WikiPageProvider; 032import org.apache.wiki.util.TextUtil; 033 034import java.io.BufferedInputStream; 035import java.io.BufferedOutputStream; 036import java.io.File; 037import java.io.FileInputStream; 038import java.io.FileOutputStream; 039import java.io.IOException; 040import java.io.ObjectInputStream; 041import java.io.ObjectOutputStream; 042import java.io.Serializable; 043import java.nio.charset.StandardCharsets; 044import java.security.MessageDigest; 045import java.security.NoSuchAlgorithmException; 046import java.util.ArrayList; 047import java.util.Collection; 048import java.util.Collections; 049import java.util.ConcurrentModificationException; 050import java.util.HashMap; 051import java.util.HashSet; 052import java.util.Iterator; 053import java.util.List; 054import java.util.Map; 055import java.util.Set; 056import java.util.TreeSet; 057 058/* 059 BUGS 060 061 - if a wikilink is added to a page, then removed, RefMan still thinks that 062 the page refers to the wikilink page. Hm. 063 064 - if a page is deleted, gets very confused. 065 066 - Serialization causes page attributes to be missing, when InitializablePlugins 067 are not executed properly. Thus, serialization should really also mark whether 068 a page is serializable or not... 069 */ 070 071 072/* 073 A word about synchronizing: 074 075 I expect this object to be accessed in three situations: 076 - when a WikiEngine is created and it scans its wikipages 077 - when the WE saves a page 078 - when a JSP page accesses one of the WE's ReferenceManagers 079 to display a list of (un)referenced pages. 080 081 So, access to this class is fairly rare, and usually triggered by 082 user interaction. OTOH, the methods in this class use their storage 083 objects intensively (and, sorry to say, in an unoptimized manner =). 084 My deduction: using unsynchronized HashMaps etc and syncing methods 085 or code blocks is preferrable to using slow, synced storage objects. 086 We don't have iterative code here, so I'm going to use synced methods 087 for now. 088 089 Please contact me if you notice problems with ReferenceManager, and 090 especially with synchronization, or if you have suggestions about 091 syncing. 092 093 ebu@memecry.net 094*/ 095 096/** 097 * Keeps track of wikipage references: 098 * <UL> 099 * <LI>What pages a given page refers to 100 * <LI>What pages refer to a given page 101 * </UL> 102 * 103 * This is a quick'n'dirty approach without any finesse in storage and 104 * searching algorithms; we trust java.util.*. 105 * <P> 106 * This class contains two HashMaps, m_refersTo and m_referredBy. The 107 * first is indexed by WikiPage names and contains a Collection of all 108 * WikiPages the page refers to. (Multiple references are not counted, 109 * naturally.) The second is indexed by WikiPage names and contains 110 * a Set of all pages that refer to the indexing page. (Notice - 111 * the keys of both Maps should be kept in sync.) 112 * <P> 113 * When a page is added or edited, its references are parsed, a Collection 114 * is received, and we crudely replace anything previous with this new 115 * Collection. We then check each referenced page name and make sure they 116 * know they are referred to by the new page. 117 * <P> 118 * Based on this information, we can perform non-optimal searches for 119 * e.g. unreferenced pages, top ten lists, etc. 120 * <P> 121 * The owning class must take responsibility of filling in any pre-existing 122 * information, probably by loading each and every WikiPage and calling this 123 * class to update the references when created. 124 * 125 * @since 1.6.1 126 */ 127 128// FIXME: The way that we save attributes is now a major booboo, and must be 129// replace forthwith. However, this is a workaround for the great deal 130// of problems that occur here... 131 132public class ReferenceManager 133 extends BasicPageFilter 134 implements InternalModule, WikiEventListener 135{ 136 /** Maps page wikiname to a Collection of pages it refers to. The Collection 137 * must contain Strings. The Collection may contain names of non-existing 138 * pages. 139 */ 140 private Map<String,Collection<String>> m_refersTo; 141 private Map<String,Collection<String>> m_unmutableRefersTo; 142 143 /** Maps page wikiname to a Set of referring pages. The Set must 144 * contain Strings. Non-existing pages (a reference exists, but not a file 145 * for the page contents) may have an empty Set in m_referredBy. 146 */ 147 private Map<String,Set<String>> m_referredBy; 148 private Map<String,Set<String>> m_unmutableReferredBy; 149 150 /** The WikiEngine that owns this object. */ 151 //private WikiEngine m_engine; //inherited from BasicPageFilter class 152 153 private boolean m_matchEnglishPlurals = false; 154 155 private static Logger log = Logger.getLogger(ReferenceManager.class); 156 157 private static final String SERIALIZATION_FILE = "refmgr.ser"; 158 private static final String SERIALIZATION_DIR = "refmgr-attr"; 159 160 /** We use this also a generic serialization id */ 161 private static final long serialVersionUID = 4L; 162 163 /** 164 * Builds a new ReferenceManager. 165 * 166 * @param engine The WikiEngine to which this is managing references to. 167 */ 168 public ReferenceManager( WikiEngine engine ) 169 { 170 m_refersTo = new HashMap<>(); 171 m_referredBy = new HashMap<>(); 172 m_engine = engine; 173 174 m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), 175 WikiEngine.PROP_MATCHPLURALS, 176 m_matchEnglishPlurals ); 177 178 // 179 // Create two maps that contain unmutable versions of the two basic maps. 180 // 181 m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy ); 182 m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo ); 183 } 184 185 /** 186 * Does a full reference update. Does not sync; assumes that you do it afterwards. 187 */ 188 private void updatePageReferences( WikiPage page ) throws ProviderException 189 { 190 String content = m_engine.getPageManager().getPageText( page.getName(), 191 WikiPageProvider.LATEST_VERSION ); 192 193 TreeSet<String> res = new TreeSet<String>(); 194 Collection<String> links = m_engine.scanWikiLinks( page, content ); 195 196 res.addAll( links ); 197 List< Attachment > attachments = m_engine.getAttachmentManager().listAttachments( page ); 198 199 for( Iterator< Attachment > atti = attachments.iterator(); atti.hasNext(); ) 200 { 201 res.add( atti.next().getName() ); 202 } 203 204 internalUpdateReferences( page.getName(), res ); 205 } 206 207 /** 208 * Initializes the entire reference manager with the initial set of pages 209 * from the collection. 210 * 211 * @param pages A collection of all pages you want to be included in the reference 212 * count. 213 * @since 2.2 214 * @throws ProviderException If reading of pages fail. 215 */ 216 public void initialize( Collection< WikiPage > pages ) throws ProviderException 217 { 218 log.debug( "Initializing new ReferenceManager with "+pages.size()+" initial pages." ); 219 StopWatch sw = new StopWatch(); 220 sw.start(); 221 log.info( "Starting cross reference scan of WikiPages" ); 222 223 // 224 // First, try to serialize old data from disk. If that fails, 225 // we'll go and update the entire reference lists (which'll take 226 // time) 227 // 228 try 229 { 230 // 231 // Unserialize things. The loop below cannot be combined with 232 // the other loop below, simply because engine.getPage() has 233 // side effects such as loading initializing the user databases, 234 // which in turn want all of the pages to be read already... 235 // 236 // Yes, this is a kludge. We know. Will be fixed. 237 // 238 long saved = unserializeFromDisk(); 239 240 for( Iterator< WikiPage > it = pages.iterator(); it.hasNext(); ) 241 { 242 WikiPage page = it.next(); 243 244 unserializeAttrsFromDisk( page ); 245 } 246 247 // 248 // Now we must check if any of the pages have been changed 249 // while we were in the electronic la-la-land, and update 250 // the references for them. 251 // 252 253 Iterator< WikiPage > it = pages.iterator(); 254 255 while( it.hasNext() ) 256 { 257 WikiPage page = it.next(); 258 259 if( page instanceof Attachment ) 260 { 261 // Skip attachments 262 } 263 else 264 { 265 266 // Refresh with the latest copy 267 page = m_engine.getPage( page.getName() ); 268 269 if( page.getLastModified() == null ) 270 { 271 log.fatal( "Provider returns null lastModified. Please submit a bug report." ); 272 } 273 else if( page.getLastModified().getTime() > saved ) 274 { 275 updatePageReferences( page ); 276 } 277 } 278 } 279 280 } 281 catch( Exception e ) 282 { 283 log.info("Unable to unserialize old refmgr information, rebuilding database: "+e.getMessage()); 284 buildKeyLists( pages ); 285 286 // Scan the existing pages from disk and update references in the manager. 287 Iterator< WikiPage > it = pages.iterator(); 288 while( it.hasNext() ) 289 { 290 WikiPage page = it.next(); 291 292 if( page instanceof Attachment ) 293 { 294 // We cannot build a reference list from the contents 295 // of attachments, so we skip them. 296 } 297 else 298 { 299 updatePageReferences( page ); 300 301 serializeAttrsToDisk( page ); 302 } 303 304 } 305 306 serializeToDisk(); 307 } 308 309 sw.stop(); 310 log.info( "Cross reference scan done in "+sw ); 311 312 WikiEventUtils.addWikiEventListener(m_engine.getPageManager(), WikiPageEvent.PAGE_DELETED, this); 313 } 314 315 /** 316 * Reads the serialized data from the disk back to memory. 317 * Returns the date when the data was last written on disk 318 */ 319 @SuppressWarnings("unchecked") 320 private synchronized long unserializeFromDisk() 321 throws IOException, 322 ClassNotFoundException 323 { 324 long saved = 0L; 325 326 File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE ); 327 try( ObjectInputStream in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) ) ) 328 { 329 StopWatch sw = new StopWatch(); 330 sw.start(); 331 332 long ver = in.readLong(); 333 334 if( ver != serialVersionUID ) 335 { 336 throw new IOException("File format has changed; I need to recalculate references."); 337 } 338 339 saved = in.readLong(); 340 m_refersTo = ( Map< String, Collection< String > > ) in.readObject(); 341 m_referredBy = ( Map< String, Set< String > > ) in.readObject(); 342 343 in.close(); 344 345 m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy ); 346 m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo ); 347 348 sw.stop(); 349 log.debug("Read serialized data successfully in "+sw); 350 } 351 352 return saved; 353 } 354 355 /** 356 * Serializes hashmaps to disk. The format is private, don't touch it. 357 */ 358 private synchronized void serializeToDisk() 359 { 360 File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE ); 361 try( ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) { 362 StopWatch sw = new StopWatch(); 363 sw.start(); 364 365 out.writeLong( serialVersionUID ); 366 out.writeLong( System.currentTimeMillis() ); // Timestamp 367 out.writeObject( m_refersTo ); 368 out.writeObject( m_referredBy ); 369 370 out.close(); 371 372 sw.stop(); 373 374 log.debug("serialization done - took "+sw); 375 } 376 catch( IOException ioe ) 377 { 378 log.error("Unable to serialize!", ioe); 379 } 380 } 381 382 private String getHashFileName( String pageName ) { 383 try { 384 MessageDigest digest = MessageDigest.getInstance("MD5"); 385 byte[] dig = digest.digest( pageName.getBytes(StandardCharsets.UTF_8) ); 386 387 return TextUtil.toHexString(dig)+".cache"; 388 } catch( NoSuchAlgorithmException e ) { 389 log.fatal( "What do you mean - no such algorithm?", e ); 390 return null; 391 } 392 } 393 394 /** 395 * Reads the serialized data from the disk back to memory. 396 * Returns the date when the data was last written on disk 397 */ 398 private synchronized long unserializeAttrsFromDisk(WikiPage p) 399 throws IOException, 400 ClassNotFoundException 401 { 402 long saved = 0L; 403 404 // 405 // Find attribute cache, and check if it exists 406 // 407 String hashName = getHashFileName( p.getName() ); 408 if( hashName != null ) { 409 File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR ); 410 411 f = new File( f, hashName ); 412 413 if( !f.exists() ) 414 { 415 return 0L; 416 } 417 try( ObjectInputStream in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) )) 418 { 419 StopWatch sw = new StopWatch(); 420 sw.start(); 421 422 log.debug("Deserializing attributes for "+p.getName()); 423 424 long ver = in.readLong(); 425 426 if( ver != serialVersionUID ) 427 { 428 log.debug("File format has changed; cannot deserialize."); 429 return 0L; 430 } 431 432 saved = in.readLong(); 433 434 String name = in.readUTF(); 435 436 if( !name.equals(p.getName()) ) 437 { 438 log.debug("File name does not match ("+name+"), skipping..."); 439 return 0L; // Not here 440 } 441 442 long entries = in.readLong(); 443 444 for( int i = 0; i < entries; i++ ) 445 { 446 String key = in.readUTF(); 447 Object value = in.readObject(); 448 449 p.setAttribute( key, value ); 450 451 log.debug(" attr: "+key+"="+value); 452 } 453 454 in.close(); 455 456 sw.stop(); 457 log.debug("Read serialized data for "+name+" successfully in "+sw); 458 p.setHasMetadata(); 459 } 460 } 461 462 463 return saved; 464 } 465 466 /** 467 * Serializes hashmaps to disk. The format is private, don't touch it. 468 */ 469 private synchronized void serializeAttrsToDisk( WikiPage p ) 470 { 471 StopWatch sw = new StopWatch(); 472 sw.start(); 473 474 String hashName = getHashFileName( p.getName() ); 475 if( hashName != null ) { 476 File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR ); 477 478 if( !f.exists() ) f.mkdirs(); 479 480 // 481 // Create a digest for the name 482 // 483 f = new File( f, hashName ); 484 485 try( ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) { 486 Set< Map.Entry < String, Object > > entries = new HashSet<>( p.getAttributes().entrySet() ); // new Set to avoid concurrency issue 487 488 if( entries.size() == 0 ) 489 { 490 // Nothing to serialize, therefore we will just simply remove the 491 // serialization file so that the next time we boot, we don't 492 // deserialize old data. 493 f.delete(); 494 return; 495 } 496 497 out.writeLong( serialVersionUID ); 498 out.writeLong( System.currentTimeMillis() ); // Timestamp 499 500 out.writeUTF( p.getName() ); 501 out.writeLong( entries.size() ); 502 503 for( Iterator< Map.Entry < String, Object > > i = entries.iterator(); i.hasNext(); ) { 504 Map.Entry< String, Object > e = i.next(); 505 506 if( e.getValue() instanceof Serializable ) { 507 out.writeUTF( e.getKey() ); 508 out.writeObject( e.getValue() ); 509 } 510 } 511 512 } catch( IOException e ) { 513 log.error( "Unable to serialize!", e ); 514 } finally { 515 sw.stop(); 516 log.debug("serialization for "+p.getName()+" done - took "+sw); 517 } 518 } 519 520 } 521 522 /** 523 * After the page has been saved, updates the reference lists. 524 * 525 * @param context {@inheritDoc} 526 * @param content {@inheritDoc} 527 */ 528 @Override 529 public void postSave( WikiContext context, String content ) 530 { 531 WikiPage page = context.getPage(); 532 533 updateReferences( page.getName(), 534 context.getEngine().scanWikiLinks( page, content ) ); 535 536 serializeAttrsToDisk( page ); 537 } 538 539 /** 540 * Updates the m_referedTo and m_referredBy hashmaps when a page has been 541 * deleted. 542 * <P> 543 * Within the m_refersTo map the pagename is a key. The whole key-value-set 544 * has to be removed to keep the map clean. 545 * Within the m_referredBy map the name is stored as a value. Since a key 546 * can have more than one value we have to delete just the key-value-pair 547 * referring page:deleted page. 548 * 549 * @param page Name of the page to remove from the maps. 550 */ 551 public synchronized void pageRemoved( WikiPage page ) 552 { 553 String pageName = page.getName(); 554 555 pageRemoved(pageName); 556 } 557 558 private void pageRemoved(String pageName) 559 { 560 Collection<String> refTo = m_refersTo.get( pageName ); 561 562 if( refTo != null ) 563 { 564 Iterator< String > itRefTo = refTo.iterator(); 565 while( itRefTo.hasNext() ) 566 { 567 String referredPageName = itRefTo.next(); 568 Set<String> refBy = m_referredBy.get( referredPageName ); 569 570 if( refBy == null ) 571 throw new InternalWikiException("Refmgr out of sync: page "+pageName+" refers to "+referredPageName+", which has null referrers."); 572 573 refBy.remove(pageName); 574 575 m_referredBy.remove( referredPageName ); 576 577 // We won't put it back again if it becomes empty and does not exist. It will be added 578 // later on anyway, if it becomes referenced again. 579 if( !(refBy.isEmpty() && !m_engine.pageExists(referredPageName)) ) 580 { 581 m_referredBy.put( referredPageName, refBy ); 582 } 583 } 584 585 log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName )); 586 m_refersTo.remove( pageName ); 587 } 588 589 Set<String> refBy = m_referredBy.get( pageName ); 590 if( refBy == null || refBy.isEmpty() ) 591 { 592 m_referredBy.remove( pageName ); 593 } 594 595 // 596 // Remove any traces from the disk, too 597 // 598 serializeToDisk(); 599 600 String hashName = getHashFileName( pageName ); 601 if( hashName != null ) { 602 File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR ); 603 604 f = new File( f, getHashFileName(pageName) ); 605 606 if( f.exists() ) f.delete(); 607 } 608 } 609 610 /** 611 * Updates the referred pages of a new or edited WikiPage. If a refersTo 612 * entry for this page already exists, it is removed and a new one is built 613 * from scratch. Also calls updateReferredBy() for each referenced page. 614 * <P> 615 * This is the method to call when a new page has been created and we 616 * want to a) set up its references and b) notify the referred pages 617 * of the references. Use this method during run-time. 618 * 619 * @param page Name of the page to update. 620 * @param references A Collection of Strings, each one pointing to a page this page references. 621 */ 622 public synchronized void updateReferences( String page, Collection< String > references ) 623 { 624 internalUpdateReferences(page, references); 625 626 serializeToDisk(); 627 } 628 629 /** 630 * Updates the referred pages of a new or edited WikiPage. If a refersTo 631 * entry for this page already exists, it is removed and a new one is built 632 * from scratch. Also calls updateReferredBy() for each referenced page. 633 * <p> 634 * This method does not synchronize the database to disk. 635 * 636 * @param page Name of the page to update. 637 * @param references A Collection of Strings, each one pointing to a page this page references. 638 */ 639 640 private void internalUpdateReferences(String page, Collection< String > references) 641 { 642 page = getFinalPageName( page ); 643 644 // 645 // Create a new entry in m_refersTo. 646 // 647 Collection< String > oldRefTo = m_refersTo.get( page ); 648 m_refersTo.remove( page ); 649 650 TreeSet<String> cleanedRefs = new TreeSet<>(); 651 for( Iterator< String > i = references.iterator(); i.hasNext(); ) 652 { 653 String ref = i.next(); 654 655 ref = getFinalPageName( ref ); 656 657 cleanedRefs.add( ref ); 658 } 659 660 m_refersTo.put( page, cleanedRefs ); 661 662 // 663 // We know the page exists, since it's making references somewhere. 664 // If an entry for it didn't exist previously in m_referredBy, make 665 // sure one is added now. 666 // 667 if( !m_referredBy.containsKey( page ) ) 668 { 669 m_referredBy.put( page, new TreeSet<String>() ); 670 } 671 672 // 673 // Get all pages that used to be referred to by 'page' and 674 // remove that reference. (We don't want to try to figure out 675 // which particular references were removed...) 676 // 677 cleanReferredBy( page, oldRefTo, cleanedRefs ); 678 679 // 680 // Notify all referred pages of their referinesshoodicity. 681 // 682 Iterator<String> it = cleanedRefs.iterator(); 683 while( it.hasNext() ) 684 { 685 String referredPageName = it.next(); 686 updateReferredBy( getFinalPageName(referredPageName), page ); 687 } 688 } 689 690 /** 691 * Returns the refers-to list. For debugging. 692 * 693 * @return The refers-to list. 694 */ 695 protected Map< String, Collection< String > > getRefersTo() 696 { 697 return m_refersTo; 698 } 699 700 /** 701 * Returns the referred-by list. For debugging. 702 * 703 * @return Referred-by lists. 704 */ 705 protected Map< String, Set< String > > getReferredBy() 706 { 707 return m_referredBy; 708 } 709 710 /** 711 * Cleans the 'referred by' list, removing references by 'referrer' to 712 * any other page. Called after 'referrer' is removed. 713 */ 714 private void cleanReferredBy( String referrer, 715 Collection<String> oldReferred, 716 Collection<String> newReferred ) 717 { 718 // Two ways to go about this. One is to look up all pages previously 719 // referred by referrer and remove referrer from their lists, and let 720 // the update put them back in (except possibly removed ones). 721 // The other is to get the old referred to list, compare to the new, 722 // and tell the ones missing in the latter to remove referrer from 723 // their list. Hm. We'll just try the first for now. Need to come 724 // back and optimize this a bit. 725 726 if( oldReferred == null ) 727 return; 728 729 Iterator< String > it = oldReferred.iterator(); 730 while( it.hasNext() ) 731 { 732 String referredPage = it.next(); 733 Set< String > oldRefBy = m_referredBy.get( referredPage ); 734 if( oldRefBy != null ) 735 { 736 oldRefBy.remove( referrer ); 737 } 738 739 // If the page is referred to by no one AND it doesn't even 740 // exist, we might just as well forget about this entry. 741 // It will be added again elsewhere if new references appear. 742 if( ( ( oldRefBy == null ) || ( oldRefBy.isEmpty() ) ) && 743 ( m_engine.pageExists( referredPage ) == false ) ) 744 { 745 m_referredBy.remove( referredPage ); 746 } 747 } 748 749 } 750 751 752 /** 753 * When initially building a ReferenceManager from scratch, call this method 754 * BEFORE calling updateReferences() with a full list of existing page names. 755 * It builds the refersTo and referredBy key lists, thus enabling 756 * updateReferences() to function correctly. 757 * <P> 758 * This method should NEVER be called after initialization. It clears all mappings 759 * from the reference tables. 760 * 761 * @param pages a Collection containing WikiPage objects. 762 */ 763 private synchronized void buildKeyLists( Collection< WikiPage > pages ) 764 { 765 m_refersTo.clear(); 766 m_referredBy.clear(); 767 768 if( pages == null ) 769 return; 770 771 Iterator< WikiPage > it = pages.iterator(); 772 try 773 { 774 while( it.hasNext() ) 775 { 776 WikiPage page = it.next(); 777 // We add a non-null entry to referredBy to indicate the referred page exists 778 m_referredBy.put( page.getName(), new TreeSet<String>() ); 779 // Just add a key to refersTo; the keys need to be in sync with referredBy. 780 m_refersTo.put( page.getName(), null ); 781 } 782 } 783 catch( ClassCastException e ) 784 { 785 log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e ); 786 } 787 } 788 789 790 /** 791 * Marks the page as referred to by the referrer. If the page does not 792 * exist previously, nothing is done. (This means that some page, somewhere, 793 * has a link to a page that does not exist.) 794 * <P> 795 * This method is NOT synchronized. It should only be referred to from 796 * within a synchronized method, or it should be made synced if necessary. 797 */ 798 private void updateReferredBy( String page, String referrer ) 799 { 800 // We're not really interested in first level self-references. 801 /* 802 if( page.equals( referrer ) ) 803 { 804 return; 805 } 806 */ 807 // Neither are we interested if plural forms refer to each other. 808 if( m_matchEnglishPlurals ) 809 { 810 String p2 = page.endsWith("s") ? page.substring(0,page.length()-1) : page+"s"; 811 812 if( referrer.equals(p2) ) 813 { 814 return; 815 } 816 } 817 818 Set<String> referrers = m_referredBy.get( page ); 819 820 // Even if 'page' has not been created yet, it can still be referenced. 821 // This requires we don't use m_referredBy keys when looking up missing 822 // pages, of course. 823 if(referrers == null) 824 { 825 referrers = new TreeSet<String>(); 826 m_referredBy.put( page, referrers ); 827 } 828 referrers.add( referrer ); 829 } 830 831 832 /** 833 * Clears the references to a certain page so it's no longer in the map. 834 * 835 * @param pagename Name of the page to clear references for. 836 */ 837 public synchronized void clearPageEntries( String pagename ) 838 { 839 pagename = getFinalPageName(pagename); 840 841 // 842 // Remove this item from the referredBy list of any page 843 // which this item refers to. 844 // 845 Collection<String> c = m_refersTo.get( pagename ); 846 847 if( c != null ) 848 { 849 for( String key : c ) 850 { 851 Collection<?> dref = m_referredBy.get( key ); 852 853 dref.remove( pagename ); 854 } 855 } 856 857 // 858 // Finally, remove direct references. 859 // 860 m_referredBy.remove( pagename ); 861 m_refersTo.remove( pagename ); 862 } 863 864 865 /** 866 * Finds all unreferenced pages. This requires a linear scan through 867 * m_referredBy to locate keys with null or empty values. 868 * 869 * @return The Collection of Strings 870 */ 871 public synchronized Collection< String > findUnreferenced() 872 { 873 ArrayList<String> unref = new ArrayList<String>(); 874 875 for( String key : m_referredBy.keySet() ) 876 { 877 Set<?> refs = getReferenceList( m_referredBy, key ); 878 879 if( refs == null || refs.isEmpty() ) 880 { 881 unref.add( key ); 882 } 883 } 884 885 return unref; 886 } 887 888 889 /** 890 * Finds all references to non-existant pages. This requires a linear 891 * scan through m_refersTo values; each value must have a corresponding 892 * key entry in the reference Maps, otherwise such a page has never 893 * been created. 894 * <P> 895 * Returns a Collection containing Strings of unreferenced page names. 896 * Each non-existant page name is shown only once - we don't return information 897 * on who referred to it. 898 * 899 * @return A Collection of Strings 900 */ 901 public synchronized Collection< String > findUncreated() 902 { 903 TreeSet<String> uncreated = new TreeSet<String>(); 904 905 // Go through m_refersTo values and check that m_refersTo has the corresponding keys. 906 // We want to reread the code to make sure our HashMaps are in sync... 907 908 Collection<Collection<String>> allReferences = m_refersTo.values(); 909 910 for( Collection<String> refs : allReferences ) 911 { 912 if( refs != null ) 913 { 914 for( String aReference : refs ) 915 { 916 if( m_engine.pageExists( aReference ) == false ) 917 { 918 uncreated.add( aReference ); 919 } 920 } 921 } 922 } 923 924 return uncreated; 925 } 926 927 /** 928 * Searches for the given page in the given Map, and returns 929 * the set of references. This method also takes care of English plural 930 * matching. 931 * 932 * @param coll The Map to search in 933 * @param pagename The name to find. 934 * @return The references list. 935 */ 936 private <T> Set<T> getReferenceList( Map<String,Set<T>> coll, String pagename ) 937 { 938 Set<T> refs = coll.get( pagename ); 939 940 if( m_matchEnglishPlurals ) 941 { 942 // 943 // We'll add also matches from the "other" page. 944 // 945 Set<T> refs2; 946 947 if( pagename.endsWith("s") ) 948 { 949 refs2 = coll.get( pagename.substring(0,pagename.length()-1) ); 950 } 951 else 952 { 953 refs2 = coll.get( pagename+"s" ); 954 } 955 956 if( refs2 != null ) 957 { 958 if( refs != null ) 959 refs.addAll( refs2 ); 960 else 961 refs = refs2; 962 } 963 } 964 return refs; 965 } 966 967 /** 968 * Find all pages that refer to this page. Returns null if the page 969 * does not exist or is not referenced at all, otherwise returns a 970 * collection containing page names (String) that refer to this one. 971 * <p> 972 * @param pagename The page to find referrers for. 973 * @return A Set of Strings. May return null, if the page 974 * does not exist, or if it has no references. 975 */ 976 public synchronized Set< String > findReferrers( String pagename ) 977 { 978 Set<String> refs = getReferenceList( m_referredBy, pagename ); 979 980 if( refs == null || refs.isEmpty() ) 981 { 982 return null; 983 } 984 985 return refs; 986 987 } 988 989 /** 990 * Returns all pages that refer to this page. Note that this method 991 * returns an unmodifiable Map, which may be abruptly changed. So any 992 * access to any iterator may result in a ConcurrentModificationException. 993 * <p> 994 * The advantages of using this method over findReferrers() is that 995 * it is very fast, as it does not create a new object. The disadvantages 996 * are that it does not do any mapping between plural names, and you 997 * may end up getting a ConcurrentModificationException. 998 * 999 * @param pageName Page name to query. 1000 * @return A Set of Strings containing the names of all the pages that refer 1001 * to this page. May return null, if the page does not exist or 1002 * has not been indexed yet. 1003 * @since 2.2.33 1004 */ 1005 public Set< String > findReferredBy( String pageName ) 1006 { 1007 return m_unmutableReferredBy.get( getFinalPageName(pageName) ); 1008 } 1009 1010 /** 1011 * Returns all pages that this page refers to. You can use this as a quick 1012 * way of getting the links from a page, but note that it does not link any 1013 * InterWiki, image, or external links. It does contain attachments, though. 1014 * <p> 1015 * The Collection returned is unmutable, so you cannot change it. It does reflect 1016 * the current status and thus is a live object. So, if you are using any 1017 * kind of an iterator on it, be prepared for ConcurrentModificationExceptions. 1018 * <p> 1019 * The returned value is a Collection, because a page may refer to another page 1020 * multiple times. 1021 * 1022 * @param pageName Page name to query 1023 * @return A Collection of Strings containing the names of the pages that this page 1024 * refers to. May return null, if the page does not exist or has not 1025 * been indexed yet. 1026 * @since 2.2.33 1027 */ 1028 public Collection< String > findRefersTo( String pageName ) 1029 { 1030 return m_unmutableRefersTo.get( getFinalPageName(pageName) ); 1031 } 1032 1033 /** 1034 * This 'deepHashCode' can be used to determine if there were any 1035 * modifications made to the underlying to and by maps of the 1036 * ReferenceManager. The maps of the ReferenceManager are not 1037 * synchronized, so someone could add/remove entries in them while the 1038 * hashCode is being computed. 1039 * 1040 * @return Sum of the hashCodes for the to and by maps of the 1041 * ReferenceManager 1042 * @since 2.3.24 1043 */ 1044 // 1045 // This method traps and retries if a concurrent 1046 // modifcaition occurs. 1047 // TODO: It is unnecessary to calculate the hashcode; it should be calculated only 1048 // when the hashmaps are changed. This is slow. 1049 // 1050 public int deepHashCode() 1051 { 1052 boolean failed = true; 1053 int signature = 0; 1054 1055 while (failed) 1056 { 1057 signature = 0; 1058 try 1059 { 1060 signature ^= m_referredBy.hashCode(); 1061 signature ^= m_refersTo.hashCode(); 1062 failed = false; 1063 } 1064 catch (ConcurrentModificationException e) 1065 { 1066 Thread.yield(); 1067 } 1068 } 1069 1070 return signature; 1071 } 1072 1073 /** 1074 * Returns a list of all pages that the ReferenceManager knows about. 1075 * This should be roughly equivalent to PageManager.getAllPages(), but without 1076 * the potential disk access overhead. Note that this method is not guaranteed 1077 * to return a Set of really all pages (especially during startup), but it is 1078 * very fast. 1079 * 1080 * @return A Set of all defined page names that ReferenceManager knows about. 1081 * @since 2.3.24 1082 */ 1083 public Set< String > findCreated() 1084 { 1085 return new HashSet<String>( m_refersTo.keySet() ); 1086 } 1087 1088 private String getFinalPageName( String orig ) 1089 { 1090 try 1091 { 1092 String s = m_engine.getFinalPageName( orig ); 1093 1094 if( s == null ) s = orig; 1095 1096 return s; 1097 } 1098 catch( ProviderException e ) 1099 { 1100 log.error("Error while trying to fetch a page name; trying to cope with the situation.",e); 1101 1102 return orig; 1103 } 1104 } 1105 1106 /** 1107 * {@inheritDoc} 1108 */ 1109 @Override 1110 public void actionPerformed(WikiEvent event) 1111 { 1112 if( (event instanceof WikiPageEvent) && (event.getType() == WikiPageEvent.PAGE_DELETED) ) 1113 { 1114 String pageName = ((WikiPageEvent) event).getPageName(); 1115 1116 if( pageName != null ) 1117 { 1118 pageRemoved( pageName ); 1119 } 1120 } 1121 } 1122}