/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import org.apache.commons.lang.time.StopWatch;
import org.apache.log4j.Logger;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.api.filters.BasicPageFilter;
import org.apache.wiki.attachment.Attachment;
import org.apache.wiki.event.WikiEvent;
import org.apache.wiki.event.WikiEventListener;
import org.apache.wiki.event.WikiEventUtils;
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.modules.InternalModule;
import org.apache.wiki.providers.WikiPageProvider;
import org.apache.wiki.util.TextUtil;

/*
  BUGS

  - If a wikilink is added to a page and then removed, RefMan still thinks
    that the page refers to the wikilink page.  Hm.

  - Gets very confused if a page is deleted.

  - Serialization causes page attributes to be missing when InitializablePlugins
    are not executed properly.  Thus, serialization should really also mark
    whether a page is serializable or not...
 */


/*
   A word about synchronizing:

   I expect this object to be accessed in three situations:
   - when a WikiEngine is created and it scans its wikipages
   - when the WE saves a page
   - when a JSP page accesses one of the WE's ReferenceManagers
     to display a list of (un)referenced pages.

   So, access to this class is fairly rare, and usually triggered by
   user interaction.  OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =).
   My deduction: using unsynchronized HashMaps etc. and syncing methods
   or code blocks is preferable to using slow, synced storage objects.
   We don't have iterative code here, so I'm going to use synced methods
   for now.

   Please contact me if you notice problems with ReferenceManager, and
   especially with synchronization, or if you have suggestions about
   syncing.
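
   (The one place we tolerate a concurrent modification rather than locking
   is deepHashCode() below, which simply retries if the maps change while
   the hash is being computed.)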

   ebu@memecry.net
*/

/**
 *  Keeps track of wikipage references:
 *  <UL>
 *  <LI>What pages a given page refers to
 *  <LI>What pages refer to a given page
 *  </UL>
 *
 *  This is a quick'n'dirty approach without any finesse in storage and
 *  searching algorithms; we trust java.util.*.
 *  <P>
 *  This class contains two HashMaps, m_refersTo and m_referredBy.  The
 *  first is indexed by WikiPage names and contains a Collection of all
 *  WikiPages the page refers to.  (Multiple references are not counted,
 *  naturally.)  The second is indexed by WikiPage names and contains
 *  a Set of all pages that refer to the indexing page.  (Note that the
 *  keys of both Maps should be kept in sync.)
 *  <P>
 *  When a page is added or edited, its references are parsed, a Collection
 *  is received, and we crudely replace anything previous with this new
 *  Collection.  We then check each referenced page name and make sure they
 *  know they are referred to by the new page.
 *  <P>
 *  Based on this information, we can perform non-optimal searches for
 *  e.g. unreferenced pages, top ten lists, etc.
 *  <P>
 *  The owning class must take responsibility for filling in any pre-existing
 *  information, probably by loading each and every WikiPage and calling this
 *  class to update the references when created.
 *
 *  @since 1.6.1
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...

public class ReferenceManager
    extends BasicPageFilter
    implements InternalModule, WikiEventListener
{
    /** Maps page wikiname to a Collection of pages it refers to.  The Collection
     *  must contain Strings.  The Collection may contain names of non-existing
     *  pages.
     */
    private Map<String,Collection<String>> m_refersTo;
    private Map<String,Collection<String>> m_unmutableRefersTo;

    /** Maps page wikiname to a Set of referring pages.  The Set must
     *  contain Strings.  Non-existing pages (a reference exists, but not a file
     *  for the page contents) may have an empty Set in m_referredBy.
     */
    private Map<String,Set<String>> m_referredBy;
    private Map<String,Set<String>> m_unmutableReferredBy;

    /** The WikiEngine that owns this ReferenceManager. */
    private WikiEngine m_engine;

    private boolean m_matchEnglishPlurals = false;

    private static Logger log = Logger.getLogger(ReferenceManager.class);

    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We also use this as a generic serialization id. */
    private static final long serialVersionUID = 4L;

    /**
     *  Builds a new ReferenceManager.
     *
     *  @param engine The WikiEngine whose references this manager tracks.
     */
    public ReferenceManager( WikiEngine engine )
    {
        m_refersTo   = new HashMap<String,Collection<String>>();
        m_referredBy = new HashMap<String,Set<String>>();
        m_engine = engine;

        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(),
                                                             WikiEngine.PROP_MATCHPLURALS,
                                                             m_matchEnglishPlurals );

        //
        //  Create two maps that contain unmodifiable views of the two basic maps.
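        //  Note that Collections.unmodifiableMap() returns a read-only *view*,
        //  not a snapshot: findReferredBy() and findRefersTo() therefore see
        //  live updates, which is why their javadocs warn about possible
        //  ConcurrentModificationExceptions during iteration.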
        //
        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
    }

    /**
     *  Does a full reference update.  Does not sync; assumes that you do it afterwards.
     */
    private void updatePageReferences( WikiPage page ) throws ProviderException
    {
        String content = m_engine.getPageManager().getPageText( page.getName(),
                                                                WikiPageProvider.LATEST_VERSION );

        TreeSet<String> res = new TreeSet<String>();
        Collection<String> links = m_engine.scanWikiLinks( page, content );

        res.addAll( links );
        Collection attachments = m_engine.getAttachmentManager().listAttachments( page );

        for( Iterator atti = attachments.iterator(); atti.hasNext(); )
        {
            res.add( ((Attachment)(atti.next())).getName() );
        }

        internalUpdateReferences( page.getName(), res );
    }

    /**
     *  Initializes the entire reference manager with the initial set of pages
     *  from the collection.
     *
     *  @param pages A collection of all pages you want to be included in the reference
     *               count.
     *  @since 2.2
     *  @throws ProviderException If reading of pages fails.
     */
    public void initialize( Collection pages )
        throws ProviderException
    {
        log.debug( "Initializing new ReferenceManager with "+pages.size()+" initial pages." );
        StopWatch sw = new StopWatch();
        sw.start();
        log.info( "Starting cross reference scan of WikiPages" );

        //
        //  First, try to read the old data from disk.  If that fails,
        //  we'll go and update the entire reference lists (which'll take
        //  time).
        //
        try
        {
            //
            //  Unserialize things.  The loop below cannot be combined with
            //  the other loop below, simply because engine.getPage() has
            //  side effects such as loading and initializing the user databases,
            //  which in turn want all of the pages to be read already...
            //
            //  Yes, this is a kludge.  We know.  Will be fixed.
            //
            long saved = unserializeFromDisk();

            for( Iterator it = pages.iterator(); it.hasNext(); )
            {
                WikiPage page = (WikiPage) it.next();

                unserializeAttrsFromDisk( page );
            }

            //
            //  Now we must check if any of the pages have been changed
            //  while we were in the electronic la-la-land, and update
            //  the references for them.
            //

            Iterator it = pages.iterator();

            while( it.hasNext() )
            {
                WikiPage page = (WikiPage) it.next();

                if( page instanceof Attachment )
                {
                    // Skip attachments
                }
                else
                {
                    // Refresh with the latest copy
                    page = m_engine.getPage( page.getName() );

                    if( page.getLastModified() == null )
                    {
                        log.fatal( "Provider returns null lastModified.  Please submit a bug report." );
                    }
                    else if( page.getLastModified().getTime() > saved )
                    {
                        updatePageReferences( page );
                    }
                }
            }
        }
        catch( Exception e )
        {
            log.info("Unable to unserialize old refmgr information, rebuilding database: "+e.getMessage());
            buildKeyLists( pages );

            // Scan the existing pages from disk and update references in the manager.
            Iterator it = pages.iterator();
            while( it.hasNext() )
            {
                WikiPage page = (WikiPage)it.next();

                if( page instanceof Attachment )
                {
                    // We cannot build a reference list from the contents
                    // of attachments, so we skip them.
                }
                else
                {
                    updatePageReferences( page );

                    serializeAttrsToDisk( page );
                }
            }

            serializeToDisk();
        }

        sw.stop();
        log.info( "Cross reference scan done in "+sw );

        WikiEventUtils.addWikiEventListener(m_engine.getPageManager(),
                                            WikiPageEvent.PAGE_DELETED, this);
    }

    /**
     *  Reads the serialized data from the disk back to memory.
     *  Returns the date when the data was last written on disk.
     */
    @SuppressWarnings("unchecked")
    private synchronized long unserializeFromDisk()
        throws IOException,
               ClassNotFoundException
    {
        ObjectInputStream in = null;
        long saved = 0L;

        try
        {
            StopWatch sw = new StopWatch();
            sw.start();

            File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );

            in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );

            long ver = in.readLong();

            if( ver != serialVersionUID )
            {
                throw new IOException("File format has changed; I need to recalculate references.");
            }

            saved        = in.readLong();
            m_refersTo   = (Map) in.readObject();
            m_referredBy = (Map) in.readObject();

            in.close();

            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );

            sw.stop();
            log.debug("Read serialized data successfully in "+sw);
        }
        finally
        {
            if( in != null ) in.close();
        }

        return saved;
    }

    /**
     *  Serializes hashmaps to disk.  The format is private; don't touch it.
     */
    private synchronized void serializeToDisk()
    {
        ObjectOutputStream out = null;

        try
        {
            StopWatch sw = new StopWatch();
            sw.start();

            File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );

            out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp
            out.writeObject( m_refersTo );
            out.writeObject( m_referredBy );

            out.close();

            sw.stop();

            log.debug("serialization done - took "+sw);
        }
        catch( IOException e )
        {
            log.error("Unable to serialize!", e);

            try
            {
                if( out != null ) out.close();
            }
            catch( IOException ex ) {}
        }
    }

    private String getHashFileName( String pageName )
        throws NoSuchAlgorithmException
    {
        MessageDigest digest = MessageDigest.getInstance("MD5");

        byte[] dig;
        try
        {
            dig = digest.digest( pageName.getBytes("UTF-8") );
        }
        catch (UnsupportedEncodingException e)
        {
            throw new InternalWikiException("AAAAGH! UTF-8 is gone! My eyes! It burns...!", e);
        }

        return TextUtil.toHexString(dig)+".cache";
    }

    /**
     *  Reads the serialized attribute data from the disk back to memory.
     *  Returns the date when the data was last written on disk.
     */
    private synchronized long unserializeAttrsFromDisk(WikiPage p)
        throws IOException,
               ClassNotFoundException
    {
        ObjectInputStream in = null;
        long saved = 0L;

        try
        {
            StopWatch sw = new StopWatch();
            sw.start();

            //
            //  Find the attribute cache, and check if it exists
            //
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );

            f = new File( f, getHashFileName(p.getName()) );

            if( !f.exists() )
            {
                return 0L;
            }

            log.debug("Deserializing attributes for "+p.getName());

            in = new ObjectInputStream( new BufferedInputStream(new FileInputStream(f)) );

            long ver = in.readLong();

            if( ver != serialVersionUID )
            {
                log.debug("File format has changed; cannot deserialize.");
                return 0L;
            }

            saved = in.readLong();

            String name = in.readUTF();

            if( !name.equals(p.getName()) )
            {
                log.debug("File name does not match ("+name+"), skipping...");
                return 0L; // Not here
            }

            long entries = in.readLong();

            for( int i = 0; i < entries; i++ )
            {
                String key   = in.readUTF();
                Object value = in.readObject();

                p.setAttribute( key, value );

                log.debug("   attr: "+key+"="+value);
            }

            in.close();

            sw.stop();
            log.debug("Read serialized data for "+name+" successfully in "+sw);
            p.setHasMetadata();
        }
        catch( NoSuchAlgorithmException e )
        {
            log.fatal("No MD5!?!");
        }
        finally
        {
            if( in != null ) in.close();
        }

        return saved;
    }

    /**
     *  Serializes page attributes to disk.  The format is private; don't touch it.
     */
    private synchronized void serializeAttrsToDisk( WikiPage p )
    {
        ObjectOutputStream out = null;
        StopWatch sw = new StopWatch();
        sw.start();

        try
        {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );

            if( !f.exists() ) f.mkdirs();

            //
            //  Create a digest for the name
            //
            f = new File( f, getHashFileName(p.getName()) );

            // FIXME: There is a concurrency issue here...
            Set entries = p.getAttributes().entrySet();

            if( entries.size() == 0 )
            {
                //  Nothing to serialize, therefore we will just simply remove the
                //  serialization file so that the next time we boot, we don't
                //  deserialize old data.
                f.delete();
                return;
            }

            out = new ObjectOutputStream( new BufferedOutputStream(new FileOutputStream(f)) );

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp

            out.writeUTF( p.getName() );
            out.writeLong( entries.size() );

            for( Iterator i = entries.iterator(); i.hasNext(); )
            {
                Map.Entry e = (Map.Entry) i.next();

                if( e.getValue() instanceof Serializable )
                {
                    out.writeUTF( (String)e.getKey() );
                    out.writeObject( e.getValue() );
                }
            }

            out.close();
        }
        catch( IOException e )
        {
            log.error("Unable to serialize!", e);

            try
            {
                if( out != null ) out.close();
            }
            catch( IOException ex ) {}
        }
        catch( NoSuchAlgorithmException e )
        {
            log.fatal("No MD5 algorithm!?!");
        }
        finally
        {
            sw.stop();

            log.debug("serialization for "+p.getName()+" done - took "+sw);
        }
    }

    /**
     *  After the page has been saved, updates the reference lists.
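     *  Also persists the page's attributes to the on-disk attribute cache.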
579 * 580 * @param context {@inheritDoc} 581 * @param content {@inheritDoc} 582 */ 583 public void postSave( WikiContext context, String content ) 584 { 585 WikiPage page = context.getPage(); 586 587 updateReferences( page.getName(), 588 context.getEngine().scanWikiLinks( page, content ) ); 589 590 serializeAttrsToDisk( page ); 591 } 592 593 /** 594 * Updates the m_referedTo and m_referredBy hashmaps when a page has been 595 * deleted. 596 * <P> 597 * Within the m_refersTo map the pagename is a key. The whole key-value-set 598 * has to be removed to keep the map clean. 599 * Within the m_referredBy map the name is stored as a value. Since a key 600 * can have more than one value we have to delete just the key-value-pair 601 * referring page:deleted page. 602 * 603 * @param page Name of the page to remove from the maps. 604 */ 605 public synchronized void pageRemoved( WikiPage page ) 606 { 607 String pageName = page.getName(); 608 609 pageRemoved(pageName); 610 } 611 612 private void pageRemoved(String pageName) 613 { 614 Collection<String> refTo = m_refersTo.get( pageName ); 615 616 if( refTo != null ) 617 { 618 Iterator itRefTo = refTo.iterator(); 619 while( itRefTo.hasNext() ) 620 { 621 String referredPageName = (String)itRefTo.next(); 622 Set<String> refBy = m_referredBy.get( referredPageName ); 623 624 if( refBy == null ) 625 throw new InternalWikiException("Refmgr out of sync: page "+pageName+" refers to "+referredPageName+", which has null referrers."); 626 627 refBy.remove(pageName); 628 629 m_referredBy.remove( referredPageName ); 630 631 // We won't put it back again if it becomes empty and does not exist. It will be added 632 // later on anyway, if it becomes referenced again. 633 if( !(refBy.isEmpty() && !m_engine.pageExists(referredPageName)) ) 634 { 635 m_referredBy.put( referredPageName, refBy ); 636 } 637 } 638 639 log.debug("Removing from m_refersTo HashMap key:value "+pageName+":"+m_refersTo.get( pageName )); 640 m_refersTo.remove( pageName ); 641 } 642 643 Set<String> refBy = m_referredBy.get( pageName ); 644 if( refBy == null || refBy.isEmpty() ) 645 { 646 m_referredBy.remove( pageName ); 647 } 648 649 // 650 // Remove any traces from the disk, too 651 // 652 serializeToDisk(); 653 654 try 655 { 656 File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR ); 657 658 f = new File( f, getHashFileName(pageName) ); 659 660 if( f.exists() ) f.delete(); 661 } 662 catch( NoSuchAlgorithmException e ) 663 { 664 log.error("What do you mean - no such algorithm?", e); 665 } 666 } 667 668 /** 669 * Updates the referred pages of a new or edited WikiPage. If a refersTo 670 * entry for this page already exists, it is removed and a new one is built 671 * from scratch. Also calls updateReferredBy() for each referenced page. 672 * <P> 673 * This is the method to call when a new page has been created and we 674 * want to a) set up its references and b) notify the referred pages 675 * of the references. Use this method during run-time. 676 * 677 * @param page Name of the page to update. 678 * @param references A Collection of Strings, each one pointing to a page this page references. 679 */ 680 public synchronized void updateReferences( String page, Collection< String > references ) 681 { 682 internalUpdateReferences(page, references); 683 684 serializeToDisk(); 685 } 686 687 /** 688 * Updates the referred pages of a new or edited WikiPage. If a refersTo 689 * entry for this page already exists, it is removed and a new one is built 690 * from scratch. 
     *  Also calls updateReferredBy() for each referenced page.
     *  <p>
     *  This method does not synchronize the database to disk.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences(String page, Collection< String > references)
    {
        page = getFinalPageName( page );

        //
        //  Create a new entry in m_refersTo.
        //
        Collection oldRefTo = m_refersTo.get( page );
        m_refersTo.remove( page );

        TreeSet<String> cleanedRefs = new TreeSet<String>();
        for( Iterator< String > i = references.iterator(); i.hasNext(); )
        {
            String ref = i.next();

            ref = getFinalPageName( ref );

            cleanedRefs.add( ref );
        }

        m_refersTo.put( page, cleanedRefs );

        //
        //  We know the page exists, since it's making references somewhere.
        //  If an entry for it didn't exist previously in m_referredBy, make
        //  sure one is added now.
        //
        if( !m_referredBy.containsKey( page ) )
        {
            m_referredBy.put( page, new TreeSet<String>() );
        }

        //
        //  Get all pages that used to be referred to by 'page' and
        //  remove that reference.  (We don't want to try to figure out
        //  which particular references were removed...)
        //
        cleanReferredBy( page, oldRefTo, cleanedRefs );

        //
        //  Notify all referred pages of their referinesshoodicity.
        //
        Iterator<String> it = cleanedRefs.iterator();
        while( it.hasNext() )
        {
            String referredPageName = it.next();
            updateReferredBy( getFinalPageName(referredPageName), page );
        }
    }

    /**
     *  Returns the refers-to list.  For debugging.
     *
     *  @return The refers-to list.
     */
    protected Map getRefersTo()
    {
        return m_refersTo;
    }

    /**
     *  Returns the referred-by list.  For debugging.
     *
     *  @return Referred-by lists.
     */
    protected Map getReferredBy()
    {
        return m_referredBy;
    }

    /**
     *  Cleans the 'referred by' list, removing references by 'referrer' to
     *  any other page.  Called after 'referrer' is removed.
     */
    private void cleanReferredBy( String referrer,
                                  Collection oldReferred,
                                  Collection<String> newReferred )
    {
        // Two ways to go about this.  One is to look up all pages previously
        // referred to by referrer and remove referrer from their lists, and let
        // the update put them back in (except possibly removed ones).
        // The other is to get the old referred-to list, compare it to the new one,
        // and tell the ones missing in the latter to remove referrer from
        // their list.  Hm.  We'll just try the first for now.  Need to come
        // back and optimize this a bit.

        if( oldReferred == null )
            return;

        Iterator it = oldReferred.iterator();
        while( it.hasNext() )
        {
            String referredPage = (String)it.next();
            Set oldRefBy = m_referredBy.get( referredPage );
            if( oldRefBy != null )
            {
                oldRefBy.remove( referrer );
            }

            // If the page is referred to by no one AND it doesn't even
            // exist, we might just as well forget about this entry.
            // It will be added again elsewhere if new references appear.
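            // (oldRefBy may legitimately be null here if the referred page
            // was never indexed, so the null check below is deliberate.)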
            if( ( ( oldRefBy == null ) || ( oldRefBy.isEmpty() ) ) &&
                ( m_engine.pageExists( referredPage ) == false ) )
            {
                m_referredBy.remove( referredPage );
            }
        }
    }


    /**
     *  When initially building a ReferenceManager from scratch, call this method
     *  BEFORE calling updateReferences() with a full list of existing page names.
     *  It builds the refersTo and referredBy key lists, thus enabling
     *  updateReferences() to function correctly.
     *  <P>
     *  This method should NEVER be called after initialization.  It clears all mappings
     *  from the reference tables.
     *
     *  @param pages a Collection containing WikiPage objects.
     */
    private synchronized void buildKeyLists( Collection pages )
    {
        m_refersTo.clear();
        m_referredBy.clear();

        if( pages == null )
            return;

        Iterator it = pages.iterator();
        try
        {
            while( it.hasNext() )
            {
                WikiPage page = (WikiPage)it.next();
                // We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put( page.getName(), new TreeSet<String>() );
                // Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put( page.getName(), null );
            }
        }
        catch( ClassCastException e )
        {
            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
        }
    }


    /**
     *  Marks the page as referred to by the referrer.  The page does not
     *  have to exist yet; a referred-by entry is created for it on demand.
     *  (This means that some page, somewhere, may have a link to a page
     *  that does not exist.)
     *  <P>
     *  This method is NOT synchronized.  It should only be called from
     *  within a synchronized method, or it should be made synced if necessary.
     */
    private void updateReferredBy( String page, String referrer )
    {
        // We're not really interested in first-level self-references.
        /*
        if( page.equals( referrer ) )
        {
            return;
        }
        */
        // Neither are we interested if plural forms refer to each other.
        if( m_matchEnglishPlurals )
        {
            String p2 = page.endsWith("s") ? page.substring(0,page.length()-1) : page+"s";

            if( referrer.equals(p2) )
            {
                return;
            }
        }

        Set<String> referrers = m_referredBy.get( page );

        // Even if 'page' has not been created yet, it can still be referenced.
        // This requires we don't use m_referredBy keys when looking up missing
        // pages, of course.
        if(referrers == null)
        {
            referrers = new TreeSet<String>();
            m_referredBy.put( page, referrers );
        }
        referrers.add( referrer );
    }


    /**
     *  Clears the references to a certain page so it's no longer in the map.
     *
     *  @param pagename Name of the page to clear references for.
     */
    public synchronized void clearPageEntries( String pagename )
    {
        pagename = getFinalPageName(pagename);

        //
        //  Remove this item from the referredBy list of any page
        //  which this item refers to.
        //
        Collection<String> c = m_refersTo.get( pagename );

        if( c != null )
        {
            for( String key : c )
            {
                Collection<?> dref = m_referredBy.get( key );

                dref.remove( pagename );
            }
        }

        //
        //  Finally, remove direct references.
        //
        m_referredBy.remove( pagename );
        m_refersTo.remove( pagename );
    }


    /**
     *  Finds all unreferenced pages.  This requires a linear scan through
     *  m_referredBy to locate keys with null or empty values.
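     *  <p>
     *  A minimal usage sketch (assuming {@code mgr} is the engine's
     *  ReferenceManager instance):
     *  <pre>
     *     Collection orphans = mgr.findUnreferenced();
     *     // orphans now holds the names of pages that no other page links to
     *  </pre>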
     *
     *  @return The Collection of Strings
     */
    public synchronized Collection findUnreferenced()
    {
        ArrayList<String> unref = new ArrayList<String>();

        for( String key : m_referredBy.keySet() )
        {
            Set<?> refs = getReferenceList( m_referredBy, key );

            if( refs == null || refs.isEmpty() )
            {
                unref.add( key );
            }
        }

        return unref;
    }


    /**
     *  Finds all references to non-existent pages.  This requires a linear
     *  scan through m_refersTo values; each value must have a corresponding
     *  key entry in the reference Maps, otherwise such a page has never
     *  been created.
     *  <P>
     *  Returns a Collection containing Strings of non-existent page names.
     *  Each non-existent page name is shown only once - we don't return information
     *  on who referred to it.
     *
     *  @return A Collection of Strings
     */
    public synchronized Collection findUncreated()
    {
        TreeSet<String> uncreated = new TreeSet<String>();

        // Go through m_refersTo values and check that m_refersTo has the corresponding keys.
        // We want to reread the code to make sure our HashMaps are in sync...

        Collection<Collection<String>> allReferences = m_refersTo.values();

        for( Collection<String> refs : allReferences )
        {
            if( refs != null )
            {
                for( String aReference : refs )
                {
                    if( m_engine.pageExists( aReference ) == false )
                    {
                        uncreated.add( aReference );
                    }
                }
            }
        }

        return uncreated;
    }

    /**
     *  Searches for the given page in the given Map, and returns
     *  the set of references.  This method also takes care of English plural
     *  matching.
     *
     *  @param coll The Map to search in
     *  @param pagename The name to find.
     *  @return The references list.
     */
    private <T> Set<T> getReferenceList( Map<String,Set<T>> coll, String pagename )
    {
        Set<T> refs = coll.get( pagename );

        if( m_matchEnglishPlurals )
        {
            //
            //  We'll also add matches from the "other" page.
            //
            Set<T> refs2;

            if( pagename.endsWith("s") )
            {
                refs2 = coll.get( pagename.substring(0,pagename.length()-1) );
            }
            else
            {
                refs2 = coll.get( pagename+"s" );
            }

            if( refs2 != null )
            {
                if( refs != null )
                    refs.addAll( refs2 );
                else
                    refs = refs2;
            }
        }
        return refs;
    }

    /**
     *  Finds all pages that refer to this page.  Returns null if the page
     *  does not exist or is not referenced at all, otherwise returns a
     *  collection containing page names (String) that refer to this one.
     *  <p>
     *  @param pagename The page to find referrers for.
     *  @return A Collection of Strings.  (This is, in fact, a Set, and the
     *          return type is likely to change to Set at some point.)  May
     *          return null, if the page does not exist, or if it has no
     *          references.
     */
    // FIXME: Return a Set instead of a Collection.
    public synchronized Collection findReferrers( String pagename )
    {
        Set<String> refs = getReferenceList( m_referredBy, pagename );

        if( refs == null || refs.isEmpty() )
        {
            return null;
        }

        return refs;
    }

    /**
     *  Returns all pages that refer to this page.  Note that this method
     *  returns a live Set backed by the reference maps, which may change
     *  abruptly; any access through an iterator may result in a
     *  ConcurrentModificationException.
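     *  If you need a stable snapshot for iteration, take a defensive copy
     *  first (e.g. {@code new TreeSet<String>( set )}).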
     *  <p>
     *  The advantage of using this method over findReferrers() is that
     *  it is very fast, as it does not create a new object.  The disadvantages
     *  are that it does not do any mapping between plural names, and you
     *  may end up getting a ConcurrentModificationException.
     *
     *  @param pageName Page name to query.
     *  @return A Set of Strings containing the names of all the pages that refer
     *          to this page.  May return null, if the page does not exist or
     *          has not been indexed yet.
     *  @since 2.2.33
     */
    public Set findReferredBy( String pageName )
    {
        return m_unmutableReferredBy.get( getFinalPageName(pageName) );
    }

    /**
     *  Returns all pages that this page refers to.  You can use this as a quick
     *  way of getting the links from a page, but note that it does not link any
     *  InterWiki, image, or external links.  It does contain attachments, though.
     *  <p>
     *  The Collection returned is unmodifiable, so you cannot change it.  It does
     *  reflect the current status and thus is a live object.  So, if you are
     *  using any kind of an iterator on it, be prepared for
     *  ConcurrentModificationExceptions.
     *  <p>
     *  The returned value is a Collection, because a page may refer to another
     *  page multiple times.
     *
     *  @param pageName Page name to query
     *  @return A Collection of Strings containing the names of the pages that this page
     *          refers to.  May return null, if the page does not exist or has not
     *          been indexed yet.
     *  @since 2.2.33
     */
    public Collection findRefersTo( String pageName )
    {
        return m_unmutableRefersTo.get( getFinalPageName(pageName) );
    }

    /**
     *  This 'deepHashCode' can be used to determine if there were any
     *  modifications made to the underlying to and by maps of the
     *  ReferenceManager.  The maps of the ReferenceManager are not
     *  synchronized, so someone could add/remove entries in them while the
     *  hashCode is being computed.
     *
     *  @return XOR of the hashCodes for the to and by maps of the
     *          ReferenceManager
     *  @since 2.3.24
     */
    //
    //  This method traps and retries if a concurrent
    //  modification occurs.
    //  TODO: It is unnecessary to calculate the hashcode on every call; it should
    //        be recalculated only when the hashmaps are changed.  This is slow.
    //
    public int deepHashCode()
    {
        boolean failed = true;
        int signature = 0;

        while (failed)
        {
            signature = 0;
            try
            {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            }
            catch (ConcurrentModificationException e)
            {
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     *  Returns a list of all pages that the ReferenceManager knows about.
     *  This should be roughly equivalent to PageManager.getAllPages(), but without
     *  the potential disk access overhead.  Note that this method is not guaranteed
     *  to return a Set of really all pages (especially during startup), but it is
     *  very fast.
     *
     *  @return A Set of all defined page names that ReferenceManager knows about.
     *  @since 2.3.24
     */
    public Set findCreated()
    {
        return new HashSet<String>( m_refersTo.keySet() );
    }

    private String getFinalPageName( String orig )
    {
        try
        {
            String s = m_engine.getFinalPageName( orig );

            if( s == null ) s = orig;

            return s;
        }
        catch( ProviderException e )
        {
            log.error("Error while trying to fetch a page name; trying to cope with the situation.",e);

            return orig;
        }
    }

    /**
     *  {@inheritDoc}
     */
    public void actionPerformed(WikiEvent event)
    {
        if( (event instanceof WikiPageEvent) && (event.getType() == WikiPageEvent.PAGE_DELETED) )
        {
            String pageName = ((WikiPageEvent) event).getPageName();

            if( pageName != null )
            {
                pageRemoved( pageName );
            }
        }
    }
}