/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.references;

import org.apache.commons.lang3.time.StopWatch;
import org.apache.log4j.Logger;
import org.apache.wiki.InternalWikiException;
import org.apache.wiki.LinkCollector;
import org.apache.wiki.api.core.Attachment;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.api.core.Page;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.api.filters.BasePageFilter;
import org.apache.wiki.api.providers.PageProvider;
import org.apache.wiki.api.providers.WikiProvider;
import org.apache.wiki.api.spi.Wiki;
import org.apache.wiki.attachment.AttachmentManager;
import org.apache.wiki.event.WikiEvent;
import org.apache.wiki.event.WikiEventManager;
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.pages.PageManager;
import org.apache.wiki.render.RenderingManager;
import org.apache.wiki.util.TextUtil;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/*
    BUGS

    - if a wikilink is added to a page, then removed, RefMan still thinks that the page refers to the wikilink page. Hm.

    - if a page is deleted, gets very confused.

    - Serialization causes page attributes to be missing, when InitializablePlugins are not executed properly. Thus,
      serialization should really also mark whether a page is serializable or not...
 */


/*
    A word about synchronizing:

    I expect this object to be accessed in three situations:
    - when an Engine is created and it scans its wikipages
    - when the WE saves a page
    - when a JSP page accesses one of the WE's ReferenceManagers to display a list of (un)referenced pages.

    So, access to this class is fairly rare, and usually triggered by user interaction. OTOH, the methods in this class use their storage
    objects intensively (and, sorry to say, in an unoptimized manner =).
    My deduction: using unsynchronized HashMaps etc. and syncing methods or code blocks is preferable to using slow, synced
    storage objects. We don't have iterative code here, so I'm going to use synced methods for now.

    Please contact me if you notice problems with ReferenceManager, and especially with synchronization, or if you have suggestions about
    syncing.

    ebu@memecry.net
*/

/**
 *  Keeps track of wikipage references:
 *  <UL>
 *  <LI>What pages a given page refers to
 *  <LI>What pages refer to a given page
 *  </UL>
 *
 *  This is a quick'n'dirty approach without any finesse in storage and searching algorithms; we trust java.util.*.
 *  <P>
 *  This class contains two HashMaps, m_refersTo and m_referredBy. The first is indexed by WikiPage names and contains a Collection of all
 *  WikiPages the page refers to. (Multiple references are not counted, naturally.) The second is indexed by WikiPage names and contains
 *  a Set of all pages that refer to the indexing page. (Notice - the keys of both Maps should be kept in sync.)
 *  <P>
 *  When a page is added or edited, its references are parsed, a Collection is received, and we crudely replace anything previous with
 *  this new Collection. We then check each referenced page name and make sure they know they are referred to by the new page.
 *  <P>
 *  Based on this information, we can perform non-optimal searches for e.g. unreferenced pages, top ten lists, etc.
 *  <P>
 *  The owning class must take responsibility for filling in any pre-existing information, probably by loading each and every WikiPage
 *  and calling this class to update the references when created.
 *
 *  @since 1.6.1 (as of 2.11.0, moved to org.apache.wiki.references)
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith. However, this is a workaround for the great deal
//        of problems that occur here...
public class DefaultReferenceManager extends BasePageFilter implements ReferenceManager {

    /**
     *  Maps page wikiname to a Collection of pages it refers to. The Collection must contain Strings. The Collection may contain
     *  names of non-existing pages.
     */
    private Map< String, Collection< String > > m_refersTo;
    private Map< String, Collection< String > > m_unmutableRefersTo;

    /**
     *  Maps page wikiname to a Set of referring pages. The Set must contain Strings. Non-existing pages (a reference exists, but
     *  not a file for the page contents) may have an empty Set in m_referredBy.
     */
    private Map< String, Set< String > > m_referredBy;
    private Map< String, Set< String > > m_unmutableReferredBy;

    private boolean m_matchEnglishPlurals;

    private static final Logger log = Logger.getLogger( DefaultReferenceManager.class );
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We also use this as a generic serialization id. */
    private static final long serialVersionUID = 4L;
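
    /*
     *  Illustrative example (hypothetical page names): if a page "MainPage" links to "RecentChanges" and "SandBox",
     *  then after its references have been updated the two maps hold roughly:
     *
     *    m_refersTo:    "MainPage"      -> { "RecentChanges", "SandBox" }
     *    m_referredBy:  "RecentChanges" -> { "MainPage" }
     *                   "SandBox"       -> { "MainPage" }
     *                   "MainPage"      -> { }     (empty entry kept so the key sets stay in sync)
     */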
    /**
     *  Builds a new ReferenceManager.
     *
     *  @param engine The Engine whose references this manager keeps track of.
     */
    public DefaultReferenceManager( final Engine engine ) {
        m_refersTo = new HashMap<>();
        m_referredBy = new HashMap<>();
        m_engine = engine;
        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), Engine.PROP_MATCHPLURALS, false );

        //
        //  Create two maps that contain unmodifiable views of the two basic maps.
        //
        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
        m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo );
    }

    /**
     *  Does a full reference update. Does not sync; assumes that you do it afterwards.
     */
    private void updatePageReferences( final Page page ) throws ProviderException {
        final String content = m_engine.getManager( PageManager.class ).getPageText( page.getName(), PageProvider.LATEST_VERSION );
        final Collection< String > links = scanWikiLinks( page, content );
        final TreeSet< String > res = new TreeSet<>( links );
        final List< Attachment > attachments = m_engine.getManager( AttachmentManager.class ).listAttachments( page );
        for( final Attachment att : attachments ) {
            res.add( att.getName() );
        }

        internalUpdateReferences( page.getName(), res );
    }

    /**
     *  Initializes the entire reference manager with the initial set of pages from the collection.
     *
     *  @param pages A collection of all pages you want to be included in the reference count.
     *  @since 2.2
     *  @throws ProviderException If reading of pages fails.
     */
    @Override
    public void initialize( final Collection< Page > pages ) throws ProviderException {
        log.debug( "Initializing new ReferenceManager with " + pages.size() + " initial pages." );
        final StopWatch sw = new StopWatch();
        sw.start();
        log.info( "Starting cross reference scan of WikiPages" );

        //  First, try to unserialize old data from disk. If that fails, we'll go and update the entire reference lists (which'll take time)
        try {
            //  Unserialize things. The loop below cannot be combined with the other loop below, simply because
            //  engine.getPage() has side effects such as loading and initializing the user databases, which in turn want all
            //  of the pages to be read already...
            //
            //  Yes, this is a kludge. We know. Will be fixed.
            final long saved = unserializeFromDisk();

            for( final Page page : pages ) {
                unserializeAttrsFromDisk( page );
            }

            //  Now we must check if any of the pages have been changed while we were in the electronic la-la-land,
            //  and update the references for them.
            for( final Page page : pages ) {
                if( !( page instanceof Attachment ) ) {
                    //  Refresh with the latest copy
                    final Page wp = m_engine.getManager( PageManager.class ).getPage( page.getName() );

                    if( wp.getLastModified() == null ) {
                        log.fatal( "Provider returns null lastModified. Please submit a bug report." );
                    } else if( wp.getLastModified().getTime() > saved ) {
                        updatePageReferences( wp );
                    }
                }
            }

        } catch( final Exception e ) {
            log.info( "Unable to unserialize old refmgr information, rebuilding database: " + e.getMessage() );
            buildKeyLists( pages );

            //  Scan the existing pages from disk and update references in the manager.
            for( final Page page : pages ) {
                //  We cannot build a reference list from the contents of attachments, so we skip them.
                if( !( page instanceof Attachment ) ) {
                    updatePageReferences( page );
                    serializeAttrsToDisk( page );
                }
            }

            serializeToDisk();
        }

        sw.stop();
        log.info( "Cross reference scan done in " + sw );

        WikiEventManager.addWikiEventListener( m_engine.getManager( PageManager.class ), this );
    }

    /**
     *  Reads the serialized data from the disk back to memory. Returns the date when the data was last written on disk.
     */
    @SuppressWarnings("unchecked")
    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
        final long saved;

        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            final long ver = in.readLong();

            if( ver != serialVersionUID ) {
                throw new IOException( "File format has changed; I need to recalculate references." );
            }

            saved = in.readLong();
            m_refersTo = ( Map< String, Collection< String > > ) in.readObject();
            m_referredBy = ( Map< String, Set< String > > ) in.readObject();

            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
            m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo );

            sw.stop();
            log.debug( "Read serialized data successfully in " + sw );
        }

        return saved;
    }

    /**
     *  Serializes hashmaps to disk. The format is private, don't touch it.
     */
    private synchronized void serializeToDisk() {
        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp
            out.writeObject( m_refersTo );
            out.writeObject( m_referredBy );

            sw.stop();

            log.debug( "serialization done - took " + sw );
        } catch( final IOException ioe ) {
            log.error( "Unable to serialize!", ioe );
        }
    }

    /**
     *  Returns the name of the attribute cache file for the given page: the MD5 hash of the page name, in hex, with a
     *  ".cache" suffix. Returns null if the page name is null or the MD5 algorithm is not available.
     */
    private String getHashFileName( final String pageName ) {
        if( pageName == null ) {
            return null;
        }
        try {
            final MessageDigest digest = MessageDigest.getInstance( "MD5" );
            final byte[] dig = digest.digest( pageName.getBytes( StandardCharsets.UTF_8 ) );

            return TextUtil.toHexString( dig ) + ".cache";
        } catch( final NoSuchAlgorithmException e ) {
            log.fatal( "What do you mean - no such algorithm?", e );
            return null;
        }
    }
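
    /*
     *  Layout note (descriptive only; the format is private and may change): the per-page attribute cache files written
     *  by serializeAttrsToDisk() below contain a long format version, a long timestamp, the UTF-encoded page name, a
     *  long entry count, and then one UTF key plus one serialized Object value per attribute entry.
     *  unserializeAttrsFromDisk() reads them back in the same order.
     */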
    /**
     *  Reads the serialized attribute data for a page from the disk back to memory. Returns the date when the data was
     *  last written on disk.
     */
    private synchronized long unserializeAttrsFromDisk( final Page p ) throws IOException, ClassNotFoundException {
        long saved = 0L;

        //  Find attribute cache, and check if it exists
        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( !f.exists() ) {
                return 0L;
            }

            try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
                final StopWatch sw = new StopWatch();
                sw.start();
                log.debug( "Deserializing attributes for " + p.getName() );

                final long ver = in.readLong();
                if( ver != serialVersionUID ) {
                    log.debug( "File format has changed; cannot deserialize." );
                    return 0L;
                }

                saved = in.readLong();
                final String name = in.readUTF();
                if( !name.equals( p.getName() ) ) {
                    log.debug( "File name does not match (" + name + "), skipping..." );
                    return 0L; // Not here
                }

                final long entries = in.readLong();
                for( int i = 0; i < entries; i++ ) {
                    final String key = in.readUTF();
                    final Object value = in.readObject();
                    p.setAttribute( key, value );
                    log.debug( "   attr: " + key + "=" + value );
                }

                sw.stop();
                log.debug( "Read serialized data for " + name + " successfully in " + sw );
                p.setHasMetadata();
            }
        }

        return saved;
    }

    /**
     *  Serializes hashmaps to disk. The format is private, don't touch it.
     */
    private synchronized void serializeAttrsToDisk( final Page p ) {
        final StopWatch sw = new StopWatch();
        sw.start();

        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            if( !f.exists() ) {
                f.mkdirs();
            }

            //  Create a digest for the name
            f = new File( f, hashName );

            try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
                //  new Set to avoid concurrency issues; only Serializable values can be written out, so filter the rest
                //  away up front to keep the entry count consistent with the data that actually follows it.
                final Set< Map.Entry< String, Object > > entries = new HashSet<>();
                for( final Map.Entry< String, Object > e : p.getAttributes().entrySet() ) {
                    if( e.getValue() instanceof Serializable ) {
                        entries.add( e );
                    }
                }

                if( entries.isEmpty() ) {
                    //  Nothing to serialize, therefore we will just simply remove the serialization file so that the
                    //  next time we boot, we don't deserialize old data.
                    f.delete();
                    return;
                }

                out.writeLong( serialVersionUID );
                out.writeLong( System.currentTimeMillis() ); // Timestamp
                out.writeUTF( p.getName() );
                out.writeLong( entries.size() );

                for( final Map.Entry< String, Object > e : entries ) {
                    out.writeUTF( e.getKey() );
                    out.writeObject( e.getValue() );
                }

            } catch( final IOException e ) {
                log.error( "Unable to serialize!", e );
            } finally {
                sw.stop();
                log.debug( "serialization for " + p.getName() + " done - took " + sw );
            }
        }

    }

    /**
     *  After the page has been saved, updates the reference lists.
     *
     *  @param context {@inheritDoc}
     *  @param content {@inheritDoc}
     */
    @Override
    public void postSave( final Context context, final String content ) {
        final Page page = context.getPage();
        updateReferences( page.getName(), scanWikiLinks( page, content ) );
        serializeAttrsToDisk( page );
    }

    /**
     *  Reads a WikiPageful of data from a String and returns all links internal to this Wiki in a Collection.
     *
     *  @param page The WikiPage to scan
     *  @param pagedata The page contents
     *  @return a Collection of Strings
     */
    @Override
    public Collection< String > scanWikiLinks( final Page page, final String pagedata ) {
        final LinkCollector localCollector = new LinkCollector();
        m_engine.getManager( RenderingManager.class ).textToHTML( Wiki.context().create( m_engine, page ),
                                                                  pagedata,
                                                                  localCollector,
                                                                  null,
                                                                  localCollector,
                                                                  false,
                                                                  true );

        return localCollector.getLinks();
    }

    /**
     *  Updates the m_refersTo and m_referredBy hashmaps when a page has been deleted.
     *  <P>
     *  Within the m_refersTo map the pagename is a key. The whole key-value-set has to be removed to keep the map clean.
     *  Within the m_referredBy map the name is stored as a value. Since a key can have more than one value we have to
     *  delete just the key-value-pair referring page:deleted page.
     *
     *  @param page The page to remove from the maps.
     */
    @Override
    public synchronized void pageRemoved( final Page page ) {
        pageRemoved( page.getName() );
    }

    private void pageRemoved( final String pageName ) {
        final Collection< String > refTo = m_refersTo.get( pageName );
        if( refTo != null ) {
            for( final String referredPageName : refTo ) {
                final Set< String > refBy = m_referredBy.get( referredPageName );
                if( refBy == null ) {
                    throw new InternalWikiException( "Refmgr out of sync: page " + pageName +
                                                     " refers to " + referredPageName + ", which has null referrers." );
                }

                refBy.remove( pageName );
                m_referredBy.remove( referredPageName );

                //  We won't put it back again if it becomes empty and does not exist. It will be added
                //  later on anyway, if it becomes referenced again.
                if( !( refBy.isEmpty() && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPageName ) ) ) {
                    m_referredBy.put( referredPageName, refBy );
                }
            }

            log.debug( "Removing from m_refersTo HashMap key:value " + pageName + ":" + m_refersTo.get( pageName ) );
            m_refersTo.remove( pageName );
        }

        final Set< String > refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() ) {
            m_referredBy.remove( pageName );
        }

        //  Remove any traces from the disk, too
        serializeToDisk();

        final String hashName = getHashFileName( pageName );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( f.exists() ) {
                f.delete();
            }
        }
    }
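
    /*
     *  Usage sketch (illustrative only; "MainPage" is a hypothetical page name, and it is assumed a ReferenceManager
     *  instance is obtainable from the Engine): refreshing and querying references for a single page could look like:
     *
     *    final ReferenceManager refMgr = engine.getManager( ReferenceManager.class );
     *    final Page page = engine.getManager( PageManager.class ).getPage( "MainPage" );
     *    refMgr.updateReferences( page );                                    // re-scan links and update both maps
     *    final Set< String > referrers = refMgr.findReferrers( "MainPage" ); // may be null if nothing refers to it
     */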
    /**
     *  Updates all references for the given page.
     *
     *  @param page wiki page for which references should be updated
     */
    @Override
    public void updateReferences( final Page page ) {
        final String pageData = m_engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
        updateReferences( page.getName(), scanWikiLinks( page, pageData ) );
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is removed
     *  and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <P>
     *  This is the method to call when a new page has been created and we want to a) set up its references and b) notify the
     *  referred pages of the references. Use this method during run-time.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    @Override
    public synchronized void updateReferences( final String page, final Collection< String > references ) {
        internalUpdateReferences( page, references );
        serializeToDisk();
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is
     *  removed and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <p>
     *  This method does not synchronize the database to disk.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences( String page, final Collection< String > references ) {
        page = getFinalPageName( page );

        //  Create a new entry in m_refersTo.
        final Collection< String > oldRefTo = m_refersTo.get( page );
        m_refersTo.remove( page );

        final TreeSet< String > cleanedRefs = new TreeSet<>();
        for( final String ref : references ) {
            final String reference = getFinalPageName( ref );
            cleanedRefs.add( reference );
        }

        m_refersTo.put( page, cleanedRefs );

        //  We know the page exists, since it's making references somewhere. If an entry for it didn't exist previously
        //  in m_referredBy, make sure one is added now.
        if( !m_referredBy.containsKey( page ) ) {
            m_referredBy.put( page, new TreeSet<>() );
        }

        //  Get all pages that used to be referred to by 'page' and remove that reference. (We don't want to try to figure out
        //  which particular references were removed...)
        cleanReferredBy( page, oldRefTo, cleanedRefs );

        //  Notify all referred pages of their referinesshoodicity.
        for( final String referredPageName : cleanedRefs ) {
            updateReferredBy( getFinalPageName( referredPageName ), page );
        }
    }

    /**
     *  Returns the refers-to list. For debugging.
     *
     *  @return The refers-to list.
     */
    protected Map< String, Collection< String > > getRefersTo() {
        return m_refersTo;
    }

    /**
     *  Returns the referred-by list. For debugging.
     *
     *  @return Referred-by lists.
     */
    protected Map< String, Set< String > > getReferredBy() {
        return m_referredBy;
    }

    /**
     *  Cleans the 'referred by' list, removing references by 'referrer' to any other page. Called after 'referrer' is removed.
     *
     *  Two ways to go about this.
     *  One is to look up all pages previously referred to by referrer and remove referrer
     *  from their lists, and let the update put them back in (except possibly removed ones).
     *
     *  The other is to get the old referred-to list, compare to the new, and tell the ones missing in the latter to remove referrer from
     *  their list.
     *
     *  We'll just try the first for now. Need to come back and optimize this a bit.
     */
    private void cleanReferredBy( final String referrer,
                                  final Collection< String > oldReferred,
                                  final Collection< String > newReferred ) {
        if( oldReferred == null ) {
            return;
        }

        for( final String referredPage : oldReferred ) {
            final Set< String > oldRefBy = m_referredBy.get( referredPage );
            if( oldRefBy != null ) {
                oldRefBy.remove( referrer );
            }

            //  If the page is referred to by no one AND it doesn't even exist, we might just as well forget about this
            //  entry. It will be added again elsewhere if new references appear.
            if( ( oldRefBy == null || oldRefBy.isEmpty() ) && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPage ) ) {
                m_referredBy.remove( referredPage );
            }
        }
    }

    /**
     *  When initially building a ReferenceManager from scratch, call this method BEFORE calling updateReferences() with
     *  a full list of existing page names. It builds the refersTo and referredBy key lists, thus enabling updateReferences()
     *  to function correctly.
     *  <P>
     *  This method should NEVER be called after initialization. It clears all mappings from the reference tables.
     *
     *  @param pages a Collection containing WikiPage objects.
     */
    private synchronized void buildKeyLists( final Collection< Page > pages ) {
        m_refersTo.clear();
        m_referredBy.clear();
        if( pages == null ) {
            return;
        }

        try {
            for( final Page page : pages ) {
                //  We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put( page.getName(), new TreeSet<>() );
                //  Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put( page.getName(), null );
            }
        } catch( final ClassCastException e ) {
            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
        }
    }


    /**
     *  Marks the page as referred to by the referrer. The page does not need to exist yet; a referred-by entry is
     *  created on demand so that the reference shows up once the page is created. (This means that some page, somewhere,
     *  may have a link to a page that does not exist.)
     *  <P>
     *  This method is NOT synchronized. It should only be called from within a synchronized method, or it should be
     *  made synced if necessary.
     */
    private void updateReferredBy( final String page, final String referrer ) {
        //  We're not really interested in first level self-references.
        /*
        if( page.equals( referrer ) )
        {
            return;
        }
        */
        //  Neither are we interested if plural forms refer to each other.
        if( m_matchEnglishPlurals ) {
            final String p2 = page.endsWith( "s" ) ? page.substring( 0, page.length() - 1 ) : page + "s";
            if( referrer.equals( p2 ) ) {
                return;
            }
        }

        //  Even if 'page' has not been created yet, it can still be referenced. This requires we don't use m_referredBy
        //  keys when looking up missing pages, of course.
        final Set< String > referrers = m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );
        referrers.add( referrer );
    }


    /**
     *  Clears the references to a certain page so it's no longer in the map.
     *
     *  @param pagename Name of the page to clear references for.
     */
    @Override public synchronized void clearPageEntries( String pagename ) {
        pagename = getFinalPageName( pagename );

        //  Remove this item from the referredBy list of any page which this item refers to.
        final Collection< String > c = m_refersTo.get( pagename );
        if( c != null ) {
            for( final String key : c ) {
                final Collection< ? > dref = m_referredBy.get( key );
                dref.remove( pagename );
            }
        }

        //  Finally, remove direct references.
        m_referredBy.remove( pagename );
        m_refersTo.remove( pagename );
    }


    /**
     *  Finds all unreferenced pages. This requires a linear scan through m_referredBy to locate keys with null or empty values.
     *
     *  @return The Collection of Strings
     */
    @Override public synchronized Collection< String > findUnreferenced() {
        final ArrayList< String > unref = new ArrayList<>();
        for( final String key : m_referredBy.keySet() ) {
            final Set< ? > refs = getReferenceList( m_referredBy, key );
            if( refs == null || refs.isEmpty() ) {
                unref.add( key );
            }
        }

        return unref;
    }


    /**
     *  Finds all references to non-existent pages. This requires a linear scan through m_refersTo values; each value
     *  must have a corresponding key entry in the reference Maps, otherwise such a page has never been created.
     *  <P>
     *  Returns a Collection containing Strings of uncreated page names. Each non-existent page name is shown only
     *  once - we don't return information on who referred to it.
     *
     *  @return A Collection of Strings
     */
    @Override public synchronized Collection< String > findUncreated() {
        final TreeSet< String > uncreated = new TreeSet<>();

        //  Go through m_refersTo values and check that m_refersTo has the corresponding keys.
        //  We want to reread the code to make sure our HashMaps are in sync...
        final Collection< Collection< String > > allReferences = m_refersTo.values();
        for( final Collection< String > refs : allReferences ) {
            if( refs != null ) {
                for( final String aReference : refs ) {
                    if( !m_engine.getManager( PageManager.class ).wikiPageExists( aReference ) ) {
                        uncreated.add( aReference );
                    }
                }
            }
        }

        return uncreated;
    }

    /**
     *  Searches for the given page in the given Map, and returns the set of references. This method also takes care of
     *  English plural matching.
     *
     *  @param coll The Map to search in
     *  @param pagename The name to find.
     *  @return The references list.
     */
    private < T > Set< T > getReferenceList( final Map< String, Set< T > > coll, final String pagename ) {
        Set< T > refs = coll.get( pagename );

        if( m_matchEnglishPlurals ) {
            //  We also include matches recorded under the "other" (singular or plural) form of the page name.
            final Set< T > refs2;

            if( pagename.endsWith( "s" ) ) {
                refs2 = coll.get( pagename.substring( 0, pagename.length() - 1 ) );
            } else {
                refs2 = coll.get( pagename + "s" );
            }

            if( refs2 != null ) {
                if( refs != null ) {
                    refs.addAll( refs2 );
                } else {
                    refs = refs2;
                }
            }
        }
        return refs;
    }
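
    /*
     *  Illustrative example (hypothetical page names): when English plural matching is enabled,
     *  getReferenceList( m_referredBy, "PageAlias" ) also folds in any referrers recorded under "PageAliases",
     *  so the lookups below treat the singular and plural forms of a page name as the same page.
     */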
    /**
     *  Finds all pages that refer to this page. Returns null if the page does not exist or is not referenced at all,
     *  otherwise returns a collection containing page names (String) that refer to this one.
     *  <p>
     *  @param pagename The page to find referrers for.
     *  @return A Set of Strings. May return null, if the page does not exist, or if it has no references.
     */
    @Override public synchronized Set< String > findReferrers( final String pagename ) {
        final Set< String > refs = getReferenceList( m_referredBy, pagename );
        if( refs == null || refs.isEmpty() ) {
            return null;
        }

        return refs;
    }

    /**
     *  Returns all pages that refer to this page. Note that this method returns a live view into the internal reference
     *  data, which may change abruptly. So any access to any iterator may result in a ConcurrentModificationException.
     *  <p>
     *  The advantage of using this method over findReferrers() is that it is very fast, as it does not create a new object.
     *  The disadvantages are that it does not do any mapping between plural names, and you may end up getting a
     *  ConcurrentModificationException.
     *
     *  @param pageName Page name to query.
     *  @return A Set of Strings containing the names of all the pages that refer to this page. May return null, if the page does
     *          not exist or has not been indexed yet.
     *  @since 2.2.33
     */
    @Override public Set< String > findReferredBy( final String pageName ) {
        return m_unmutableReferredBy.get( getFinalPageName( pageName ) );
    }

    /**
     *  Returns all pages that this page refers to. You can use this as a quick way of getting the links from a page, but note
     *  that it does not link any InterWiki, image, or external links. It does contain attachments, though.
     *  <p>
     *  The Collection returned comes straight from the internal data, so you should not change it. It does reflect the current
     *  status and thus is a live object. So, if you are using any kind of an iterator on it, be prepared for
     *  ConcurrentModificationExceptions.
     *  <p>
     *  The returned value is a Collection, because a page may refer to another page multiple times.
     *
     *  @param pageName Page name to query
     *  @return A Collection of Strings containing the names of the pages that this page refers to. May return null, if the page
     *          does not exist or has not been indexed yet.
     *  @since 2.2.33
     */
    @Override public Collection< String > findRefersTo( final String pageName ) {
        return m_unmutableRefersTo.get( getFinalPageName( pageName ) );
    }

    /**
     *  This 'deepHashCode' can be used to determine if there were any modifications made to the underlying to and by maps of the
     *  ReferenceManager. The maps of the ReferenceManager are not synchronized, so someone could add/remove entries in them while the
     *  hashCode is being computed.
     *
     *  This method traps and retries if a concurrent modification occurs.
     *
     *  @return Combined (XORed) hashCodes of the to and by maps of the ReferenceManager
     *  @since 2.3.24
     */
    //
    //  TODO: It is unnecessary to recalculate the hashcode on every call; it should be recalculated only when the hashmaps change. This is slow.
    //
    public int deepHashCode() {
        boolean failed = true;
        int signature = 0;

        while( failed ) {
            signature = 0;
            try {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            } catch( final ConcurrentModificationException e ) {
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     *  Returns a list of all pages that the ReferenceManager knows about. This should be roughly equivalent to
     *  PageManager.getAllPages(), but without the potential disk access overhead. Note that this method is not guaranteed
     *  to return a Set of really all pages (especially during startup), but it is very fast.
     *
     *  @return A Set of all defined page names that ReferenceManager knows about.
     *  @since 2.3.24
     */
    @Override public Set< String > findCreated() {
        return new HashSet<>( m_refersTo.keySet() );
    }

    /**
     *  Resolves a page name via Engine.getFinalPageName(). If resolution fails or yields null, the original name is returned.
     */
    private String getFinalPageName( final String orig ) {
        try {
            final String s = m_engine.getFinalPageName( orig );
            return s != null ? s : orig;
        } catch( final ProviderException e ) {
            log.error( "Error while trying to fetch a page name; trying to cope with the situation.", e );
            return orig;
        }
    }

    /**
     *  {@inheritDoc}
     */
    @Override
    public void actionPerformed( final WikiEvent event ) {
        if( event instanceof WikiPageEvent && event.getType() == WikiPageEvent.PAGE_DELETED ) {
            final String pageName = ( ( WikiPageEvent ) event ).getPageName();
            if( pageName != null ) {
                pageRemoved( pageName );
            }
        }
    }

}