/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.references;

import org.apache.commons.lang3.time.StopWatch;
import org.apache.log4j.Logger;
import org.apache.wiki.InternalWikiException;
import org.apache.wiki.LinkCollector;
import org.apache.wiki.api.core.Attachment;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.api.core.Page;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.api.filters.BasePageFilter;
import org.apache.wiki.api.providers.PageProvider;
import org.apache.wiki.api.providers.WikiProvider;
import org.apache.wiki.api.spi.Wiki;
import org.apache.wiki.attachment.AttachmentManager;
import org.apache.wiki.event.WikiEvent;
import org.apache.wiki.event.WikiEventManager;
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.pages.PageManager;
import org.apache.wiki.render.RenderingManager;
import org.apache.wiki.util.TextUtil;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;

/*
  BUGS

  - if a wikilink is added to a page, then removed, RefMan still thinks that the page refers to the wikilink page. Hm.

  - if a page is deleted, RefMan gets very confused.

  - Serialization causes page attributes to be missing, when InitializablePlugins are not executed properly. Thus,
    serialization should really also mark whether a page is serializable or not...
 */


/*
   A word about synchronizing:

   I expect this object to be accessed in three situations:
   - when an Engine is created and it scans its wikipages
   - when the WE saves a page
   - when a JSP page accesses one of the WE's ReferenceManagers to display a list of (un)referenced pages.

   So, access to this class is fairly rare, and usually triggered by user interaction. OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =). My deduction: using unsynchronized HashMaps etc. and syncing methods
   or code blocks is preferable to using slow, synced storage objects. We don't have iterative code here, so I'm going to use synced
   methods for now.

   Please contact me if you notice problems with ReferenceManager, and especially with synchronization, or if you have suggestions about
   syncing.
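
   (Nowadays the maps themselves are ConcurrentHashMaps and the public mutators are synchronized methods, so individual
   updates are thread-safe; callers iterating over the unmodifiable views handed out below can still observe concurrent
   changes.)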

   ebu@memecry.net
*/

/**
 *  Keeps track of wikipage references:
 *  <UL>
 *  <LI>What pages a given page refers to
 *  <LI>What pages refer to a given page
 *  </UL>
 *
 *  This is a quick'n'dirty approach without any finesse in storage and searching algorithms; we trust java.util.*.
 *  <P>
 *  This class contains two HashMaps, m_refersTo and m_referredBy. The first is indexed by WikiPage names and contains a Collection of all
 *  WikiPages the page refers to. (Multiple references are not counted, naturally.) The second is indexed by WikiPage names and contains
 *  a Set of all pages that refer to the indexing page. (Notice - the keys of both Maps should be kept in sync.)
 *  <P>
 *  When a page is added or edited, its references are parsed, a Collection is received, and we crudely replace anything previous with
 *  this new Collection. We then check each referenced page name and make sure they know they are referred to by the new page.
 *  <P>
 *  Based on this information, we can perform non-optimal searches for e.g. unreferenced pages, top ten lists, etc.
 *  <P>
 *  The owning class must take responsibility for filling in any pre-existing information, probably by loading each and every WikiPage
 *  and calling this class to update the references when created.
 *
 *  @since 1.6.1 (as of 2.11.0, moved to org.apache.wiki.references)
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith. However, this is a workaround for the great deal
//        of problems that occur here...
public class DefaultReferenceManager extends BasePageFilter implements ReferenceManager {

    /**
     *  Maps page wikiname to a Collection of pages it refers to. The Collection must contain Strings. The Collection may contain
     *  names of non-existing pages.
     */
    private Map< String, Collection< String > > m_refersTo;
    private Map< String, Collection< String > > m_unmutableRefersTo;

    /**
     *  Maps page wikiname to a Set of referring pages. The Set must contain Strings. Non-existing pages (a reference exists, but
     *  not a file for the page contents) may have an empty Set in m_referredBy.
     */
    private Map< String, Set< String > > m_referredBy;
    private Map< String, Set< String > > m_unmutableReferredBy;

    private boolean m_matchEnglishPlurals;

    private static final Logger log = Logger.getLogger( DefaultReferenceManager.class );
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We use this also as a generic serialization id. */
    private static final long serialVersionUID = 4L;

    /**
     *  Builds a new ReferenceManager.
     *
     *  @param engine The Engine whose references this manager keeps track of.
     */
    public DefaultReferenceManager( final Engine engine ) {
        m_refersTo = new ConcurrentHashMap<>();
        m_referredBy = new ConcurrentHashMap<>();
        m_engine = engine;
        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), Engine.PROP_MATCHPLURALS, false );

        //
        //  Create two maps that contain unmodifiable versions of the two basic maps.
        //
        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
        m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );
    }

    /**
     *  Does a full reference update. Does not sync; assumes that you do it afterwards.
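     *  <p>
     *  The update fetches the latest page text, collects the wiki links found in it together with the names of the
     *  page's attachments, and hands the resulting set to {@link #internalUpdateReferences(String, Collection)}.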
     */
    private void updatePageReferences( final Page page ) throws ProviderException {
        final String content = m_engine.getManager( PageManager.class ).getPageText( page.getName(), PageProvider.LATEST_VERSION );
        final Collection< String > links = scanWikiLinks( page, content );
        final TreeSet< String > res = new TreeSet<>( links );
        final List< Attachment > attachments = m_engine.getManager( AttachmentManager.class ).listAttachments( page );
        for( final Attachment att : attachments ) {
            res.add( att.getName() );
        }

        internalUpdateReferences( page.getName(), res );
    }

    /**
     *  Initializes the entire reference manager with the initial set of pages from the collection.
     *
     *  @param pages A collection of all pages you want to be included in the reference count.
     *  @since 2.2
     *  @throws ProviderException If reading of pages fails.
     */
    @Override
    public void initialize( final Collection< Page > pages ) throws ProviderException {
        log.debug( "Initializing new ReferenceManager with " + pages.size() + " initial pages." );
        final StopWatch sw = new StopWatch();
        sw.start();
        log.info( "Starting cross reference scan of WikiPages" );

        //  First, try to serialize old data from disk. If that fails, we'll go and update the entire reference lists (which'll take time).
        try {
            //  Unserialize things. The loop below cannot be combined with the other loop below, simply because
            //  engine.getPage() has side effects such as loading and initializing the user databases, which in turn want all
            //  of the pages to be read already...
            //
            //  Yes, this is a kludge. We know. Will be fixed.
            final long saved = unserializeFromDisk();

            for( final Page page : pages ) {
                unserializeAttrsFromDisk( page );
            }

            //  Now we must check if any of the pages have been changed while we were in the electronic la-la-land,
            //  and update the references for them.
            for( final Page page : pages ) {
                if( !( page instanceof Attachment ) ) {
                    //  Refresh with the latest copy
                    final Page wp = m_engine.getManager( PageManager.class ).getPage( page.getName() );

                    if( wp.getLastModified() == null ) {
                        log.fatal( "Provider returns null lastModified. Please submit a bug report." );
                    } else if( wp.getLastModified().getTime() > saved ) {
                        updatePageReferences( wp );
                    }
                }
            }

        } catch( final Exception e ) {
            log.info( "Unable to unserialize old refmgr information, rebuilding database: " + e.getMessage() );
            buildKeyLists( pages );

            //  Scan the existing pages from disk and update references in the manager.
            for( final Page page : pages ) {
                //  We cannot build a reference list from the contents of attachments, so we skip them.
                if( !( page instanceof Attachment ) ) {
                    updatePageReferences( page );
                    serializeAttrsToDisk( page );
                }
            }

            serializeToDisk();
        }

        sw.stop();
        log.info( "Cross reference scan done in " + sw );

        WikiEventManager.addWikiEventListener( m_engine.getManager( PageManager.class ), this );
    }

    /**
     *  Reads the serialized data from the disk back to memory.
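     *  The stream layout mirrors what {@link #serializeToDisk()} writes (sketched below); a version mismatch raises an
     *  {@link IOException} so that the caller falls back to a full rebuild of the reference lists.
     *  <pre>{@code
     *  long   serialVersionUID    // format version, must match
     *  long   timestamp           // time of the last write, returned to the caller
     *  Object m_refersTo          // Map<String, Collection<String>>
     *  Object m_referredBy        // Map<String, Set<String>>
     *  }</pre>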
     *  Returns the date when the data was last written on disk.
     */
    @SuppressWarnings( "unchecked" )
    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
        final long saved;

        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            final long ver = in.readLong();

            if( ver != serialVersionUID ) {
                throw new IOException( "File format has changed; I need to recalculate references." );
            }

            saved = in.readLong();
            m_refersTo = ( Map< String, Collection< String > > ) in.readObject();
            m_referredBy = ( Map< String, Set< String > > ) in.readObject();

            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
            m_unmutableRefersTo   = Collections.unmodifiableMap( m_refersTo );

            sw.stop();
            log.debug( "Read serialized data successfully in " + sw );
        }

        return saved;
    }

    /**
     *  Serializes hashmaps to disk. The format is private, don't touch it.
     */
    private synchronized void serializeToDisk() {
        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp
            out.writeObject( m_refersTo );
            out.writeObject( m_referredBy );

            sw.stop();

            log.debug( "serialization done - took " + sw );
        } catch( final IOException ioe ) {
            log.error( "Unable to serialize!", ioe );
        }
    }

    private String getHashFileName( final String pageName ) {
        if( pageName == null ) {
            return null;
        }
        try {
            final MessageDigest digest = MessageDigest.getInstance( "MD5" );
            final byte[] dig = digest.digest( pageName.getBytes( StandardCharsets.UTF_8 ) );

            return TextUtil.toHexString( dig ) + ".cache";
        } catch( final NoSuchAlgorithmException e ) {
            log.fatal( "What do you mean - no such algorithm?", e );
            return null;
        }
    }

    /**
     *  Reads the serialized data from the disk back to memory.
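     *  The attributes of each page live in their own cache file under the {@code refmgr-attr} work directory, named
     *  after the MD5 hash of the page name; if the file is missing, carries a stale format version or belongs to a
     *  different page, the method simply returns 0L without touching the page.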
     *  Returns the date when the data was last written on disk.
     */
    private synchronized long unserializeAttrsFromDisk( final Page p ) throws IOException, ClassNotFoundException {
        long saved = 0L;

        //  Find the attribute cache, and check if it exists.
        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( !f.exists() ) {
                return 0L;
            }

            try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( new FileInputStream( f ) ) ) ) {
                final StopWatch sw = new StopWatch();
                sw.start();
                log.debug( "Deserializing attributes for " + p.getName() );

                final long ver = in.readLong();
                if( ver != serialVersionUID ) {
                    log.debug( "File format has changed; cannot deserialize." );
                    return 0L;
                }

                saved = in.readLong();
                final String name = in.readUTF();
                if( !name.equals( p.getName() ) ) {
                    log.debug( "File name does not match (" + name + "), skipping..." );
                    return 0L; // Not here
                }

                final long entries = in.readLong();
                for( int i = 0; i < entries; i++ ) {
                    final String key = in.readUTF();
                    final Object value = in.readObject();
                    p.setAttribute( key, value );
                    log.debug( "   attr: " + key + "=" + value );
                }

                sw.stop();
                log.debug( "Read serialized data for " + name + " successfully in " + sw );
                p.setHasMetadata();
            }
        }

        return saved;
    }

    /**
     *  Serializes the page attributes to disk. The format is private, don't touch it.
     */
    private synchronized void serializeAttrsToDisk( final Page p ) {
        final StopWatch sw = new StopWatch();
        sw.start();

        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            if( !f.exists() ) {
                f.mkdirs();
            }

            //  Create a digest for the name
            f = new File( f, hashName );

            try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( new FileOutputStream( f ) ) ) ) {
                //  Copy into a new Set to avoid concurrency issues, and keep only Serializable values so that the
                //  entry count written below matches the number of entries actually written.
                final Set< Map.Entry< String, Object > > entries = new HashSet<>( p.getAttributes().entrySet() );
                entries.removeIf( entry -> !( entry.getValue() instanceof Serializable ) );

                if( entries.isEmpty() ) {
                    //  Nothing to serialize, therefore we will just simply remove the serialization file so that the
                    //  next time we boot, we don't deserialize old data.
                    f.delete();
                    return;
                }

                out.writeLong( serialVersionUID );
                out.writeLong( System.currentTimeMillis() ); // Timestamp
                out.writeUTF( p.getName() );
                out.writeLong( entries.size() );

                for( final Map.Entry< String, Object > e : entries ) {
                    out.writeUTF( e.getKey() );
                    out.writeObject( e.getValue() );
                }

            } catch( final IOException e ) {
                log.error( "Unable to serialize!", e );
            } finally {
                sw.stop();
                log.debug( "serialization for " + p.getName() + " done - took " + sw );
            }
        }

    }

    /**
     *  After the page has been saved, updates the reference lists.
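     *  The saved content is re-scanned with {@link #scanWikiLinks(Page, String)} and the result replaces the page's
     *  previous refers-to entry; the page's attributes are also written to the per-page attribute cache.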
     *
     *  @param context {@inheritDoc}
     *  @param content {@inheritDoc}
     */
    @Override
    public void postSave( final Context context, final String content ) {
        final Page page = context.getPage();
        updateReferences( page.getName(), scanWikiLinks( page, content ) );
        serializeAttrsToDisk( page );
    }

    /**
     *  Reads a WikiPageful of data from a String and returns all links internal to this Wiki in a Collection.
     *
     *  @param page The WikiPage to scan
     *  @param pagedata The page contents
     *  @return a Collection of Strings
     */
    @Override
    public Collection< String > scanWikiLinks( final Page page, final String pagedata ) {
        final LinkCollector localCollector = new LinkCollector();
        m_engine.getManager( RenderingManager.class ).textToHTML( Wiki.context().create( m_engine, page ),
                                                                  pagedata,
                                                                  localCollector,
                                                                  null,
                                                                  localCollector,
                                                                  false,
                                                                  true );

        return localCollector.getLinks();
    }

    /**
     *  Updates the m_refersTo and m_referredBy hashmaps when a page has been deleted.
     *  <P>
     *  Within the m_refersTo map the pagename is a key. The whole key-value set has to be removed to keep the map clean.
     *  Within the m_referredBy map the name is stored as a value. Since a key can have more than one value we have to
     *  delete just the key-value pair referring page:deleted page.
     *
     *  @param page Name of the page to remove from the maps.
     */
    @Override
    public synchronized void pageRemoved( final Page page ) {
        pageRemoved( page.getName() );
    }

    private void pageRemoved( final String pageName ) {
        final Collection< String > refTo = m_refersTo.get( pageName );
        if( refTo != null ) {
            for( final String referredPageName : refTo ) {
                final Set< String > refBy = m_referredBy.get( referredPageName );
                if( refBy == null ) {
                    throw new InternalWikiException( "Refmgr out of sync: page " + pageName +
                                                     " refers to " + referredPageName + ", which has null referrers." );
                }

                refBy.remove( pageName );
                m_referredBy.remove( referredPageName );

                //  We won't put it back again if it becomes empty and does not exist. It will be added
                //  later on anyway, if it becomes referenced again.
                if( !( refBy.isEmpty() && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPageName ) ) ) {
                    m_referredBy.put( referredPageName, refBy );
                }
            }

            log.debug( "Removing from m_refersTo HashMap key:value " + pageName + ":" + m_refersTo.get( pageName ) );
            m_refersTo.remove( pageName );
        }

        final Set< String > refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() ) {
            m_referredBy.remove( pageName );
        }

        //  Remove any traces from the disk, too
        serializeToDisk();

        final String hashName = getHashFileName( pageName );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( f.exists() ) {
                f.delete();
            }
        }
    }

    /**
     *  Updates all references for the given page.
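     *  The page text is fetched from the {@link PageManager} and re-scanned, so this is a convenience wrapper around
     *  {@link #updateReferences(String, Collection)}. A minimal usage sketch, assuming the ReferenceManager is looked
     *  up from the Engine like the other managers used in this class (the page name is hypothetical):
     *  <pre>{@code
     *  final ReferenceManager refMgr = engine.getManager( ReferenceManager.class );
     *  final Page page = engine.getManager( PageManager.class ).getPage( "Main" );
     *  refMgr.updateReferences( page );
     *  }</pre>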
     *
     *  @param page wiki page for which references should be updated
     */
    @Override
    public void updateReferences( final Page page ) {
        final String pageData = m_engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
        updateReferences( page.getName(), scanWikiLinks( page, pageData ) );
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is removed
     *  and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <P>
     *  This is the method to call when a new page has been created and we want to a) set up its references and b) notify the
     *  referred pages of the references. Use this method during run-time.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    @Override
    public synchronized void updateReferences( final String page, final Collection< String > references ) {
        internalUpdateReferences( page, references );
        serializeToDisk();
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is
     *  removed and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <p>
     *  This method does not synchronize the database to disk.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences( String page, final Collection< String > references ) {
        page = getFinalPageName( page );

        //  Create a new entry in m_refersTo.
        final Collection< String > oldRefTo = m_refersTo.get( page );
        m_refersTo.remove( page );

        final TreeSet< String > cleanedRefs = new TreeSet<>();
        for( final String ref : references ) {
            final String reference = getFinalPageName( ref );
            cleanedRefs.add( reference );
        }

        m_refersTo.put( page, cleanedRefs );

        //  We know the page exists, since it's making references somewhere. If an entry for it didn't exist previously
        //  in m_referredBy, make sure one is added now.
        if( !m_referredBy.containsKey( page ) ) {
            m_referredBy.put( page, new TreeSet<>() );
        }

        //  Get all pages that used to be referred to by 'page' and remove that reference. (We don't want to try to figure out
        //  which particular references were removed...)
        cleanReferredBy( page, oldRefTo, cleanedRefs );

        //  Notify all referred pages of their referinesshoodicity.
        for( final String referredPageName : cleanedRefs ) {
            updateReferredBy( getFinalPageName( referredPageName ), page );
        }
    }

    /**
     *  Returns the refers-to list. For debugging.
     *
     *  @return The refers-to list.
     */
    protected Map< String, Collection< String > > getRefersTo() {
        return m_refersTo;
    }

    /**
     *  Returns the referred-by list. For debugging.
     *
     *  @return Referred-by lists.
     */
    protected Map< String, Set< String > > getReferredBy() {
        return m_referredBy;
    }

    /**
     *  Cleans the 'referred by' list, removing references by 'referrer' to any other page. Called after 'referrer' is removed.
     *
     *  Two ways to go about this.
     *  One is to look up all pages previously referred to by the referrer and remove the referrer
     *  from their lists, and let the update put them back in (except possibly removed ones).
     *
     *  The other is to get the old referred-to list, compare to the new, and tell the ones missing in the latter to remove the
     *  referrer from their list.
     *
     *  We'll just try the first for now. Need to come back and optimize this a bit.
     */
    private void cleanReferredBy( final String referrer,
                                  final Collection< String > oldReferred,
                                  final Collection< String > newReferred ) {
        if( oldReferred == null ) {
            return;
        }

        for( final String referredPage : oldReferred ) {
            final Set< String > oldRefBy = m_referredBy.get( referredPage );
            if( oldRefBy != null ) {
                oldRefBy.remove( referrer );
            }

            //  If the page is referred to by no one AND it doesn't even exist, we might just as well forget about this
            //  entry. It will be added again elsewhere if new references appear.
            if( ( oldRefBy == null || oldRefBy.isEmpty() ) && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPage ) ) {
                m_referredBy.remove( referredPage );
            }
        }
    }

    /**
     *  When initially building a ReferenceManager from scratch, call this method BEFORE calling updateReferences() with
     *  a full list of existing page names. It builds the refersTo and referredBy key lists, thus enabling updateReferences()
     *  to function correctly.
     *  <P>
     *  This method should NEVER be called after initialization. It clears all mappings from the reference tables.
     *
     *  @param pages a Collection containing WikiPage objects.
     */
    private synchronized void buildKeyLists( final Collection< Page > pages ) {
        m_refersTo.clear();
        m_referredBy.clear();
        if( pages == null ) {
            return;
        }

        try {
            for( final Page page : pages ) {
                //  We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put( page.getName(), new TreeSet<>() );
                //  Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put( page.getName(), new TreeSet<>() );
            }
        } catch( final ClassCastException e ) {
            log.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
        }
    }


    /**
     *  Marks the page as referred to by the referrer. The page does not need to exist yet; an entry is created for it
     *  on demand. (A reference to a missing page simply means that some page, somewhere, has a link to a page that does
     *  not exist.)
     *  <P>
     *  This method is NOT synchronized. It should only be called from within a synchronized method, or it should be
     *  made synced if necessary.
     */
    private void updateReferredBy( final String page, final String referrer ) {
        //  We're not really interested in first-level self-references.
        /*
        if( page.equals( referrer ) )
        {
            return;
        }
        */
        //  Neither are we interested if plural forms refer to each other.
        if( m_matchEnglishPlurals ) {
            final String p2 = page.endsWith( "s" ) ? page.substring( 0, page.length() - 1 ) : page + "s";
            if( referrer.equals( p2 ) ) {
                return;
            }
        }

        //  Even if 'page' has not been created yet, it can still be referenced. This requires we don't use m_referredBy
        //  keys when looking up missing pages, of course.
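        //  computeIfAbsent() creates the referrer set on demand, so links to not-yet-created pages are recorded as well.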
        final Set< String > referrers = m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );
        referrers.add( referrer );
    }


    /**
     *  Clears the references to a certain page so it's no longer in the map.
     *
     *  @param pagename Name of the page to clear references for.
     */
    @Override
    public synchronized void clearPageEntries( String pagename ) {
        pagename = getFinalPageName( pagename );

        //  Remove this item from the referredBy list of any page which this item refers to.
        final Collection< String > c = m_refersTo.get( pagename );
        if( c != null ) {
            for( final String key : c ) {
                final Collection< ? > dref = m_referredBy.get( key );
                dref.remove( pagename );
            }
        }

        //  Finally, remove direct references.
        m_referredBy.remove( pagename );
        m_refersTo.remove( pagename );
    }


    /**
     *  Finds all unreferenced pages. This requires a linear scan through m_referredBy to locate keys with null or empty values.
     *
     *  @return The Collection of Strings
     */
    @Override
    public synchronized Collection< String > findUnreferenced() {
        final ArrayList< String > unref = new ArrayList<>();
        for( final String key : m_referredBy.keySet() ) {
            final Set< ? > refs = getReferenceList( m_referredBy, key );
            if( refs == null || refs.isEmpty() ) {
                unref.add( key );
            }
        }

        return unref;
    }


    /**
     *  Finds all references to non-existent pages. This requires a linear scan through m_refersTo values; each value
     *  must have a corresponding key entry in the reference Maps, otherwise such a page has never been created.
     *  <P>
     *  Returns a Collection containing Strings of uncreated page names. Each non-existent page name is shown only
     *  once - we don't return information on who referred to it.
     *
     *  @return A Collection of Strings
     */
    @Override
    public synchronized Collection< String > findUncreated() {
        final TreeSet< String > uncreated = new TreeSet<>();

        //  Go through m_refersTo values and check that m_refersTo has the corresponding keys.
        //  We want to reread the code to make sure our HashMaps are in sync...
        final Collection< Collection< String > > allReferences = m_refersTo.values();
        for( final Collection< String > refs : allReferences ) {
            if( refs != null ) {
                for( final String aReference : refs ) {
                    if( !m_engine.getManager( PageManager.class ).wikiPageExists( aReference ) ) {
                        uncreated.add( aReference );
                    }
                }
            }
        }

        return uncreated;
    }

    /**
     *  Searches for the given page in the given Map, and returns the set of references. This method also takes care of
     *  English plural matching.
     *
     *  @param coll The Map to search in
     *  @param pagename The name to find.
     *  @return The references list.
     */
    private < T > Set< T > getReferenceList( final Map< String, Set< T > > coll, final String pagename ) {
        Set< T > refs = coll.get( pagename );

        if( m_matchEnglishPlurals ) {
            //  We'll also add matches from the "other" page.
            final Set< T > refs2;

            if( pagename.endsWith( "s" ) ) {
                refs2 = coll.get( pagename.substring( 0, pagename.length() - 1 ) );
            } else {
                refs2 = coll.get( pagename + "s" );
            }

            if( refs2 != null ) {
                if( refs != null ) {
                    refs.addAll( refs2 );
                } else {
                    refs = refs2;
                }
            }
        }
        return refs;
    }

    /**
     *  Find all pages that refer to this page.
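     *  If English plural matching is enabled, referrers of the singular and plural forms of the name are combined.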
     *  Returns null if the page does not exist or is not referenced at all,
     *  otherwise returns a collection containing page names (String) that refer to this one.
     *  <p>
     *  @param pagename The page to find referrers for.
     *  @return A Set of Strings. May return null, if the page does not exist, or if it has no references.
     */
    @Override
    public synchronized Set< String > findReferrers( final String pagename ) {
        final Set< String > refs = getReferenceList( m_referredBy, pagename );
        if( refs == null || refs.isEmpty() ) {
            return null;
        }

        return refs;
    }

    /**
     *  Returns all pages that refer to this page. Note that the returned Set is backed by the live reference data and
     *  may change abruptly, so any access through an iterator may result in a ConcurrentModificationException.
     *  <p>
     *  The advantage of using this method over findReferrers() is that it is very fast, as it does not create a new object.
     *  The disadvantages are that it does not do any mapping between plural names, and you may end up getting a
     *  ConcurrentModificationException.
     *
     *  @param pageName Page name to query.
     *  @return A Set of Strings containing the names of all the pages that refer to this page. May return null, if the page does
     *          not exist or has not been indexed yet.
     *  @since 2.2.33
     */
    @Override
    public Set< String > findReferredBy( final String pageName ) {
        return m_unmutableReferredBy.get( getFinalPageName( pageName ) );
    }

    /**
     *  Returns all pages that this page refers to. You can use this as a quick way of getting the links from a page, but note
     *  that it does not include any InterWiki, image, or external links. It does contain attachments, though.
     *  <p>
     *  The Collection returned is unmodifiable, so you cannot change it. It does reflect the current status and thus is a live
     *  object. So, if you are using any kind of an iterator on it, be prepared for ConcurrentModificationExceptions.
     *  <p>
     *  The returned value is a Collection, because a page may refer to another page multiple times.
     *
     *  @param pageName Page name to query
     *  @return A Collection of Strings containing the names of the pages that this page refers to. May return null, if the page
     *          does not exist or has not been indexed yet.
     *  @since 2.2.33
     */
    @Override
    public Collection< String > findRefersTo( final String pageName ) {
        return m_unmutableRefersTo.get( getFinalPageName( pageName ) );
    }

    /**
     *  This 'deepHashCode' can be used to determine if there were any modifications made to the underlying to and by maps of the
     *  ReferenceManager. The maps of the ReferenceManager are not synchronized, so someone could add/remove entries in them while the
     *  hashCode is being computed.
     *
     *  This method traps and retries if a concurrent modification occurs.
     *
     *  @return XOR of the hashCodes for the to and by maps of the ReferenceManager
     *  @since 2.3.24
     */
    //
    //  TODO: It is unnecessary to calculate the hashcode; it should be calculated only when the hashmaps are changed. This is slow.
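    //        (One possible approach, not implemented here: keep a cached signature field and recompute it wherever the
    //        maps are modified, e.g. internalUpdateReferences(), pageRemoved() and clearPageEntries().)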
    //
    public int deepHashCode() {
        boolean failed = true;
        int signature = 0;

        while( failed ) {
            signature = 0;
            try {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            } catch( final ConcurrentModificationException e ) {
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     *  Returns a list of all pages that the ReferenceManager knows about. This should be roughly equivalent to
     *  PageManager.getAllPages(), but without the potential disk access overhead. Note that this method is not guaranteed
     *  to return a Set of really all pages (especially during startup), but it is very fast.
     *
     *  @return A Set of all defined page names that ReferenceManager knows about.
     *  @since 2.3.24
     */
    @Override
    public Set< String > findCreated() {
        return new HashSet<>( m_refersTo.keySet() );
    }

    private String getFinalPageName( final String orig ) {
        try {
            final String s = m_engine.getFinalPageName( orig );
            return s != null ? s : orig;
        } catch( final ProviderException e ) {
            log.error( "Error while trying to fetch a page name; trying to cope with the situation.", e );
            return orig;
        }
    }

    /**
     *  {@inheritDoc}
     */
    @Override
    public void actionPerformed( final WikiEvent event ) {
        if( event instanceof WikiPageEvent && event.getType() == WikiPageEvent.PAGE_DELETED ) {
            final String pageName = ( ( WikiPageEvent ) event ).getPageName();
            if( pageName != null ) {
                pageRemoved( pageName );
            }
        }
    }

}