/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.references;

import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.InternalWikiException;
import org.apache.wiki.LinkCollector;
import org.apache.wiki.api.core.Attachment;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.api.core.Page;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.api.filters.BasePageFilter;
import org.apache.wiki.api.providers.PageProvider;
import org.apache.wiki.api.providers.WikiProvider;
import org.apache.wiki.api.spi.Wiki;
import org.apache.wiki.attachment.AttachmentManager;
import org.apache.wiki.event.WikiEvent;
import org.apache.wiki.event.WikiEventManager;
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.pages.PageManager;
import org.apache.wiki.render.RenderingManager;
import org.apache.wiki.util.TextUtil;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;

/*
  BUGS

  - if a wikilink is added to a page, then removed, RefMan still thinks that the page refers to the wikilink page. Hm.

  - if a page is deleted, gets very confused.

  - Serialization causes page attributes to be missing, when InitializablePlugins are not executed properly. Thus,
    serialization should really also mark whether a page is serializable or not...
 */


/*
   A word about synchronizing:

   I expect this object to be accessed in three situations:
   - when an Engine is created, and it scans its wikipages
   - when the Engine saves a page
   - when a JSP page accesses one of the Engine's ReferenceManagers to display a list of (un)referenced pages.

   So, access to this class is fairly rare, and usually triggered by user interaction. OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =). My deduction: using unsynchronized HashMaps etc. and syncing methods
   or code blocks is preferable to using slow, synced storage objects. We don't have iterative code here, so I'm going to use synced
   methods for now.

   (Note: the constructor below now creates ConcurrentHashMaps; only the disk (de)serialization methods are synchronized.)

   Please contact me if you notice problems with ReferenceManager, and especially with synchronization, or if you have suggestions about
   syncing.

   ebu@memecry.net
*/

/**
 *  Keeps track of wikipage references:
 *  <UL>
 *  <LI>What pages a given page refers to
 *  <LI>What pages refer to a given page
 *  </UL>
 *
 *  This is a quick'n'dirty approach without any finesse in storage and searching algorithms; we trust java.util.*.
 *  <P>
 *  This class contains two Maps, m_refersTo and m_referredBy. The first is indexed by WikiPage names and contains a Collection of all
 *  WikiPages the page refers to. (Multiple references are not counted, naturally.) The second is indexed by WikiPage names and contains
 *  a Set of all pages that refer to the indexing page. (Notice - the keys of both Maps should be kept in sync.)
 *  <P>
 *  When a page is added or edited, its references are parsed, a Collection is received, and we crudely replace anything previous with
 *  this new Collection. We then check each referenced page name and make sure they know they are referred to by the new page.
 *  <P>
 *  Based on this information, we can perform non-optimal searches for e.g. unreferenced pages, top ten lists, etc.
 *  <P>
 *  The owning class must take responsibility of filling in any pre-existing information, probably by loading each and every WikiPage
 *  and calling this class to update the references when created.
 *
 *  @since 1.6.1 (as of 2.11.0, moved to org.apache.wiki.references)
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...
public class DefaultReferenceManager extends BasePageFilter implements ReferenceManager {

    /**
     *  Maps page wikiname to a Collection of pages it refers to. The Collection must contain Strings. The Collection may contain
     *  names of non-existing pages.
     */
    private Map< String, Collection< String > > m_refersTo;

    /** Read-only view over {@link #m_refersTo}; re-created whenever the backing map is replaced. */
    private Map< String, Collection< String > > m_unmutableRefersTo;

    /**
     *  Maps page wikiname to a Set of referring pages. The Set must contain Strings. Non-existing pages (a reference exists, but
     *  not a file for the page contents) may have an empty Set in m_referredBy.
     */
    private Map< String, Set< String > > m_referredBy;

    /** Read-only view over {@link #m_referredBy}; re-created whenever the backing map is replaced. */
    private Map< String, Set< String > > m_unmutableReferredBy;

    /** Whether "Page" and "Pages" should be treated as the same page when looking up references. */
    private final boolean m_matchEnglishPlurals;

    private static final Logger LOG = LogManager.getLogger( DefaultReferenceManager.class );
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We use this also as a generic serialization id: it is written as the format version of every cache file. */
    private static final long serialVersionUID = 4L;

    /**
     *  Builds a new ReferenceManager.
     *
     *  @param engine The Engine to which this is managing references to.
     */
    public DefaultReferenceManager( final Engine engine ) {
        m_refersTo = new ConcurrentHashMap<>();
        m_referredBy = new ConcurrentHashMap<>();
        m_engine = engine;
        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), Engine.PROP_MATCHPLURALS, false );

        //
        //  Create two maps that contain unmutable versions of the two basic maps.
        //
        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
        m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo );
    }

    /**
     *  Does a full reference update for one page: scans the latest page text for wiki links, adds the names of the page's
     *  attachments, and stores the result. Does not write the maps to disk; assumes that you do it afterwards.
     *
     *  @param page the page whose references are recalculated.
     *  @throws ProviderException if the page text or the attachment list cannot be read.
     */
    private void updatePageReferences( final Page page ) throws ProviderException {
        final String content = m_engine.getManager( PageManager.class ).getPageText( page.getName(), PageProvider.LATEST_VERSION );
        final Collection< String > links = scanWikiLinks( page, content );
        final TreeSet< String > res = new TreeSet<>( links );
        final List< Attachment > attachments = m_engine.getManager( AttachmentManager.class ).listAttachments( page );
        for( final Attachment att : attachments ) {
            res.add( att.getName() );
        }

        internalUpdateReferences( page.getName(), res );
    }

    /**
     *  Initializes the entire reference manager with the initial set of pages from the collection.
     *  <p>
     *  The cached maps are read from the work directory first; only pages modified after the cache was written are rescanned.
     *  If the cache cannot be read for any reason, every page is rescanned and a fresh cache is written.
     *
     *  @param pages A collection of all pages you want to be included in the reference count.
     *  @since 2.2
     *  @throws ProviderException If reading of pages fails.
     */
    @Override
    public void initialize( final Collection< Page > pages ) throws ProviderException {
        LOG.debug( "Initializing new ReferenceManager with {} initial pages.", pages.size() );
        final StopWatch sw = new StopWatch();
        sw.start();
        LOG.info( "Starting cross reference scan of WikiPages" );

        //  First, try to serialize old data from disk.  If that fails, we'll go and update the entire reference lists (which'll take time)
        try {
            //  Unserialize things.  The loop below cannot be combined with the other loop below, simply because
            //  engine.getPage() has side effects such as loading initializing the user databases, which in turn want all
            //  the pages to be read already...
            //
            //  Yes, this is a kludge.  We know.  Will be fixed.
            final long saved = unserializeFromDisk();

            for( final Page page : pages ) {
                unserializeAttrsFromDisk( page );
            }

            //  Now we must check if any of the pages have been changed while we were in the electronic la-la-land,
            //  and update the references for them.
            for( final Page page : pages ) {
                if( !( page instanceof Attachment ) ) {
                    // Refresh with the latest copy
                    final Page wp = m_engine.getManager( PageManager.class ).getPage( page.getName() );

                    if( wp.getLastModified() == null ) {
                        LOG.fatal( "Provider returns null lastModified.  Please submit a bug report." );
                    } else if( wp.getLastModified().getTime() > saved ) {
                        updatePageReferences( wp );
                    }
                }
            }

        } catch( final Exception e ) {
            // Deliberately broad: any failure while reading the cache falls back to a full rebuild.
            LOG.info( "Unable to unserialize old refmgr information, rebuilding database: {}", e.getMessage() );
            buildKeyLists( pages );

            // Scan the existing pages from disk and update references in the manager.
            for( final Page page : pages ) {
                // We cannot build a reference list from the contents of attachments, so we skip them.
                if( !( page instanceof Attachment ) ) {
                    updatePageReferences( page );
                    serializeAttrsToDisk( page );
                }
            }

            serializeToDisk();
        }

        sw.stop();
        LOG.info( "Cross reference scan done in {}", sw );

        WikiEventManager.addWikiEventListener( m_engine.getManager( PageManager.class ), this );
    }

    /**
     *  Reads the serialized reference maps from the disk back to memory and re-creates the read-only views over them.
     *
     *  @return the timestamp (milliseconds) stored in the file when it was written.
     *  @throws IOException if the file cannot be read or was written with a different format version.
     *  @throws ClassNotFoundException if a serialized class cannot be resolved.
     */
    @SuppressWarnings( "unchecked" )
    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
        final long saved;

        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( Files.newInputStream( f.toPath() ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            final long ver = in.readLong();

            if( ver != serialVersionUID ) {
                throw new IOException( "File format has changed; I need to recalculate references." );
            }

            saved = in.readLong();
            m_refersTo = ( Map< String, Collection< String > > ) in.readObject();
            m_referredBy = ( Map< String, Set< String > > ) in.readObject();

            // The views must be rebuilt because the backing maps were just replaced.
            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
            m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo );

            sw.stop();
            LOG.debug( "Read serialized data successfully in {}", sw );
        }

        return saved;
    }

    /**
     *  Serializes the reference maps to disk. The format is private, don't touch it. Failures are logged, not thrown.
     */
    private synchronized void serializeToDisk() {
        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream( f.toPath() ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp
            out.writeObject( m_refersTo );
            out.writeObject( m_referredBy );

            sw.stop();

            LOG.debug( "serialization done - took {}", sw );
        } catch( final IOException ioe ) {
            LOG.error( "Unable to serialize!", ioe );
        }
    }

    /**
     *  Derives the name of the per-page attribute cache file: the hex MD5 digest of the UTF-8 page name plus ".cache".
     *  The digest is only used to obtain a file-system-safe name.
     *
     *  @param pageName the page name; may be null.
     *  @return the cache file name, or null if pageName is null or the digest algorithm is unavailable.
     */
    private String getHashFileName( final String pageName ) {
        if( pageName == null ) {
            return null;
        }
        try {
            final MessageDigest digest = MessageDigest.getInstance( "MD5" );
            final byte[] dig = digest.digest( pageName.getBytes( StandardCharsets.UTF_8 ) );

            return TextUtil.toHexString( dig ) + ".cache";
        } catch( final NoSuchAlgorithmException e ) {
            LOG.fatal( "What do you mean - no such algorithm?", e );
            return null;
        }
    }

    /**
     *  Reads the serialized attributes of a page from the disk and sets them on the page.
     *
     *  @param p the page to restore attributes for.
     *  @return the timestamp stored in the cache file, or 0 if there is no usable cache file for this page.
     *  @throws IOException if the cache file exists but cannot be read.
     *  @throws ClassNotFoundException if a serialized attribute class cannot be resolved.
     */
    private synchronized long unserializeAttrsFromDisk( final Page p ) throws IOException, ClassNotFoundException {
        long saved = 0L;

        //  Find attribute cache, and check if it exists
        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( !f.exists() ) {
                return 0L;
            }

            try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( Files.newInputStream( f.toPath() ) ) ) ) {
                final StopWatch sw = new StopWatch();
                sw.start();
                LOG.debug( "Deserializing attributes for {}", p.getName() );

                final long ver = in.readLong();
                if( ver != serialVersionUID ) {
                    LOG.debug( "File format has changed; cannot deserialize." );
                    return 0L;
                }

                saved = in.readLong();
                final String name = in.readUTF();
                if( !name.equals( p.getName() ) ) {
                    // Two different names hashed to the same file; the data belongs to the other page.
                    LOG.debug( "File name does not match ({}), skipping...", name );
                    return 0L; // Not here
                }

                // The count is stored as a long, so the loop counter is a long as well.
                final long entries = in.readLong();
                for( long i = 0; i < entries; i++ ) {
                    final String key = in.readUTF();
                    final Object value = in.readObject();
                    p.setAttribute( key, value );
                    LOG.debug( "   attr: {}={}", key, value );
                }

                sw.stop();
                LOG.debug( "Read serialized data for {} successfully in {}", name, sw );
                p.setHasMetadata();
            }
        }

        return saved;
    }

    /**
     *  Serializes the {@link Serializable} attributes of a page to its cache file. The format is private, don't touch it.
     *  <p>
     *  If the page has no serializable attributes the cache file is removed instead, so that stale data is not restored on the
     *  next start. Failures are logged, not thrown.
     *
     *  @param p the page whose attributes are stored.
     */
    private synchronized void serializeAttrsToDisk( final Page p ) {
        final StopWatch sw = new StopWatch();
        sw.start();

        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            final File dir = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            if( !dir.exists() ) {
                dir.mkdirs();
            }

            //  The file is named after a digest of the page name
            final File f = new File( dir, hashName );

            // Take a snapshot to avoid concurrency issues, and keep only what can actually be written. The entry count
            // stored in the file must match the number of entries that follow it, otherwise the reader runs off the
            // end of the stream.
            final Map< String, Object > serializable = new HashMap<>();
            for( final Map.Entry< String, Object > e : new HashMap<>( p.getAttributes() ).entrySet() ) {
                if( e.getValue() instanceof Serializable ) {
                    serializable.put( e.getKey(), e.getValue() );
                }
            }

            try {
                if( serializable.isEmpty() ) {
                    //  Nothing to serialize, therefore we will just simply remove the serialization file so that the
                    //  next time we boot, we don't deserialize old data. This is decided before any stream is opened
                    //  on the file, so the delete does not race with our own open handle.
                    Files.deleteIfExists( f.toPath() );
                    return;
                }

                try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream( f.toPath() ) ) ) ) {
                    out.writeLong( serialVersionUID );
                    out.writeLong( System.currentTimeMillis() ); // Timestamp
                    out.writeUTF( p.getName() );
                    out.writeLong( serializable.size() );

                    for( final Map.Entry< String, Object > e : serializable.entrySet() ) {
                        out.writeUTF( e.getKey() );
                        out.writeObject( e.getValue() );
                    }
                }
            } catch( final IOException e ) {
                LOG.error( "Unable to serialize!", e );
            } finally {
                sw.stop();
                LOG.debug( "serialization for {} done - took {}", p.getName(), sw );
            }
        }

    }

    /**
     *  After the page has been saved, updates the reference lists.
     *
     *  @param context {@inheritDoc}
     *  @param content {@inheritDoc}
     */
    @Override
    public void postSave( final Context context, final String content ) {
        final Page page = context.getPage();
        updateReferences( page.getName(), scanWikiLinks( page, content ) );
        serializeAttrsToDisk( page );
    }

    /**
     *  Reads a WikiPageful of data from a String and returns all links internal to this Wiki in a Collection.
     *
     *  @param page The WikiPage to scan
     *  @param pagedata The page contents
     *  @return a Collection of Strings
     */
    @Override
    public Collection< String > scanWikiLinks( final Page page, final String pagedata ) {
        final LinkCollector localCollector = new LinkCollector();
        m_engine.getManager( RenderingManager.class ).textToHTML( Wiki.context().create( m_engine, page ),
                                                                  pagedata,
                                                                  localCollector,
                                                                  null,
                                                                  localCollector,
                                                                  false,
                                                                  true );

        return localCollector.getLinks();
    }

    /**
     *  Updates the m_refersTo and m_referredBy maps when a page has been deleted.
     *  <P>
     *  Within the m_refersTo map the pagename is a key. The whole key-value-set has to be removed to keep the map clean.
     *  Within the m_referredBy map the name is stored as a value. Since a key can have more than one value we have to
     *  delete just the key-value-pair referring page:deleted page.
     *
     *  @param page Name of the page to remove from the maps.
     */
    @Override
    public void pageRemoved( final Page page ) {
        pageRemoved( page.getName() );
    }

    /**
     *  Removes every trace of the named page from both maps, rewrites the serialized maps and deletes the page's
     *  attribute cache file.
     *
     *  @param pageName name of the removed page.
     *  @throws InternalWikiException if the page refers to a page that has no entry in m_referredBy, i.e. the maps are out of sync.
     */
    private void pageRemoved( final String pageName ) {
        final Collection< String > refTo = m_refersTo.get( pageName );
        if( refTo != null ) {
            for( final String referredPageName : refTo ) {
                final Set< String > refBy = m_referredBy.get( referredPageName );
                if( refBy == null ) {
                    throw new InternalWikiException( "Refmgr out of sync: page " + pageName +
                                                     " refers to " + referredPageName + ", which has null referrers." );
                }

                refBy.remove( pageName );

                // Drop the entry altogether if nobody refers to the page any more and it does not exist. It will be added
                // later on anyway, if it becomes referenced again.
                if( refBy.isEmpty() && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPageName ) ) {
                    m_referredBy.remove( referredPageName );
                }
            }

            LOG.debug( "Removing from m_refersTo HashMap key:value {}:{}", pageName, m_refersTo.get( pageName ) );
            m_refersTo.remove( pageName );
        }

        // Keep the removed page's own entry only while other pages still link to it.
        final Set< String > refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() ) {
            m_referredBy.remove( pageName );
        }

        //  Remove any traces from the disk, too
        serializeToDisk();

        final String hashName = getHashFileName( pageName );
        if( hashName != null ) {
            final File f = new File( new File( m_engine.getWorkDir(), SERIALIZATION_DIR ), hashName );
            if( f.exists() ) {
                f.delete();
            }
        }
    }

    /**
     *  Updates all references for the given page.
     *
     *  @param page wiki page for which references should be updated
     */
    @Override
    public void updateReferences( final Page page ) {
        final String pageData = m_engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
        updateReferences( page.getName(), scanWikiLinks( page, pageData ) );
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is removed
     *  and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <P>
     *  This is the method to call when a new page has been created, and we want to a) set up its references and b) notify the
     *  referred pages of the references. Use this method during run-time.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    @Override
    public void updateReferences( final String page, final Collection< String > references ) {
        internalUpdateReferences( page, references );
        serializeToDisk();
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is
     *  removed and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <p>
     *  This method does not synchronize the database to disk.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences( String page, final Collection< String > references ) {
        page = getFinalPageName( page );

        // Create a new entry in m_refersTo.
        final Collection< String > oldRefTo = m_refersTo.get( page );
        m_refersTo.remove( page );

        final TreeSet< String > cleanedRefs = new TreeSet<>();
        for( final String ref : references ) {
            final String reference = getFinalPageName( ref );
            cleanedRefs.add( reference );
        }

        m_refersTo.put( page, cleanedRefs );

        //  We know the page exists, since it's making references somewhere. If an entry for it didn't exist previously
        //  in m_referredBy, make sure one is added now.
        if( !m_referredBy.containsKey( page ) ) {
            m_referredBy.put( page, new TreeSet<>() );
        }

        //  Get all pages that used to be referred to by 'page' and remove that reference. (We don't want to try to figure out
        //  which particular references were removed...)
        cleanReferredBy( page, oldRefTo, cleanedRefs );

        //  Notify all referred pages of their referinesshoodicity.
        for( final String referredPageName : cleanedRefs ) {
            updateReferredBy( getFinalPageName( referredPageName ), page );
        }
    }

    /**
     *  Returns the refers-to list. For debugging.
     *
     *  @return The refers-to list.
     */
    protected Map< String, Collection< String > > getRefersTo() {
        return m_refersTo;
    }

    /**
     *  Returns the referred-by list. For debugging.
     *
     *  @return Referred-by lists.
     */
    protected Map< String, Set< String > > getReferredBy() {
        return m_referredBy;
    }

    /**
     *  Cleans the 'referred by' list, removing references by 'referrer' to any other page. Called after 'referrer' is removed.
     *  <p>
     *  Two ways to go about this. One is to look up all pages previously referred by referrer and remove referrer
     *  from their lists, and let the update put them back in (except possibly removed ones).
     *  <p>
     *  The other is to get the old referred-to list, compare to the new, and tell the ones missing in the latter to remove referrer from
     *  their list.
     *  <p>
     *  We'll just try the first for now. Need to come back and optimize this a bit.
     *
     *  @param referrer name of the referring page.
     *  @param oldReferred the pages the referrer used to refer to; may be null, in which case nothing is done.
     *  @param newReferred the pages the referrer refers to now (currently unused by this strategy).
     */
    private void cleanReferredBy( final String referrer,
                                  final Collection< String > oldReferred,
                                  final Collection< String > newReferred ) {
        if( oldReferred == null ) {
            return;
        }

        for( final String referredPage : oldReferred ) {
            final Set< String > oldRefBy = m_referredBy.get( referredPage );
            if( oldRefBy != null ) {
                oldRefBy.remove( referrer );
            }

            // If the page is referred to by no one AND it doesn't even exist, we might just as well forget about this
            // entry. It will be added again elsewhere if new references appear.
            if( ( oldRefBy == null || oldRefBy.isEmpty() ) && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPage ) ) {
                m_referredBy.remove( referredPage );
            }
        }
    }

    /**
     *  When initially building a ReferenceManager from scratch, call this method BEFORE calling updateReferences() with
     *  a full list of existing page names. It builds the refersTo and referredBy key lists, thus enabling updateReferences()
     *  to function correctly.
     *  <P>
     *  This method should NEVER be called after initialization. It clears all mappings from the reference tables.
     *
     *  @param pages a Collection containing WikiPage objects.
     */
    private void buildKeyLists( final Collection< Page > pages ) {
        m_refersTo.clear();
        m_referredBy.clear();
        if( pages == null ) {
            return;
        }

        try {
            for( final Page page : pages ) {
                // We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put( page.getName(), new TreeSet<>() );
                // Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put( page.getName(), new TreeSet<>() );
            }
        } catch( final ClassCastException e ) {
            LOG.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
        }
    }


    /**
     *  Marks the page as referred to by the referrer. An entry is created for the page if none exists yet, so pages that
     *  have not been created can still be recorded as referenced.
     *  <p>
     *  First-level self-references are recorded like any other reference. References between the singular and plural form
     *  of the same name are ignored when English plural matching is enabled.
     *
     *  @param page the page being referred to.
     *  @param referrer the page containing the reference.
     */
    private void updateReferredBy( final String page, final String referrer ) {
        // We are not interested if plural forms refer to each other.
        if( m_matchEnglishPlurals ) {
            final String p2 = page.endsWith( "s" ) ? page.substring( 0, page.length() - 1 ) : page + "s";
            if( referrer.equals( p2 ) ) {
                return;
            }
        }

        // Even if 'page' has not been created yet, it can still be referenced. This requires we don't use m_referredBy
        // keys when looking up missing pages, of course.
        final Set< String > referrers = m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );
        referrers.add( referrer );
    }


    /**
     *  Clears the references to a certain page, so it's no longer in the map.
     *
     *  @param pagename  Name of the page to clear references for.
     */
    @Override
    public void clearPageEntries( String pagename ) {
        pagename = getFinalPageName( pagename );

        //  Remove this item from the referredBy list of any page which this item refers to.
        final Collection< String > c = m_refersTo.get( pagename );
        if( c != null ) {
            for( final String key : c ) {
                final Collection< ? > dref = m_referredBy.get( key );
                // The referred page may have no referredBy entry (see updateReferredBy and cleanReferredBy), so guard against null.
                if( dref != null ) {
                    dref.remove( pagename );
                }
            }
        }

        //  Finally, remove direct references.
        m_referredBy.remove( pagename );
        m_refersTo.remove( pagename );
    }


    /**
     *  Finds all unreferenced pages. This requires a linear scan through m_referredBy to locate keys with null or empty values.
     *
     *  @return The Collection of Strings
     */
    @Override
    public Collection< String > findUnreferenced() {
        final ArrayList< String > unref = new ArrayList<>();
        for( final String key : m_referredBy.keySet() ) {
            final Set< ? > refs = getReferenceList( m_referredBy, key );
            if( refs == null || refs.isEmpty() ) {
                unref.add( key );
            }
        }

        return unref;
    }


    /**
     *  Finds all references to non-existent pages. This requires a linear scan through m_refersTo values; every referenced
     *  name is checked against the PageManager, and names for which no page exists are collected.
     *  <P>
     *  Returns a Collection containing Strings of uncreated page names. Each non-existent page name is shown only
     *  once - we don't return information on who referred to it.
     *
     *  @return A Collection of Strings
     */
    @Override
    public Collection< String > findUncreated() {
        final TreeSet< String > uncreated = new TreeSet<>();

        // Go through m_refersTo values and ask the PageManager whether each referenced page exists.
        final Collection< Collection< String > > allReferences = m_refersTo.values();
        for( final Collection< String > refs : allReferences ) {
            if( refs != null ) {
                for( final String aReference : refs ) {
                    if( !m_engine.getManager( PageManager.class ).wikiPageExists( aReference ) ) {
                        uncreated.add( aReference );
                    }
                }
            }
        }

        return uncreated;
    }

    /**
     *  Searches for the given page in the given Map, and returns the set of references. This method also takes care of
     *  English plural matching.
     *  <p>
     *  When both the singular and the plural form have entries, the result is a new merged Set; the Sets stored in the Map
     *  are never modified by a lookup.
     *
     *  @param coll The Map to search in
     *  @param pagename The name to find.
     *  @return The references list; may be null if neither form of the name has an entry.
     */
    private Set< String > getReferenceList( final Map< String, Set< String > > coll, final String pagename ) {
        Set< String > refs = coll.get( pagename );

        if( m_matchEnglishPlurals ) {
            //  We'll add also matches from the "other" page.
            final Set< String > refs2;

            if( pagename.endsWith( "s" ) ) {
                refs2 = coll.get( pagename.substring( 0, pagename.length() - 1 ) );
            } else {
                refs2 = coll.get( pagename + "s" );
            }

            if( refs2 != null ) {
                if( refs != null ) {
                    // Merge into a copy: adding to 'refs' directly would write the other form's referrers into the map.
                    refs = new TreeSet<>( refs );
                    refs.addAll( refs2 );
                } else {
                    refs = refs2;
                }
            }
        }
        return refs;
    }

    /**
     *  Find all pages that refer to this page. Returns null if the page does not exist or is not referenced at all,
     *  otherwise returns a collection containing page names (String) that refer to this one.
     *  <p>
     *  @param pagename The page to find referrers for.
     *  @return A Set of Strings.  May return null, if the page does not exist, or if it has no references.
     */
    @Override
    public Set< String > findReferrers( final String pagename ) {
        final Set< String > refs = getReferenceList( m_referredBy, pagename );
        if( refs == null || refs.isEmpty() ) {
            return null;
        }

        return refs;
    }

    /**
     *  Returns all pages that refer to this page. Note that this method returns a value from an unmodifiable Map, which may be
     *  abruptly changed. So any access to any iterator may result in a ConcurrentModificationException.
     *  <p>
     *  The advantages of using this method over findReferrers() is that it is very fast, as it does not create a new object.
     *  The disadvantages are that it does not do any mapping between plural names, and you may end up getting a
     *  ConcurrentModificationException.
     *
     *  @param pageName Page name to query.
     *  @return A Set of Strings containing the names of all the pages that refer to this page.  May return null, if the page does
     *          not exist or has not been indexed yet.
     *  @since 2.2.33
     */
    @Override
    public Set< String > findReferredBy( final String pageName ) {
        return m_unmutableReferredBy.get( getFinalPageName( pageName ) );
    }

    /**
     *  Returns all pages that this page refers to. You can use this as a quick way of getting the links from a page, but note
     *  that it does not link any InterWiki, image, or external links. It does contain attachments, though.
     *  <p>
     *  The Collection returned reflects the current status and thus is a live object. So, if you are using any kind of an
     *  iterator on it, be prepared for ConcurrentModificationExceptions.
     *  <p>
     *  The returned value is a Collection, because a page may refer to another page multiple times.
     *
     *  @param pageName Page name to query
     *  @return A Collection of Strings containing the names of the pages that this page refers to. May return null, if the page
     *          does not exist or has not been indexed yet.
     *  @since 2.2.33
     */
    @Override
    public Collection< String > findRefersTo( final String pageName ) {
        return m_unmutableRefersTo.get( getFinalPageName( pageName ) );
    }

    /**
     *  This 'deepHashCode' can be used to determine if there were any modifications made to the underlying to and by maps of the
     *  ReferenceManager. Someone could add/remove entries in the maps while the hashCode is being computed.
     *  <p>
     *  This method traps and retries if a concurrent modification occurs.
     *
     *  @return XOR of the hashCodes for the to and by maps of the ReferenceManager
     *  @since 2.3.24
     */
    //
    //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only when the hashmaps are changed.  This is slow.
    //
    public int deepHashCode() {
        boolean failed = true;
        int signature = 0;

        while( failed ) {
            signature = 0;
            try {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            } catch( final ConcurrentModificationException e ) {
                // Somebody changed a set while we were hashing it; let them finish and start over.
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     *  Returns a list of all pages that the ReferenceManager knows about. This should be roughly equivalent to
     *  PageManager.getAllPages(), but without the potential disk access overhead.  Note that this method is not guaranteed
     *  to return a Set of really all pages (especially during startup), but it is very fast.
     *
     *  @return A Set of all defined page names that ReferenceManager knows about.
     *  @since 2.3.24
     */
    @Override
    public Set< String > findCreated() {
        return new HashSet<>( m_refersTo.keySet() );
    }

    /**
     *  Resolves a page name through the Engine, falling back to the name as given if the Engine returns null or fails.
     *
     *  @param orig the page name as written.
     *  @return the final page name, or orig if it could not be resolved.
     */
    private String getFinalPageName( final String orig ) {
        try {
            final String s = m_engine.getFinalPageName( orig );
            return s != null ? s : orig;
        } catch( final ProviderException e ) {
            LOG.error( "Error while trying to fetch a page name; trying to cope with the situation.", e );
            return orig;
        }
    }

    /**
     *  {@inheritDoc}
     */
    @Override
    public void actionPerformed( final WikiEvent event ) {
        if( event instanceof WikiPageEvent && event.getType() == WikiPageEvent.PAGE_DELETED ) {
            final String pageName = ( ( WikiPageEvent ) event ).getPageName();
            if( pageName != null ) {
                pageRemoved( pageName );
            }
        }
    }

}