/*
    Licensed to the Apache Software Foundation (ASF) under one
    or more contributor license agreements.  See the NOTICE file
    distributed with this work for additional information
    regarding copyright ownership.  The ASF licenses this file
    to you under the Apache License, Version 2.0 (the
    "License"); you may not use this file except in compliance
    with the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing,
    software distributed under the License is distributed on an
    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    KIND, either express or implied.  See the License for the
    specific language governing permissions and limitations
    under the License.
 */
package org.apache.wiki.references;

import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.wiki.InternalWikiException;
import org.apache.wiki.LinkCollector;
import org.apache.wiki.api.core.Attachment;
import org.apache.wiki.api.core.Context;
import org.apache.wiki.api.core.Engine;
import org.apache.wiki.api.core.Page;
import org.apache.wiki.api.exceptions.ProviderException;
import org.apache.wiki.api.filters.BasePageFilter;
import org.apache.wiki.api.providers.PageProvider;
import org.apache.wiki.api.providers.WikiProvider;
import org.apache.wiki.api.spi.Wiki;
import org.apache.wiki.attachment.AttachmentManager;
import org.apache.wiki.event.WikiEvent;
import org.apache.wiki.event.WikiEventManager;
import org.apache.wiki.event.WikiPageEvent;
import org.apache.wiki.pages.PageManager;
import org.apache.wiki.render.RenderingManager;
import org.apache.wiki.util.TextUtil;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.stream.Collectors;

/*
  BUGS

  - if a wikilink is added to a page, then removed, RefMan still thinks that the page refers to the wikilink page. Hm.

  - if a page is deleted, gets very confused.

  - Serialization causes page attributes to be missing, when InitializablePlugins are not executed properly.  Thus,
    serialization should really also mark whether a page is serializable or not...
 */


/*
   A word about synchronizing:

   I expect this object to be accessed in three situations:
   - when an Engine is created, and it scans its wikipages
   - when the Engine saves a page
   - when a JSP page accesses one of the Engine's ReferenceManagers to display a list of (un)referenced pages.

   So, access to this class is fairly rare, and usually triggered by user interaction. OTOH, the methods in this class use their storage
   objects intensively (and, sorry to say, in an unoptimized manner =). My deduction: using unsynchronized HashMaps etc. and syncing methods
   or code blocks is preferable to using slow, synced storage objects. We don't have iterative code here, so I'm going to use synced
   methods for now.

   Note: the two top-level maps are created as ConcurrentHashMaps in the constructor, but the per-page value sets stored in them are
   plain TreeSets, and only the (de)serialization methods are declared synchronized.

   Please contact me if you notice problems with ReferenceManager, and especially with synchronization, or if you have suggestions about
   syncing.

   ebu@memecry.net
*/

/**
 *  Keeps track of wikipage references:
 *  <UL>
 *  <LI>What pages a given page refers to
 *  <LI>What pages refer to a given page
 *  </UL>
 *
 *  This is a quick'n'dirty approach without any finesse in storage and searching algorithms; we trust java.util.*.
 *  <P>
 *  This class contains two Maps, m_refersTo and m_referredBy. The first is indexed by WikiPage names and contains a Collection of all
 *  WikiPages the page refers to. (Multiple references are not counted, naturally.) The second is indexed by WikiPage names and contains
 *  a Set of all pages that refer to the indexing page. (Notice - the keys of both Maps should be kept in sync.)
 *  <P>
 *  When a page is added or edited, its references are parsed, a Collection is received, and we crudely replace anything previous with
 *  this new Collection. We then check each referenced page name and make sure they know they are referred to by the new page.
 *  <P>
 *  Based on this information, we can perform non-optimal searches for e.g. unreferenced pages, top ten lists, etc.
 *  <P>
 *  The owning class must take responsibility of filling in any pre-existing information, probably by loading each and every WikiPage
 *  and calling this class to update the references when created.
 *
 *  @since 1.6.1 (as of 2.11.0, moved to org.apache.wiki.references)
 */

// FIXME: The way that we save attributes is now a major booboo, and must be
//        replaced forthwith.  However, this is a workaround for the great deal
//        of problems that occur here...
public class DefaultReferenceManager extends BasePageFilter implements ReferenceManager, Serializable {

    /**
     *  Maps page wikiname to a Collection of pages it refers to. The Collection must contain Strings. The Collection may contain
     *  names of non-existing pages.
     */
    private Map< String, Collection< String > > m_refersTo;
    private Map< String, Collection< String > > m_unmutableRefersTo;

    /**
     *  Maps page wikiname to a Set of referring pages. The Set must contain Strings. Non-existing pages (a reference exists, but
     *  not a file for the page contents) may have an empty Set in m_referredBy.
     */
    private Map< String, Set< String > > m_referredBy;
    private Map< String, Set< String > > m_unmutableReferredBy;

    private final boolean m_matchEnglishPlurals;

    private static final Logger LOG = LogManager.getLogger( DefaultReferenceManager.class );
    private static final String SERIALIZATION_FILE = "refmgr.ser";
    private static final String SERIALIZATION_DIR  = "refmgr-attr";

    /** We use this also as a generic serialization id; it is written as the format version of the on-disk caches. */
    private static final long serialVersionUID = 4L;

    /**
     *  Builds a new ReferenceManager.
     *
     *  @param engine The Engine to which this is managing references to.
     */
    public DefaultReferenceManager( final Engine engine ) {
        m_refersTo = new ConcurrentHashMap<>();
        m_referredBy = new ConcurrentHashMap<>();
        m_engine = engine;
        m_matchEnglishPlurals = TextUtil.getBooleanProperty( engine.getWikiProperties(), Engine.PROP_MATCHPLURALS, false );

        //
        //  Create two maps that are unmodifiable views of the two basic maps.
        //
        m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
        m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo );
    }

    /**
     *  Does a full reference update for a single page: scans the latest page text for wikilinks, adds the names of the page's
     *  attachments, and stores the result. Does not sync to disk; assumes that you do it afterwards.
     *
     *  @param page the page whose references are recalculated
     *  @throws ProviderException if the page text or the attachment list cannot be fetched
     */
    private void updatePageReferences( final Page page ) throws ProviderException {
        final String content = m_engine.getManager( PageManager.class ).getPageText( page.getName(), PageProvider.LATEST_VERSION );
        final Collection< String > links = scanWikiLinks( page, content );
        final TreeSet< String > res = new TreeSet<>( links );
        final List< Attachment > attachments = m_engine.getManager( AttachmentManager.class ).listAttachments( page );
        for( final Attachment att : attachments ) {
            res.add( att.getName() );
        }

        internalUpdateReferences( page.getName(), res );
    }

    /**
     *  Initializes the entire reference manager with the initial set of pages from the collection.
     *  <p>
     *  The previously serialized state is tried first; only pages modified after the serialization timestamp are rescanned.
     *  If anything goes wrong while reading that state, all reference data is rebuilt from scratch and written back to disk.
     *
     *  @param pages A collection of all pages you want to be included in the reference count.
     *  @since 2.2
     *  @throws ProviderException If reading of pages fails.
     */
    @Override
    public void initialize( final Collection< Page > pages ) throws ProviderException {
        LOG.debug( "Initializing new ReferenceManager with {} initial pages.", pages.size() );
        final StopWatch sw = new StopWatch();
        sw.start();
        LOG.info( "Starting cross reference scan of WikiPages" );

        //  First, try to unserialize old data from disk.  If that fails, we'll go and update the entire reference lists (which'll take time)
        try {
            //  Unserialize things.  The loop below cannot be combined with the other loop below, simply because
            //  engine.getPage() has side effects such as loading initializing the user databases, which in turn want all
            //  the pages to be read already...
            //
            //  Yes, this is a kludge.  We know.  Will be fixed.
            final long saved = unserializeFromDisk();

            for( final Page page : pages ) {
                unserializeAttrsFromDisk( page );
            }

            //  Now we must check if any of the pages have been changed while we were in the electronic la-la-land,
            //  and update the references for them.
            for( final Page page : pages ) {
                if( !( page instanceof Attachment ) ) {
                    // Refresh with the latest copy
                    final Page wp = m_engine.getManager( PageManager.class ).getPage( page.getName() );

                    if( wp.getLastModified() == null ) {
                        LOG.fatal( "Provider returns null lastModified.  Please submit a bug report." );
                    } else if( wp.getLastModified().getTime() > saved ) {
                        updatePageReferences( wp );
                    }
                }
            }

        } catch( final Exception e ) {
            // Deliberately broad: any failure while restoring the cache (I/O, format, runtime) falls back to a full rebuild.
            LOG.info( "Unable to unserialize old refmgr information, rebuilding database: {}", e.getMessage() );
            buildKeyLists( pages );

            // Scan the existing pages from disk and update references in the manager.
            for( final Page page : pages ) {
                // We cannot build a reference list from the contents of attachments, so we skip them.
                if( !( page instanceof Attachment ) ) {
                    updatePageReferences( page );
                    serializeAttrsToDisk( page );
                }
            }

            serializeToDisk();
        }

        sw.stop();
        LOG.info( "Cross reference scan done in {}", sw );

        WikiEventManager.addWikiEventListener( m_engine.getManager( PageManager.class ), this );
    }

    /**
     *  Reads the serialized data from the disk back to memory.
     *  <p>
     *  Both maps are read completely before any field is touched, so a failure half-way through never leaves the reference maps
     *  and their unmodifiable views pointing at different objects.
     *
     *  @return the timestamp (milliseconds) at which the data was last written to disk
     *  @throws IOException if the file cannot be read, has a different format version, or contains no maps
     *  @throws ClassNotFoundException if a serialized class cannot be resolved
     */
    @SuppressWarnings( "unchecked" )
    private synchronized long unserializeFromDisk() throws IOException, ClassNotFoundException {
        final long saved;

        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( Files.newInputStream( f.toPath() ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            final long ver = in.readLong();

            if( ver != serialVersionUID ) {
                throw new IOException( "File format has changed; I need to recalculate references." );
            }

            saved = in.readLong();
            final Map< String, Collection< String > > refersTo = ( Map< String, Collection< String > > ) in.readObject();
            final Map< String, Set< String > > referredBy = ( Map< String, Set< String > > ) in.readObject();
            if( refersTo == null || referredBy == null ) {
                throw new IOException( "Serialized reference data is incomplete; I need to recalculate references." );
            }

            // Everything was read successfully: publish the maps and their read-only views together.
            m_refersTo = refersTo;
            m_referredBy = referredBy;
            m_unmutableReferredBy = Collections.unmodifiableMap( m_referredBy );
            m_unmutableRefersTo = Collections.unmodifiableMap( m_refersTo );

            sw.stop();
            LOG.debug( "Read serialized data successfully in {}", sw );
        }

        return saved;
    }

    /**
     *  Serializes the reference maps to disk.  The format is private, don't touch it: format version, timestamp,
     *  the refers-to map, the referred-by map. I/O failures are logged, not propagated.
     */
    private synchronized void serializeToDisk() {
        final File f = new File( m_engine.getWorkDir(), SERIALIZATION_FILE );
        try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream( f.toPath() ) ) ) ) {
            final StopWatch sw = new StopWatch();
            sw.start();

            out.writeLong( serialVersionUID );
            out.writeLong( System.currentTimeMillis() ); // Timestamp
            out.writeObject( m_refersTo );
            out.writeObject( m_referredBy );

            sw.stop();

            LOG.debug( "serialization done - took {}", sw );
        } catch( final IOException ioe ) {
            LOG.error( "Unable to serialize!", ioe );
        }
    }

    /**
     *  Derives the name of the attribute cache file for a page: the hex MD5 digest of the UTF-8 page name plus ".cache".
     *  The digest is only used to obtain a file-system safe name.
     *
     *  @param pageName the page name; may be null
     *  @return the cache file name, or null if pageName is null or MD5 is unavailable
     */
    private String getHashFileName( final String pageName ) {
        if( pageName == null ) {
            return null;
        }
        try {
            final MessageDigest digest = MessageDigest.getInstance( "MD5" );
            final byte[] dig = digest.digest( pageName.getBytes( StandardCharsets.UTF_8 ) );

            return TextUtil.toHexString( dig ) + ".cache";
        } catch( final NoSuchAlgorithmException e ) {
            LOG.fatal( "What do you mean - no such algorithm?", e );
            return null;
        }
    }

    /**
     *  Reads the serialized attributes of a page from the disk and sets them on the page.
     *
     *  @param p the page whose attributes are restored
     *  @return the timestamp at which the attributes were written, or 0 if there is no usable cache file for this page
     *  @throws IOException if the cache file cannot be read
     *  @throws ClassNotFoundException if a serialized attribute class cannot be resolved
     */
    private synchronized long unserializeAttrsFromDisk( final Page p ) throws IOException, ClassNotFoundException {
        long saved = 0L;

        //  Find attribute cache, and check if it exists
        final String hashName = getHashFileName( p.getName() );
        if( hashName != null ) {
            File f = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
            f = new File( f, hashName );
            if( !f.exists() ) {
                return 0L;
            }

            try( final ObjectInputStream in = new ObjectInputStream( new BufferedInputStream( Files.newInputStream( f.toPath() ) ) ) ) {
                final StopWatch sw = new StopWatch();
                sw.start();
                LOG.debug( "Deserializing attributes for {}", p.getName() );

                final long ver = in.readLong();
                if( ver != serialVersionUID ) {
                    LOG.debug( "File format has changed; cannot deserialize." );
                    return 0L;
                }

                saved = in.readLong();
                final String name = in.readUTF();
                if( !name.equals( p.getName() ) ) {
                    // Different page names may map to the same cache file name; ignore foreign data.
                    LOG.debug( "File name does not match ({}), skipping...", name );
                    return 0L; // Not here
                }

                final long entries = in.readLong();
                for( long i = 0; i < entries; i++ ) {
                    final String key = in.readUTF();
                    final Object value = in.readObject();
                    p.setAttribute( key, value );
                    LOG.debug( "   attr: {}={}", key, value );
                }

                sw.stop();
                LOG.debug( "Read serialized data for {} successfully in {}", name, sw );
                p.setHasMetadata();
            }
        }

        return saved;
    }

    /**
     *  Serializes the attributes of a page to disk.  The format is private, don't touch it: format version, timestamp,
     *  page name, number of entries, then one (key, value) pair per entry.
     *  <p>
     *  Only attributes whose value is Serializable are written, and the entry count written to the file is the number of
     *  entries that actually follow it. If there is nothing to write, a stale cache file is removed instead, so that old data
     *  is not deserialized at the next boot. I/O failures are logged, not propagated.
     *
     *  @param p the page whose attributes are stored
     */
    private synchronized void serializeAttrsToDisk( final Page p ) {
        final StopWatch sw = new StopWatch();
        sw.start();

        final String hashName = getHashFileName( p.getName() );
        if( hashName == null ) {
            return;
        }

        final File dir = new File( m_engine.getWorkDir(), SERIALIZATION_DIR );
        final File f = new File( dir, hashName );

        try {
            // Work on a snapshot to avoid concurrency issues, keeping only what can actually be written.
            final Map< String, Object > serializable = new HashMap<>();
            for( final Map.Entry< String, Object > e : new HashMap<>( p.getAttributes() ).entrySet() ) {
                if( e.getValue() instanceof Serializable ) {
                    serializable.put( e.getKey(), e.getValue() );
                }
            }

            if( serializable.isEmpty() ) {
                // Nothing to serialize, therefore we will just simply remove the serialization file so that the
                // next time we boot, we don't deserialize old data. No stream is open on the file at this point.
                if( f.exists() && !f.delete() ) {
                    LOG.warn( "Unable to remove stale attribute cache {}", f.getAbsolutePath() );
                }
                return;
            }

            if( !dir.exists() ) {
                dir.mkdirs();
            }

            try( final ObjectOutputStream out = new ObjectOutputStream( new BufferedOutputStream( Files.newOutputStream( f.toPath() ) ) ) ) {
                out.writeLong( serialVersionUID );
                out.writeLong( System.currentTimeMillis() ); // Timestamp
                out.writeUTF( p.getName() );
                out.writeLong( serializable.size() );

                for( final Map.Entry< String, Object > e : serializable.entrySet() ) {
                    out.writeUTF( e.getKey() );
                    out.writeObject( e.getValue() );
                }
            }
        } catch( final IOException e ) {
            LOG.error( "Unable to serialize!", e );
        } finally {
            sw.stop();
            LOG.debug( "serialization for {} done - took {}", p.getName(), sw );
        }
    }

    /**
     *  After the page has been saved, updates the reference lists.
     *
     *  @param context {@inheritDoc}
     *  @param content {@inheritDoc}
     */
    @Override
    public void postSave( final Context context, final String content ) {
        final Page page = context.getPage();
        updateReferences( page.getName(), scanWikiLinks( page, content ) );
        serializeAttrsToDisk( page );
    }

    /**
     *  Reads a WikiPageful of data from a String and returns all links internal to this Wiki in a Collection.
     *
     *  @param page The WikiPage to scan
     *  @param pagedata The page contents
     *  @return a Collection of Strings
     */
    @Override
    public Collection< String > scanWikiLinks( final Page page, final String pagedata ) {
        final LinkCollector localCollector = new LinkCollector();
        m_engine.getManager( RenderingManager.class ).textToHTML( Wiki.context().create( m_engine, page ),
                                                                  pagedata,
                                                                  localCollector,
                                                                  null,
                                                                  localCollector,
                                                                  false,
                                                                  true );

        return localCollector.getLinks();
    }

    /**
     *  Updates the m_refersTo and m_referredBy maps when a page has been deleted.
     *  <P>
     *  Within the m_refersTo map the pagename is a key. The whole key-value-set has to be removed to keep the map clean.
     *  Within the m_referredBy map the name is stored as a value. Since a key can have more than one value we have to
     *  delete just the key-value-pair referring page:deleted page.
     *
     *  @param page Name of the page to remove from the maps.
     */
    @Override
    public void pageRemoved( final Page page ) {
        pageRemoved( page.getName() );
    }

    /**
     *  Removes all traces of the named page from the reference maps, writes the maps to disk and deletes the page's
     *  attribute cache file.
     *
     *  @param pageName name of the removed page
     *  @throws InternalWikiException if a page referred to by the removed page has no referred-by entry
     */
    private void pageRemoved( final String pageName ) {
        final Collection< String > refTo = m_refersTo.get( pageName );
        if( refTo != null ) {
            for( final String referredPageName : refTo ) {
                final Set< String > refBy = m_referredBy.get( referredPageName );
                if( refBy == null ) {
                    throw new InternalWikiException( "Refmgr out of sync: page " + pageName +
                                                     " refers to " + referredPageName + ", which has null referrers." );
                }

                refBy.remove( pageName );

                // Forget the entry only if it became empty and the page does not exist. It will be added
                // later on anyway, if it becomes referenced again. Otherwise the entry simply stays in place.
                if( refBy.isEmpty() && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPageName ) ) {
                    m_referredBy.remove( referredPageName );
                }
            }

            LOG.debug( "Removing from m_refersTo HashMap key:value {}:{}", pageName, m_refersTo.get( pageName ) );
            m_refersTo.remove( pageName );
        }

        final Set< String > refBy = m_referredBy.get( pageName );
        if( refBy == null || refBy.isEmpty() ) {
            m_referredBy.remove( pageName );
        }

        //  Remove any traces from the disk, too
        serializeToDisk();

        final String hashName = getHashFileName( pageName );
        if( hashName != null ) {
            final File f = new File( new File( m_engine.getWorkDir(), SERIALIZATION_DIR ), hashName );
            if( f.exists() ) {
                f.delete();
            }
        }
    }

    /**
     *  Updates all references for the given page.
     *
     *  @param page wiki page for which references should be updated
     */
    @Override
    public void updateReferences( final Page page ) {
        final String pageData = m_engine.getManager( PageManager.class ).getPureText( page.getName(), WikiProvider.LATEST_VERSION );
        updateReferences( page.getName(), scanWikiLinks( page, pageData ) );
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is removed
     *  and a new one is built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <P>
     *  This is the method to call when a new page has been created, and we want to a) set up its references and b) notify the
     *  referred pages of the references. Use this method during run-time.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    @Override
    public void updateReferences( final String page, final Collection< String > references ) {
        internalUpdateReferences( page, references );
        serializeToDisk();
    }

    /**
     *  Updates the referred pages of a new or edited WikiPage. If a refersTo entry for this page already exists, it is
     *  replaced by a new one built from scratch. Also calls updateReferredBy() for each referenced page.
     *  <p>
     *  This method does not synchronize the database to disk.
     *
     *  @param page Name of the page to update.
     *  @param references A Collection of Strings, each one pointing to a page this page references.
     */
    private void internalUpdateReferences( String page, final Collection< String > references ) {
        page = getFinalPageName( page );

        // Build the new entry first, then swap it in; put() hands back whatever the page referred to before.
        final TreeSet< String > cleanedRefs = references.stream()
                                                        .map( this::getFinalPageName )
                                                        .collect( Collectors.toCollection( TreeSet::new ) );
        final Collection< String > oldRefTo = m_refersTo.put( page, cleanedRefs );

        //  We know the page exists, since it's making references somewhere. If an entry for it didn't exist previously
        //  in m_referredBy, make sure one is added now.
        m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );

        //  Get all pages that used to be referred to by 'page' and remove that reference. (We don't want to try to figure out
        //  which particular references were removed...)
        cleanReferredBy( page, oldRefTo );

        //  Notify all referred pages of their referinesshoodicity.
        for( final String referredPageName : cleanedRefs ) {
            updateReferredBy( getFinalPageName( referredPageName ), page );
        }
    }

    /**
     * Returns the refers-to list. For debugging.
     *
     * @return The refers-to list.
     */
    protected Map< String, Collection< String > > getRefersTo() {
        return m_refersTo;
    }

    /**
     * Returns the referred-by list. For debugging.
     *
     * @return Referred-by lists.
     */
    protected Map< String, Set< String > > getReferredBy() {
        return m_referredBy;
    }

    /**
     * Cleans the 'referred by' list, removing references by 'referrer' to any other page. Called after 'referrer' is removed.
     *
     * Two ways to go about this. One is to look up all pages previously referred by referrer and remove referrer
     * from their lists, and let the update put them back in (except possibly removed ones).
     *
     * The other is to get the old referred-to list, compare to the new, and tell the ones missing in the latter to remove referrer from
     * their list.
     *
     * We'll just try the first for now. Need to come back and optimize this a bit.
     *
     * @param referrer name of the referring page
     * @param oldReferred the pages the referrer used to refer to; may be null, in which case nothing is done
     */
    private void cleanReferredBy( final String referrer,
                                  final Collection< String > oldReferred ) {
        if( oldReferred == null ) {
            return;
        }

        for( final String referredPage : oldReferred ) {
            final Set< String > oldRefBy = m_referredBy.get( referredPage );
            if( oldRefBy != null ) {
                oldRefBy.remove( referrer );
            }

            // If the page is referred to by no one AND it doesn't even exist, we might just as well forget about this
            // entry. It will be added again elsewhere if new references appear.
            if( ( oldRefBy == null || oldRefBy.isEmpty() ) && !m_engine.getManager( PageManager.class ).wikiPageExists( referredPage ) ) {
                m_referredBy.remove( referredPage );
            }
        }
    }

    /**
     *  When initially building a ReferenceManager from scratch, call this method BEFORE calling updateReferences() with
     *  a full list of existing page names. It builds the refersTo and referredBy key lists, thus enabling updateReferences()
     *  to function correctly.
     *  <P>
     *  This method should NEVER be called after initialization. It clears all mappings from the reference tables.
     *
     *  @param pages a Collection containing WikiPage objects.
     */
    private void buildKeyLists( final Collection< Page > pages ) {
        m_refersTo.clear();
        m_referredBy.clear();
        if( pages == null ) {
            return;
        }

        try {
            for( final Page page : pages ) {
                // We add a non-null entry to referredBy to indicate the referred page exists
                m_referredBy.put( page.getName(), new TreeSet<>() );
                // Just add a key to refersTo; the keys need to be in sync with referredBy.
                m_refersTo.put( page.getName(), new TreeSet<>() );
            }
        } catch( final ClassCastException e ) {
            LOG.fatal( "Invalid collection entry in ReferenceManager.buildKeyLists().", e );
        }
    }


    /**
     *  Marks the page as referred to by the referrer. An entry is created for the page if none exists yet, even if the page
     *  itself has not been created. (This means that some page, somewhere, has a link to a page that does not exist.)
     *
     *  @param page name of the page being referred to
     *  @param referrer name of the page containing the reference
     */
    private void updateReferredBy( final String page, final String referrer ) {
        // First level self-references are recorded like any other reference.

        // We are not interested if plural forms refer to each other.
        if( m_matchEnglishPlurals ) {
            final String p2 = page.endsWith( "s" ) ? page.substring( 0, page.length() - 1 ) : page + "s";
            if( referrer.equals( p2 ) ) {
                return;
            }
        }

        // Even if 'page' has not been created yet, it can still be referenced. This requires we don't use m_referredBy
        // keys when looking up missing pages, of course.
        final Set< String > referrers = m_referredBy.computeIfAbsent( page, k -> new TreeSet<>() );
        referrers.add( referrer );
    }


    /**
     *  Clears the references to a certain page, so it's no longer in the map.
     *
     *  @param pagename  Name of the page to clear references for.
     */
    @Override
    public void clearPageEntries( String pagename ) {
        pagename = getFinalPageName( pagename );

        //  Remove this item from the referredBy list of any page which this item refers to.
        final Collection< String > c = m_refersTo.get( pagename );
        if( c != null ) {
            for( final String key : c ) {
                final Collection< ? > dref = m_referredBy.get( key );
                // The referred-by entry may already be gone (e.g. dropped for a non-existing page).
                if( dref != null ) {
                    dref.remove( pagename );
                }
            }
        }

        //  Finally, remove direct references.
        m_referredBy.remove( pagename );
        m_refersTo.remove( pagename );
    }


    /**
     *  Finds all unreferenced pages. This requires a linear scan through m_referredBy to locate keys with null or empty values.
     *
     *  @return The Collection of Strings
     */
    @Override
    public Collection< String > findUnreferenced() {
        final ArrayList< String > unref = new ArrayList<>();
        for( final String key : m_referredBy.keySet() ) {
            final Set< ? > refs = getReferenceList( m_referredBy, key );
            if( refs == null || refs.isEmpty() ) {
                unref.add( key );
            }
        }

        return unref;
    }


    /**
     * Finds all references to non-existent pages. This requires a linear scan through m_refersTo values; every referenced
     * name is checked against the PageManager, and names for which no page exists are collected.
     * <P>
     * Returns a Collection containing Strings of uncreated page names. Each non-existent page name is shown only
     * once - we don't return information on who referred to it.
     *
     * @return A Collection of Strings
     */
    @Override
    public Collection< String > findUncreated() {
        final PageManager pageManager = m_engine.getManager( PageManager.class );

        return m_refersTo.values()
                         .stream()
                         .filter( Objects::nonNull )
                         .flatMap( Collection::stream )
                         .filter( aReference -> !pageManager.wikiPageExists( aReference ) )
                         .collect( Collectors.toCollection( TreeSet::new ) );
    }

    /**
     *  Searches for the given page in the given Map, and returns the set of references. This method also takes care of
     *  English plural matching.
     *  <p>
     *  When both the name and its plural/singular counterpart have entries, a new sorted Set holding the union is returned;
     *  the Sets stored in the Map are never modified by this method. Otherwise the stored Set itself is returned.
     *
     *  @param coll The Map to search in
     *  @param pagename The name to find.
     *  @return The references list; null if neither form of the name has an entry.
     */
    private < T > Set< T > getReferenceList( final Map< String, Set< T > > coll, final String pagename ) {
        final Set< T > refs = coll.get( pagename );
        if( !m_matchEnglishPlurals ) {
            return refs;
        }

        //  We'll add also matches from the "other" page.
        final String other = pagename.endsWith( "s" ) ? pagename.substring( 0, pagename.length() - 1 ) : pagename + "s";
        final Set< T > refs2 = coll.get( other );
        if( refs2 == null ) {
            return refs;
        }
        if( refs == null ) {
            return refs2;
        }

        // Merge into a copy (naturally ordered, like the TreeSets kept in the maps) instead of polluting the stored entry.
        final Set< T > merged = new TreeSet<>( refs );
        merged.addAll( refs2 );
        return merged;
    }

    /**
     *  Find all pages that refer to this page. Returns null if the page does not exist or is not referenced at all,
     *  otherwise returns a collection containing page names (String) that refer to this one.
     *  <p>
     * @param pagename The page to find referrers for.
     * @return A Set of Strings.  May return null, if the page does not exist, or if it has no references.
     */
    @Override
    public Set< String > findReferrers( final String pagename ) {
        final Set< String > refs = getReferenceList( m_referredBy, pagename );
        if( refs == null || refs.isEmpty() ) {
            return null;
        }

        return refs;
    }

    /**
     *  Returns all pages that refer to this page. Note that this method looks the page up in an unmodifiable view of a live Map,
     *  which may be abruptly changed. So any access to any iterator may result in a ConcurrentModificationException.
     *  <p>
     *  The advantages of using this method over findReferrers() is that it is very fast, as it does not create a new object.
     *  The disadvantages are that it does not do any mapping between plural names, and you may end up getting a
     *  ConcurrentModificationException.
     *
     * @param pageName Page name to query.
     * @return A Set of Strings containing the names of all the pages that refer to this page.  May return null, if the page does
     *         not exist or has not been indexed yet.
     * @since 2.2.33
     */
    @Override
    public Set< String > findReferredBy( final String pageName ) {
        return m_unmutableReferredBy.get( getFinalPageName( pageName ) );
    }

    /**
     *  Returns all pages that this page refers to. You can use this as a quick way of getting the links from a page, but note
     *  that it does not link any InterWiki, image, or external links. It does contain attachments, though.
     *  <p>
     *  The Collection is looked up through an unmodifiable view of the underlying Map. It does reflect the current status and
     *  thus is a live object. So, if you are using any kind of an iterator on it, be prepared for
     *  ConcurrentModificationExceptions.
     *  <p>
     *  The returned value is a Collection, because a page may refer to another page multiple times.
     *
     * @param pageName Page name to query
     * @return A Collection of Strings containing the names of the pages that this page refers to. May return null, if the page
     *         does not exist or has not been indexed yet.
     * @since 2.2.33
     */
    @Override
    public Collection< String > findRefersTo( final String pageName ) {
        return m_unmutableRefersTo.get( getFinalPageName( pageName ) );
    }

    /**
     * This 'deepHashCode' can be used to determine if there were any modifications made to the underlying to and by maps of the
     * ReferenceManager. The maps of the ReferenceManager are not synchronized, so someone could add/remove entries in them while the
     * hashCode is being computed.
     *
     * This method traps and retries if a concurrent modification occurs.
     *
     * @return XOR of the hashCodes for the to and by maps of the ReferenceManager
     * @since 2.3.24
     */
    //
    //   TODO: It is unnecessary to calculate the hashcode; it should be calculated only when the hashmaps are changed.  This is slow.
    //
    public int deepHashCode() {
        boolean failed = true;
        int signature = 0;

        while( failed ) {
            signature = 0;
            try {
                signature ^= m_referredBy.hashCode();
                signature ^= m_refersTo.hashCode();
                failed = false;
            } catch( final ConcurrentModificationException e ) {
                // Somebody changed the data while we were hashing it; let them finish and try again.
                Thread.yield();
            }
        }

        return signature;
    }

    /**
     *  Returns a list of all pages that the ReferenceManager knows about. This should be roughly equivalent to
     *  PageManager.getAllPages(), but without the potential disk access overhead.  Note that this method is not guaranteed
     *  to return a Set of really all pages (especially during startup), but it is very fast.
     *
     *  @return A Set of all defined page names that ReferenceManager knows about.
     *  @since 2.3.24
     */
    @Override
    public Set< String > findCreated() {
        return new HashSet<>( m_refersTo.keySet() );
    }

    /**
     *  Resolves a page name through the Engine, falling back to the given name if the Engine returns null or fails.
     *
     *  @param orig the page name as given
     *  @return the name reported by the Engine, or orig if none could be determined
     */
    private String getFinalPageName( final String orig ) {
        try {
            final String s = m_engine.getFinalPageName( orig );
            return s != null ? s : orig;
        } catch( final ProviderException e ) {
            LOG.error( "Error while trying to fetch a page name; trying to cope with the situation.", e );
            return orig;
        }
    }

    /**
     *  {@inheritDoc}
     */
    @Override
    public void actionPerformed( final WikiEvent event ) {
        if( event instanceof WikiPageEvent && event.getType() == WikiPageEvent.PAGE_DELETED ) {
            final String pageName = ( ( WikiPageEvent ) event ).getPageName();
            if( pageName != null ) {
                pageRemoved( pageName );
            }
        }
    }

}