001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.commons.text.StringEscapeUtils; 023import org.apache.logging.log4j.LogManager; 024import org.apache.logging.log4j.Logger; 025import org.apache.oro.text.regex.MalformedPatternException; 026import org.apache.oro.text.regex.MatchResult; 027import org.apache.oro.text.regex.Pattern; 028import org.apache.oro.text.regex.PatternCompiler; 029import org.apache.oro.text.regex.PatternMatcher; 030import org.apache.oro.text.regex.Perl5Compiler; 031import org.apache.oro.text.regex.Perl5Matcher; 032import org.apache.wiki.InternalWikiException; 033import org.apache.wiki.StringTransmutator; 034import org.apache.wiki.api.core.Acl; 035import org.apache.wiki.api.core.Context; 036import org.apache.wiki.api.core.ContextEnum; 037import org.apache.wiki.api.core.Page; 038import org.apache.wiki.api.exceptions.PluginException; 039import org.apache.wiki.api.plugin.Plugin; 040import org.apache.wiki.api.spi.Wiki; 041import org.apache.wiki.attachment.AttachmentManager; 042import org.apache.wiki.auth.AuthorizationManager; 043import org.apache.wiki.auth.UserManager; 044import 
org.apache.wiki.auth.WikiSecurityException; 045import org.apache.wiki.auth.acl.AclManager; 046import org.apache.wiki.i18n.InternationalizationManager; 047import org.apache.wiki.preferences.Preferences; 048import org.apache.wiki.util.TextUtil; 049import org.apache.wiki.util.XmlUtil; 050import org.apache.wiki.variables.VariableManager; 051import org.jdom2.Attribute; 052import org.jdom2.Content; 053import org.jdom2.Element; 054import org.jdom2.IllegalDataException; 055import org.jdom2.ProcessingInstruction; 056import org.jdom2.Verifier; 057 058import javax.xml.transform.Result; 059import java.io.IOException; 060import java.io.Reader; 061import java.io.StringReader; 062import java.text.MessageFormat; 063import java.util.ArrayList; 064import java.util.Arrays; 065import java.util.Collection; 066import java.util.EmptyStackException; 067import java.util.HashMap; 068import java.util.Iterator; 069import java.util.List; 070import java.util.Map; 071import java.util.Properties; 072import java.util.ResourceBundle; 073import java.util.Stack; 074 075/** 076 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the heart and soul of JSPWiki : make 077 * sure you test properly anything that is added, or else it breaks down horribly. 
078 * 079 * @since 2.4 080 */ 081public class JSPWikiMarkupParser extends MarkupParser { 082 083 protected static final int READ = 0; 084 protected static final int EDIT = 1; 085 protected static final int EMPTY = 2; // Empty message 086 protected static final int LOCAL = 3; 087 protected static final int LOCALREF = 4; 088 protected static final int IMAGE = 5; 089 protected static final int EXTERNAL = 6; 090 protected static final int INTERWIKI = 7; 091 protected static final int IMAGELINK = 8; 092 protected static final int IMAGEWIKILINK = 9; 093 protected static final int ATTACHMENT = 10; 094 095 private static final Logger LOG = LogManager.getLogger( JSPWikiMarkupParser.class ); 096 097 private boolean m_isbold; 098 private boolean m_isitalic; 099 private boolean m_istable; 100 private boolean m_isPre; 101 private boolean m_isEscaping; 102 private boolean m_isdefinition; 103 private boolean m_isPreBlock; 104 105 /** Contains style information, in multiple forms. */ 106 private final Stack< Boolean > m_styleStack = new Stack<>(); 107 108 // general list handling 109 private int m_genlistlevel; 110 private final StringBuilder m_genlistBulletBuffer = new StringBuilder( 10 ); // stores the # and * pattern 111 private final boolean m_allowPHPWikiStyleLists = true; 112 113 private boolean m_isOpenParagraph; 114 115 /** Parser for extended link functionality. */ 116 private final LinkParser m_linkParser = new LinkParser(); 117 118 /** Keeps track of any plain text that gets put in the Text nodes */ 119 private StringBuilder m_plainTextBuf = new StringBuilder( 20 ); 120 121 private Element m_currentElement; 122 123 /** Keep track of duplicate header names. */ 124 private final Map< String, Integer > m_titleSectionCounter = new HashMap<>(); 125 126 /** If true, then considers CamelCase links as well. 
*/ 127 private boolean m_camelCaseLinks; 128 129 /** If true, then generate special output for wysiwyg editing in certain cases */ 130 private boolean m_wysiwygEditorMode; 131 132 /** If true, consider URIs that have no brackets as well. */ 133 // FIXME: Currently reserved, but not used. 134 private boolean m_plainUris; 135 136 /** If true, all outward links use a small link image. */ 137 private boolean m_useOutlinkImage = true; 138 139 private boolean m_useAttachmentImage = true; 140 141 /** If true, allows raw HTML. */ 142 private boolean m_allowHTML; 143 144 private boolean m_useRelNofollow; 145 146 private final PatternCompiler m_compiler = new Perl5Compiler(); 147 148 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 149 150 private final PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 151 private Pattern m_camelCasePattern; 152 153 private int m_rowNum = 1; 154 155 private Heading m_lastHeading; 156 157 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 158 159 /** 160 * Creates a markup parser. 161 * 162 * @param context The WikiContext which controls the parsing 163 * @param in Where the data is read from. 164 */ 165 public JSPWikiMarkupParser( final Context context, final Reader in ) { 166 super( context, in ); 167 initialize(); 168 } 169 170 // FIXME: parsers should be pooled for better performance. 
171 private void initialize() { 172 initInlineImagePatterns(); 173 174 m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN ); 175 if( m_camelCasePattern == null ) { 176 try { 177 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 178 } catch( final MalformedPatternException e ) { 179 LOG.fatal("Internal error: Someone put in a faulty pattern.",e); 180 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 181 } 182 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 183 } 184 185 // Set the properties. 186 final Properties props = m_engine.getWikiProperties(); 187 final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 188 189 if( cclinks != null ) { 190 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 191 } else { 192 m_camelCaseLinks = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks ); 193 } 194 195 final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE ); 196 if( wysiwygVariable != null ) { 197 m_wysiwygEditorMode = wysiwygVariable; 198 } 199 200 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 201 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 202 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 203 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 204 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 205 206 if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) { 207 disableAccessRules(); 208 } 209 210 m_context.getPage().setHasMetadata(); 211 } 212 213 /** 214 * Calls a transmutator chain. 
215 * 216 * @param list Chain to call 217 * @param text Text that should be passed to the mutate() method of each of the mutators in the chain. 218 * @return The result of the mutation. 219 */ 220 protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) { 221 if( list == null || list.size() == 0 ) { 222 return text; 223 } 224 225 for( final StringTransmutator m : list ) { 226 text = m.mutate( m_context, text ); 227 } 228 229 return text; 230 } 231 232 /** 233 * Calls the heading listeners. 234 * 235 * @param param A Heading object. 236 */ 237 private void callHeadingListenerChain( final Heading param ) { 238 for( final HeadingListener h : m_headingListenerChain ) { 239 h.headingAdded( m_context, param ); 240 } 241 } 242 243 /** 244 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 245 * if you really know what you are doing. 246 * 247 * @param type One of the types above 248 * @param link URL to which to link to 249 * @param text Link text 250 * @param section If a particular section identifier is required. 251 * @return An A element. 252 * @since 2.4.78 253 */ 254 private Element createAnchor( final int type, final String link, String text, String section ) { 255 text = escapeHTMLEntities( text ); 256 section = escapeHTMLEntities( section ); 257 final Element el = new Element( "a" ); 258 el.setAttribute( "class", CLASS_TYPES[ type ] ); 259 el.setAttribute( "href", link + section ); 260 el.addContent( text ); 261 return el; 262 } 263 264 private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) { 265 Element el = null; 266 if( text == null ) { 267 text = link; 268 } 269 text = callMutatorChain( m_linkMutators, text ); 270 section = (section != null) ? ("#"+section) : ""; 271 272 // Make sure we make a link name that can be accepted as a valid URL. 
273 if( link.isEmpty() ) { 274 type = EMPTY; 275 } 276 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 277 278 switch( type ) { 279 case READ: 280 el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section ); 281 break; 282 283 case EDIT: 284 el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" ); 285 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 286 break; 287 288 case EMPTY: 289 el = new Element("u").addContent(text); 290 break; 291 292 // These two are for local references - footnotes and references to footnotes. 293 // We embed the page name (or whatever WikiContext gives us) to make sure the links are unique across Wiki. 294 case LOCALREF: 295 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 296 break; 297 298 case LOCAL: 299 el = new Element( "a" ).setAttribute( "class", CLASS_FOOTNOTE ); 300 el.setAttribute( "name", "ref-" + m_context.getName() + "-" + link.substring( 1 ) ); 301 el.addContent( "[" + text + "]" ); 302 break; 303 304 // With the image, external and interwiki types we need to make sure nobody can put in Javascript or 305 // something else annoying into the links themselves. We do this by preventing a haxor from stopping 306 // the link name short with quotes in fillBuffer(). 
307 case IMAGE: 308 el = new Element( "img" ).setAttribute( "class", "inline" ); 309 el.setAttribute( "src", link ); 310 el.setAttribute( "alt", text ); 311 break; 312 313 case IMAGELINK: 314 el = new Element( "img" ).setAttribute( "class", "inline" ); 315 el.setAttribute( "src", link ); 316 el.setAttribute( "alt", text ); 317 el = createAnchor( IMAGELINK, text, "", "" ).addContent( el ); 318 break; 319 320 case IMAGEWIKILINK: 321 final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text ); 322 el = new Element( "img" ).setAttribute( "class", "inline" ); 323 el.setAttribute( "src", link ); 324 el.setAttribute( "alt", text ); 325 el = createAnchor( IMAGEWIKILINK, pagelink, "", "" ).addContent( el ); 326 break; 327 328 case EXTERNAL: 329 el = createAnchor( EXTERNAL, link, text, section ); 330 if( m_useRelNofollow ) { 331 el.setAttribute( "rel", "nofollow" ); 332 } 333 break; 334 335 case INTERWIKI: 336 el = createAnchor( INTERWIKI, link, text, section ); 337 break; 338 339 case ATTACHMENT: 340 final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link ); 341 final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link ); 342 final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" ); 343 el = createAnchor( ATTACHMENT, attlink, text, "" ); 344 if( m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) { 345 el.setAttribute("download", ""); 346 } 347 348 pushElement( el ); 349 popElement( el.getName() ); 350 351 if( m_useAttachmentImage ) { 352 el = new Element( "img" ).setAttribute( "src", imglink ); 353 el.setAttribute( "border", "0" ); 354 el.setAttribute( "alt", "(info)" ); 355 356 el = new Element( "a" ).setAttribute( "href", infolink ).addContent( el ); 357 el.setAttribute( "class", "infolink" ); 358 } else { 359 el = null; 360 } 361 break; 362 363 default: 364 break; 365 } 366 367 if( el != null 
&& attributes != null ) { 368 while( attributes.hasNext() ) { 369 final Attribute attr = attributes.next(); 370 if( attr != null ) { 371 el.setAttribute( attr ); 372 } 373 } 374 } 375 376 if( el != null ) { 377 flushPlainText(); 378 m_currentElement.addContent( el ); 379 } 380 return el; 381 } 382 383 /** 384 * These are all the HTML 4.01 block-level elements. 385 */ 386 private static final String[] BLOCK_ELEMENTS = { 387 "address", "blockquote", "div", "dl", "fieldset", "form", 388 "h1", "h2", "h3", "h4", "h5", "h6", 389 "hr", "noscript", "ol", "p", "pre", "table", "ul" 390 }; 391 392 private static boolean isBlockLevel( final String name ) { 393 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 394 } 395 396 /** 397 * This method peeks ahead in the stream until EOL and returns the result. It will keep the buffers untouched. 398 * 399 * @return The string from the current position to the end of line. 400 */ 401 // FIXME: Always returns an empty line, even if the stream is full. 402 private String peekAheadLine() throws IOException { 403 final String s = readUntilEOL().toString(); 404 if( s.length() > PUSHBACK_BUFFER_SIZE ) { 405 LOG.warn( "Line is longer than maximum allowed size (" + PUSHBACK_BUFFER_SIZE + " characters. Attempting to recover..." ); 406 pushBack( s.substring( 0, PUSHBACK_BUFFER_SIZE - 1 ) ); 407 } else { 408 try { 409 pushBack( s ); 410 } catch( final IOException e ) { 411 LOG.warn( "Pushback failed: the line is probably too long. Attempting to recover." ); 412 } 413 } 414 return s; 415 } 416 417 private int flushPlainText() { 418 final int numChars = m_plainTextBuf.length(); 419 if( numChars > 0 ) { 420 String buf; 421 422 if( !m_allowHTML ) { 423 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 424 } else { 425 buf = m_plainTextBuf.toString(); 426 } 427 // We must first empty the buffer because the side effect of calling makeCamelCaseLink() is to call this routine. 
428 m_plainTextBuf = new StringBuilder(20); 429 try { 430 // This is the heaviest part of parsing, and therefore we can do some optimization here. 431 // 1) Only when the length of the buffer is big enough, we try to do the match 432 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) { 433 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) { 434 final MatchResult result = m_camelCaseMatcher.getMatch(); 435 final String firstPart = buf.substring( 0, result.beginOffset( 0 ) ); 436 String prefix = result.group( 1 ); 437 if( prefix == null ) { 438 prefix = ""; 439 } 440 441 final String camelCase = result.group(2); 442 final String protocol = result.group(3); 443 String uri = protocol+result.group(4); 444 buf = buf.substring(result.endOffset(0)); 445 446 m_currentElement.addContent( firstPart ); 447 // Check if the user does not wish to do URL or WikiWord expansion 448 if( prefix.endsWith( "~" ) || prefix.indexOf( '[' ) != -1 ) { 449 if( prefix.endsWith( "~" ) ) { 450 if( m_wysiwygEditorMode ) { 451 m_currentElement.addContent( "~" ); 452 } 453 prefix = prefix.substring( 0, prefix.length() - 1 ); 454 } 455 if( camelCase != null ) { 456 m_currentElement.addContent( prefix + camelCase ); 457 } else if( protocol != null ) { 458 m_currentElement.addContent( prefix + uri ); 459 } 460 continue; 461 } 462 463 // Fine, then let's check what kind of link this was and emit the proper elements 464 if( protocol != null ) { 465 final char c = uri.charAt( uri.length() - 1 ); 466 if( c == '.' 
|| c == ',' ) { 467 uri = uri.substring( 0, uri.length() - 1 ); 468 buf = c + buf; 469 } 470 // System.out.println("URI match "+uri); 471 m_currentElement.addContent( prefix ); 472 makeDirectURILink( uri ); 473 } else { 474 // System.out.println("Matched: '"+camelCase+"'"); 475 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 476 // System.out.println("prefix="+prefix); 477 m_currentElement.addContent( prefix ); 478 makeCamelCaseLink( camelCase ); 479 } 480 } 481 m_currentElement.addContent( buf ); 482 } else { 483 // No camelcase asked for, just add the elements 484 m_currentElement.addContent( buf ); 485 } 486 } catch( final IllegalDataException e ) { 487 // Sometimes it's possible that illegal XML chars is added to the data. Here we make sure it does not stop parsing. 488 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 489 } 490 } 491 492 return numChars; 493 } 494 495 /** 496 * Escapes XML entities in a HTML-compatible way (i.e. does not escape entities that are already escaped). 497 * 498 * @param buf 499 * @return An escaped string. 
500 */ 501 private String escapeHTMLEntities( final String buf ) { 502 final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); 503 for( int i = 0; i < buf.length(); i++ ) { 504 final char ch = buf.charAt(i); 505 if( ch == '<' ) { 506 tmpBuf.append("<"); 507 } else if( ch == '>' ) { 508 tmpBuf.append(">"); 509 } else if( ch == '\"' ) { 510 tmpBuf.append("""); 511 } else if( ch == '&' ) { 512 // If the following is an XML entity reference (&#.*;) we'll leave it as it is; otherwise we'll replace it with an & 513 boolean isEntity = false; 514 final StringBuilder entityBuf = new StringBuilder(); 515 if( i < buf.length() -1 ) { 516 for( int j = i; j < buf.length(); j++ ) { 517 final char ch2 = buf.charAt( j ); 518 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) { 519 entityBuf.append(ch2); 520 if( ch2 == ';' ) { 521 isEntity = true; 522 break; 523 } 524 } else { 525 break; 526 } 527 } 528 } 529 530 if( isEntity ) { 531 tmpBuf.append( entityBuf ); 532 i = i + entityBuf.length() - 1; 533 } else { 534 tmpBuf.append( "&" ); 535 } 536 537 } else { 538 tmpBuf.append( ch ); 539 } 540 } 541 542 return tmpBuf.toString(); 543 } 544 545 private Element pushElement( final Element e ) { 546 flushPlainText(); 547 m_currentElement.addContent( e ); 548 m_currentElement = e; 549 550 return e; 551 } 552 553 private Element addElement( final Content e ) { 554 if( e != null ) { 555 flushPlainText(); 556 m_currentElement.addContent( e ); 557 } 558 return m_currentElement; 559 } 560 561 /** 562 * All elements that can be empty by the HTML DTD. 563 */ 564 // Keep sorted. 565 private static final String[] EMPTY_ELEMENTS = { 566 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 567 }; 568 569 /** 570 * Goes through the current element stack and pops all elements until this 571 * element is found - this essentially "closes" and element. 572 * 573 * @param s element to be found. 
574 * @return The new current element, or null, if there was no such element in the entire stack. 575 */ 576 private Element popElement( final String s ) { 577 final int flushedBytes = flushPlainText(); 578 Element currEl = m_currentElement; 579 while( currEl.getParentElement() != null ) { 580 if( currEl.getName().equals( s ) && !currEl.isRootElement() ) { 581 m_currentElement = currEl.getParentElement(); 582 583 // Check if it's okay for this element to be empty. Then we will 584 // trick the JDOM generator into not generating an empty element, 585 // by putting an empty string between the tags. Yes, it's a kludge 586 // but what'cha gonna do about it. :-) 587 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) { 588 currEl.addContent( "" ); 589 } 590 return m_currentElement; 591 } 592 currEl = currEl.getParentElement(); 593 } 594 return null; 595 } 596 597 598 /** 599 * Reads the stream until it meets one of the specified ending characters, or stream end. The ending 600 * character will be left in the stream. 601 */ 602 private String readUntil( final String endChars ) throws IOException { 603 final StringBuilder sb = new StringBuilder( 80 ); 604 int ch = nextToken(); 605 while( ch != -1 ) { 606 if( ch == '\\' ) { 607 ch = nextToken(); 608 if( ch == -1 ) { 609 break; 610 } 611 } else { 612 if( endChars.indexOf( ( char )ch ) != -1 ) { 613 pushBack( ch ); 614 break; 615 } 616 } 617 sb.append( ( char )ch ); 618 ch = nextToken(); 619 } 620 621 return sb.toString(); 622 } 623 624 /** 625 * Reads the stream while the characters that have been specified are 626 * in the stream, returning then the result as a String. 
627 */ 628 private String readWhile( final String endChars ) throws IOException { 629 final StringBuilder sb = new StringBuilder( 80 ); 630 int ch = nextToken(); 631 while( ch != -1 ) { 632 if( endChars.indexOf( ( char ) ch ) == -1 ) { 633 pushBack( ch ); 634 break; 635 } 636 sb.append( ( char ) ch ); 637 ch = nextToken(); 638 } 639 640 return sb.toString(); 641 } 642 643 private JSPWikiMarkupParser m_cleanTranslator; 644 645 /** 646 * Does a lazy init. Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before 647 * the TranslatorReader it is contained by is up. 648 */ 649 private JSPWikiMarkupParser getCleanTranslator() { 650 if( m_cleanTranslator == null ) { 651 final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() ); 652 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 653 m_cleanTranslator.m_allowHTML = true; 654 } 655 656 return m_cleanTranslator; 657 } 658 659 /** 660 * Modifies the "hd" parameter to contain proper values. Because 661 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 662 * % after url encoding with '_'. 663 * <p> 664 * Counts also duplicate headings (= headings with similar name), and 665 * attaches a counter. 
666 */ 667 protected String makeHeadingAnchor( final String baseName, String title, final Heading hd ) { 668 hd.m_titleText = title; 669 title = MarkupParser.wikifyLink( title ); 670 hd.m_titleSection = m_engine.encodeName(title); 671 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) { 672 final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1; 673 m_titleSectionCounter.put( hd.m_titleSection, count ); 674 hd.m_titleSection += "-" + count; 675 } else { 676 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 677 } 678 679 hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection; 680 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 681 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 682 683 return hd.m_titleAnchor; 684 } 685 686 private String makeSectionTitle( String title ) { 687 title = title.trim(); 688 try { 689 final JSPWikiMarkupParser dtr = getCleanTranslator(); 690 dtr.setInputReader( new StringReader( title ) ); 691 final WikiDocument doc = dtr.parse(); 692 doc.setContext( m_context ); 693 694 return XmlUtil.extractTextFromDocument( doc ); 695 } catch( final IOException e ) { 696 LOG.fatal("Title parsing not working", e ); 697 throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e ); 698 } 699 } 700 701 /** 702 * Returns XHTML for the heading. 703 * 704 * @param level The level of the heading. 
@see Heading 705 * @param title the title for the heading 706 * @param hd a List to which heading should be added 707 * @return An Element containing the heading 708 */ 709 public Element makeHeading( final int level, final String title, final Heading hd ) { 710 final Element el; 711 final String pageName = m_context.getPage().getName(); 712 final String outTitle = makeSectionTitle( title ); 713 hd.m_level = level; 714 715 switch( level ) { 716 case Heading.HEADING_SMALL: 717 el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 718 break; 719 720 case Heading.HEADING_MEDIUM: 721 el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 722 break; 723 724 case Heading.HEADING_LARGE: 725 el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 726 break; 727 728 default: 729 throw new InternalWikiException( "Illegal heading type " + level ); 730 } 731 732 return el; 733 } 734 735 /** 736 * When given a link to a WikiName, we just return a proper HTML link for it. The local link mutator 737 * chain is also called. 738 */ 739 private Element makeCamelCaseLink( final String wikiname ) { 740 final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 741 callMutatorChain( m_localLinkMutatorChain, wikiname ); 742 if( matchedLink != null ) { 743 makeLink( READ, matchedLink, wikiname, null, null ); 744 } else { 745 makeLink( EDIT, wikiname, wikiname, null, null ); 746 } 747 748 return m_currentElement; 749 } 750 751 /** Holds the image URL for the duration of this parser */ 752 private String m_outlinkImageURL; 753 754 /** 755 * Returns an element for the external link image (out.png). However, this method caches the URL for the lifetime 756 * of this MarkupParser, because it's commonly used, and we'll end up with possibly hundreds our thousands of 757 * references to it... It's a lot faster, too. 
758 * 759 * @return An element containing the HTML for the outlink image. 760 */ 761 private Element outlinkImage() { 762 Element el = null; 763 if( m_useOutlinkImage ) { 764 if( m_outlinkImageURL == null ) { 765 m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE ); 766 } 767 768 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 769 el.setAttribute( "src", m_outlinkImageURL ); 770 el.setAttribute( "alt","" ); 771 } 772 773 return el; 774 } 775 776 /** 777 * Takes a URL and turns it into a regular wiki link. Unfortunately, because of the way that flushPlainText() 778 * works, it already encodes all the XML entities. But so does WikiContext.getURL(), so we 779 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 780 * <p> 781 * What a crappy problem. 782 * 783 * @param url provided url. 784 * @return An anchor Element containing the link. 785 */ 786 private Element makeDirectURILink( String url ) { 787 final Element result; 788 String last = null; 789 790 if( url.endsWith( "," ) || url.endsWith( "." ) ) { 791 last = url.substring( url.length() - 1 ); 792 url = url.substring( 0, url.length() - 1 ); 793 } 794 795 callMutatorChain( m_externalLinkMutatorChain, url ); 796 797 if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) { 798 result = handleImageLink( StringUtils.replace( url, "&", "&" ), url, false ); 799 } else { 800 result = makeLink( EXTERNAL, StringUtils.replace( url, "&", "&" ), url, null, null ); 801 addElement( outlinkImage() ); 802 } 803 804 if( last != null ) { 805 m_plainTextBuf.append( last ); 806 } 807 808 return result; 809 } 810 811 /** 812 * Image links are handled differently: 813 * 1. If the text is a WikiName of an existing page, it gets linked. 814 * 2. If the text is an external link, then it is inlined. 815 * 3. Otherwise, it becomes an ALT text. 816 * 817 * @param reallink The link to the image. 
818 * @param link Link text portion, may be a link to somewhere else. 819 * @param hasLinkText If true, then the defined link had a link text available. 820 * This means that the link text may be a link to a wiki page, 821 * or an external resource. 822 */ 823 private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) { 824 final String possiblePage = MarkupParser.cleanLink( link ); 825 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) { 826 return makeLink( IMAGELINK, reallink, link, null, null ); 827 } else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) { 828 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 829 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 830 } else { 831 return makeLink( IMAGE, reallink, link, null, null ); 832 } 833 } 834 835 private Element handleAccessRule( String ruleLine ) { 836 if( m_wysiwygEditorMode ) { 837 m_currentElement.addContent( "[" + ruleLine + "]" ); 838 } 839 if( !m_parseAccessRules ) { 840 return m_currentElement; 841 } 842 final Page page = m_context.getRealPage(); 843 // UserDatabase db = m_context.getEngine().getUserDatabase(); 844 845 if( ruleLine.startsWith( "{" ) ) { 846 ruleLine = ruleLine.substring( 1 ); 847 } 848 849 if( ruleLine.endsWith( "}" ) ) { 850 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 851 } 852 853 LOG.debug("page={}, ACL = {}", page.getName(), ruleLine); 854 855 try { 856 final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine ); 857 page.setAcl( acl ); 858 LOG.debug( acl.toString() ); 859 } catch( final WikiSecurityException wse ) { 860 return makeError( wse.getMessage() ); 861 } 862 863 return m_currentElement; 864 } 865 866 /** 867 * Handles metadata setting [{SET foo=bar}] 868 */ 869 private Element handleMetadata( final String link ) { 870 if( m_wysiwygEditorMode ) { 871 m_currentElement.addContent( "[" + link + "]" ); 872 } 873 874 try { 875 final 
String args = link.substring( link.indexOf(' '), link.length()-1 ); 876 final String name = args.substring( 0, args.indexOf('=') ).trim(); 877 String val = args.substring( args.indexOf('=')+1 ).trim(); 878 879 if( val.startsWith("'") ) { 880 val = val.substring( 1 ); 881 } 882 if( val.endsWith("'") ) { 883 val = val.substring( 0, val.length()-1 ); 884 } 885 886 // LOG.debug("SET name='"+name+"', value='"+val+"'."); 887 888 if( !name.isEmpty() && !val.isEmpty() ) { 889 val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val ); 890 m_context.getPage().setAttribute( name, val ); 891 } 892 } catch( final Exception e ) { 893 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 894 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 895 } 896 897 return m_currentElement; 898 } 899 900 /** 901 * Emits a processing instruction that will disable markup escaping. This is 902 * very useful if you want to emit HTML directly into the stream. 903 */ 904 private void disableOutputEscaping() { 905 addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) ); 906 } 907 908 /** 909 * Gobbles up all hyperlinks that are encased in square brackets. 
910 */ 911 private Element handleHyperlinks( String linktext, final int pos ) { 912 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 913 final StringBuilder sb = new StringBuilder( linktext.length() + 80 ); 914 915 if( m_linkParsingOperations.isAccessRule( linktext ) ) { 916 return handleAccessRule( linktext ); 917 } 918 919 if( m_linkParsingOperations.isMetadata( linktext ) ) { 920 return handleMetadata( linktext ); 921 } 922 923 if( m_linkParsingOperations.isPluginLink( linktext ) ) { 924 try { 925 final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 926 927 // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not. 928 if( pluginContent != null ) { 929 addElement( pluginContent ); 930 pluginContent.executeParse( m_context ); 931 } 932 } catch( final PluginException e ) { 933 LOG.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 934 //LOG.info( "Root cause:",e.getRootThrowable() ); 935 if( !m_wysiwygEditorMode ) { 936 final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE ); 937 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 938 m_context.getRealPage().getWiki(), 939 m_context.getRealPage().getName(), 940 e.getMessage() ) ) ); 941 } 942 } 943 return m_currentElement; 944 } 945 946 try { 947 final LinkParser.Link link = m_linkParser.parse( linktext ); 948 linktext = link.getText(); 949 String linkref = link.getReference(); 950 // Yes, we now have the components separated. 951 // linktext = the text the link should have 952 // linkref = the url or page name. 953 // In many cases these are the same. [linktext|linkref]. 
954 if( m_linkParsingOperations.isVariableLink( linktext ) ) { 955 final Content el = new VariableContent( linktext ); 956 addElement( el ); 957 } else if( m_linkParsingOperations.isExternalLink( linkref ) ) { 958 // It's an external link, out of this Wiki 959 callMutatorChain( m_externalLinkMutatorChain, linkref ); 960 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 961 handleImageLink( linkref, linktext, link.hasReference() ); 962 } else { 963 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 964 addElement( outlinkImage() ); 965 } 966 } else if( link.isInterwikiLink() ) { 967 // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved. 968 969 // FIXME: There is an interesting issue here: We probably should 970 // URLEncode the wikiPage, but we can't since some of the 971 // Wikis use slashes (/), which won't survive URLEncoding. 972 // Besides, we don't know which character set the other Wiki 973 // is using, so you'll have to write the entire name as it appears 974 // in the URL. Bugger. 
975 976 final String extWiki = link.getExternalWiki(); 977 final String wikiPage = link.getExternalWikiPage(); 978 if( m_wysiwygEditorMode ) { 979 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 980 } else { 981 String urlReference = m_engine.getInterWikiURL( extWiki ); 982 if( urlReference != null ) { 983 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 984 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 985 986 if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) { 987 handleImageLink( urlReference, linktext, link.hasReference() ); 988 } else { 989 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 990 } 991 if( m_linkParsingOperations.isExternalLink( urlReference ) ) { 992 addElement( outlinkImage() ); 993 } 994 } else { 995 final Object[] args = { escapeHTMLEntities( extWiki ) }; 996 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 997 } 998 } 999 } else if( linkref.startsWith( "#" ) ) { 1000 // It defines a local footnote 1001 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1002 } else if( TextUtil.isNumber( linkref ) ) { 1003 // It defines a reference to a local footnote 1004 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1005 } else { 1006 final int hashMark; 1007 1008 // Internal wiki link, but is it an attachment link? 
1009 String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref ); 1010 if( attachment != null ) { 1011 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1012 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 1013 attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment ); 1014 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1015 } else { 1016 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1017 } 1018 } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) { 1019 // It's an internal Wiki link, but to a named section 1020 final String namedSection = linkref.substring( hashMark + 1 ); 1021 linkref = linkref.substring( 0, hashMark ); 1022 linkref = MarkupParser.cleanLink( linkref ); 1023 callMutatorChain( m_localLinkMutatorChain, linkref ); 1024 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1025 if( matchedLink != null ) { 1026 String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) ); 1027 sectref = sectref.replace( '%', '_' ); 1028 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1029 } else { 1030 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1031 } 1032 } else { 1033 // It's an internal Wiki link 1034 linkref = MarkupParser.cleanLink( linkref ); 1035 callMutatorChain( m_localLinkMutatorChain, linkref ); 1036 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1037 if( matchedLink != null ) { 1038 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1039 } else { 1040 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1041 } 1042 } 1043 } 1044 1045 } catch( final ParseException e ) { 1046 LOG.info( "Parser failure: ", e ); 1047 final Object[] args = { e.getMessage() }; 1048 addElement( makeError( 
MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1049 } 1050 return m_currentElement; 1051 } 1052 1053 /** 1054 * Pushes back any string that has been read. It will obviously be pushed back in a reverse order. 1055 * 1056 * @since 2.1.77 1057 */ 1058 private void pushBack( final String s ) throws IOException { 1059 for( int i = s.length()-1; i >= 0; i-- ) { 1060 pushBack( s.charAt(i) ); 1061 } 1062 } 1063 1064 private Element handleBackslash() throws IOException { 1065 final int ch = nextToken(); 1066 if( ch == '\\' ) { 1067 final int ch2 = nextToken(); 1068 if( ch2 == '\\' ) { 1069 pushElement( new Element( "br" ).setAttribute( "clear", "all" ) ); 1070 return popElement( "br" ); 1071 } 1072 pushBack( ch2 ); 1073 pushElement( new Element( "br" ) ); 1074 return popElement( "br" ); 1075 } 1076 pushBack( ch ); 1077 return null; 1078 } 1079 1080 private Element handleUnderscore() throws IOException { 1081 final int ch = nextToken(); 1082 Element el = null; 1083 if( ch == '_' ) { 1084 if( m_isbold ) { 1085 el = popElement( "b" ); 1086 } else { 1087 el = pushElement( new Element( "b" ) ); 1088 } 1089 m_isbold = !m_isbold; 1090 } else { 1091 pushBack( ch ); 1092 } 1093 1094 return el; 1095 } 1096 1097 1098 /** 1099 * For example: italics. 
*/
    private Element handleApostrophe() throws IOException {
        final int next = nextToken();
        if( next != '\'' ) {
            // A lone apostrophe is not markup; hand the token back.
            pushBack( next );
            return null;
        }

        // A doubled apostrophe toggles italics.
        final Element result = m_isitalic ? popElement( "i" ) : pushElement( new Element( "i" ) );
        m_isitalic = !m_isitalic;
        return result;
    }

    /**
     * Handles the tokens after an opening brace: "{{" opens a tt element, "{{{" opens preformatted text.
     *
     * @param isBlock If true, "{{{" opens a block-level pre element; otherwise an inline-code span.
     * @return The opened element, or null (with the token pushed back) if no second brace follows.
     * @throws IOException If reading or pushing back a token fails.
     */
    private Element handleOpenbrace( final boolean isBlock ) throws IOException {
        final int second = nextToken();
        if( second != '{' ) {
            pushBack( second );
            return null;
        }

        final int third = nextToken();
        if( third != '{' ) {
            pushBack( third );
            return pushElement( new Element( "tt" ) );
        }

        // Triple brace: switch to preformatted, escaping mode.
        m_isPre = true;
        m_isEscaping = true;
        m_isPreBlock = isBlock;
        if( !isBlock ) {
            return pushElement( new Element( "span" ).setAttribute( "class", "inline-code" ) );
        }
        startBlockLevel();
        return pushElement( new Element( "pre" ) );
    }

    /**
     * Handles both }} and }}}
     *
     * @return The current element after "}}}", the result of closing the tt element after "}}" outside
     *         escaping mode, or null otherwise.
     * @throws IOException If reading or pushing back a token fails.
     */
    private Element handleClosebrace() throws IOException {
        final int second = nextToken();
        if( second == '}' ) {
            final int third = nextToken();
            if( third == '}' ) {
                if( !m_isPre ) {
                    // Not inside preformatted text: the braces are plain text.
                    m_plainTextBuf.append( "}}}" );
                    return m_currentElement;
                }
                popElement( m_isPreBlock ? "pre" : "span" );
                m_isPre = false;
                m_isEscaping = false;
                return m_currentElement;
            }
            pushBack( third );
            if( !m_isEscaping ) {
                return popElement( "tt" );
            }
        }
        pushBack( second );
        return null;
    }

    /**
     * Handles the tokens after a dash: four or more dashes in a row produce a horizontal rule.
     *
     * @return The result of closing the hr element, or null (with all read tokens pushed back) if fewer dashes follow.
     * @throws IOException If reading or pushing back a token fails.
     */
    private Element handleDash() throws IOException {
        final int first = nextToken();
        if( first != '-' ) {
            pushBack( first );
            return null;
        }

        final int second = nextToken();
        if( second != '-' ) {
            pushBack( second );
            pushBack( first );
            return null;
        }

        final int third = nextToken();
        if( third != '-' ) {
            pushBack( third );
            pushBack( second );
            pushBack( first );
            return null;
        }

        // Swallow every further dash, then return the first non-dash to the stream.
        int trailing = nextToken();
        while( trailing == '-' ) {
            trailing = nextToken();
        }
        pushBack( trailing );

        startBlockLevel();
        pushElement( new Element( "hr" ) );
        return popElement( "hr" );
    }

    /**
     * Handles a heading. The number of further exclamation marks selects the heading size; the title is
     * peeked from the rest of the line. The heading listeners are notified and the heading is remembered
     * as the last heading.
     *
     * @return The heading element, which has been pushed unless it is null.
     * @throws IOException If reading or pushing back a token fails.
     */
    private Element handleHeading() throws IOException {
        final int first = nextToken();
        final Heading heading = new Heading();
        final Element headingElement;

        if( first != '!' ) {
            pushBack( first );
            headingElement = makeHeading( Heading.HEADING_SMALL, peekAheadLine(), heading );
        } else {
            final int second = nextToken();
            if( second != '!' ) {
                pushBack( second );
                headingElement = makeHeading( Heading.HEADING_MEDIUM, peekAheadLine(), heading );
            } else {
                headingElement = makeHeading( Heading.HEADING_LARGE, peekAheadLine(), heading );
            }
        }

        callHeadingListenerChain( heading );
        m_lastHeading = heading;
        if( headingElement != null ) {
            pushElement( headingElement );
        }
        return headingElement;
    }

    /**
     * Reads the stream until the next EOL or EOF. Note that it will also read the EOL from the stream.
     *
     * @return The characters read, including the terminating newline if one was found.
     * @throws IOException If reading a token fails.
     */
    private StringBuilder readUntilEOL() throws IOException {
        final StringBuilder line = new StringBuilder( 256 );
        for( int token = nextToken(); token != -1; token = nextToken() ) {
            line.append( (char) token );
            if( token == '\n' ) {
                break;
            }
        }
        return line;
    }

    /** Controls whether italic is restarted after a paragraph shift */

    private boolean m_restartitalic;
    private boolean m_restartbold;

    private boolean m_newLine;

    /**
     * Starts a block level element, therefore closing a potential open paragraph tag.
*
     * Open i, b and tt elements are closed as well. The italic and bold states in effect are saved
     * in m_restartitalic / m_restartbold before being reset.
     */
    private void startBlockLevel() {
        // These may not continue over block level limits in XHTML
        popElement( "i" );
        popElement( "b" );
        popElement( "tt" );
        if( m_isOpenParagraph ) {
            m_isOpenParagraph = false;
            popElement( "p" );
            m_plainTextBuf.append( "\n" ); // Just small beautification
        }
        // Remember the inline state before clearing it.
        m_restartitalic = m_isitalic;
        m_restartbold = m_isbold;
        m_isitalic = false;
        m_isbold = false;
    }

    /**
     * Maps a bullet character to the name of the list element it opens.
     *
     * @param c The bullet character, '*' or '#'.
     * @return "ul" for '*', "ol" for '#'.
     * @throws InternalWikiException For any other character.
     */
    private static String getListType( final char c ) {
        if( c == '*' ) {
            return "ul";
        } else if( c == '#' ) {
            return "ol";
        }
        throw new InternalWikiException( "Parser got faulty list type: " + c );
    }
    /**
     * Like original handleOrderedList() and handleUnorderedList(),
     * however handles both ordered ('#') and unordered ('*') mixed together.
     * <p>
     * The run of bullet characters at the start of the line is compared with the bullets remembered in
     * m_genlistBulletBuffer; list elements are opened or closed until the nesting matches, a new li is
     * opened, and the bullets of this line are remembered for the next call.
     *
     * @return The current element, i.e. the newly opened list item.
     * @throws IOException If reading the input fails.
     */
    // FIXME: Refactor this; it's a bit messy.
    private Element handleGeneralList() throws IOException {
        startBlockLevel();
        String strBullets = readWhile( "*#" );
        // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
        final int numBullets = strBullets.length();

        // override the beginning portion of bullet pattern to be like the previous to simulate PHPWiki style lists

        if( m_allowPHPWikiStyleLists ) {
            // only substitute if different
            if( !( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) ) {
                if( numBullets <= m_genlistlevel ) {
                    // Substitute all but the last character (keep the expressed bullet preference)
                    strBullets = ( numBullets > 1 ? m_genlistBulletBuffer.substring( 0, numBullets - 1 ) : "" ) +
                                 strBullets.charAt( numBullets - 1 );
                } else {
                    // Deeper than before: take the whole remembered prefix and keep only the newly added bullets.
                    strBullets = m_genlistBulletBuffer + strBullets.substring( m_genlistlevel, numBullets );
                }
            }
        }

        // Check if this is still of the same type
        if( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) {
            if( numBullets > m_genlistlevel ) {
                // The first new level is opened directly; every further level sits inside its own li.
                pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel++ ) ) ) );
                for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) {
                    // bullets are growing, get from new bullet list
                    pushElement( new Element( "li" ) );
                    pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel ) ) ) );
                }
            } else if( numBullets < m_genlistlevel ) {
                // Close the previous list item.
                popElement( "li" );
                for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) {
                    // bullets are shrinking, get from old bullet list
                    popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
                    if( m_genlistlevel > 0 ) {
                        popElement( "li" );
                    }
                }
            } else {
                // Same depth: just close the previous item, if there is a list at all.
                if( m_genlistlevel > 0 ) {
                    popElement( "li" );
                }
            }
        } else {
            // The pattern has changed, unwind and restart
            int numEqualBullets;
            final int numCheckBullets;

            // find out how much is the same
            numEqualBullets = 0;
            numCheckBullets = Math.min( numBullets, m_genlistlevel );

            while( numEqualBullets < numCheckBullets ) {
                // if the bullets are equal so far, keep going
                if( strBullets.charAt( numEqualBullets ) == m_genlistBulletBuffer.charAt( numEqualBullets ) )
                    numEqualBullets++;
                // otherwise giveup, we have found how many are equal
                else
                    break;
            }

            //unwind
            for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) {
                popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
                if( m_genlistlevel > numBullets ) {
                    popElement( "li" );
                }
            }

            //rewind
            pushElement( new Element( getListType( strBullets.charAt( numEqualBullets++ ) ) ) );
            for( int i = numEqualBullets; i < numBullets; i++ ) {
                pushElement( new Element( "li" ) );
                pushElement( new Element( getListType( strBullets.charAt( i ) ) ) );
            }
            m_genlistlevel = numBullets;
        }

        // Push a new list item, and eat away any extra whitespace
        pushElement( new Element( "li" ) );
        readWhile( " " );

        // work done, remember the new bullet list (in place of old one)
        m_genlistBulletBuffer.setLength( 0 );
        m_genlistBulletBuffer.append( strBullets );
        return m_currentElement;
    }

    /**
     * Closes every list level that is still open and forgets the remembered bullets.
     *
     * @return Always null.
     */
    private Element unwindGeneralList() {
        // unwind
        for( ; m_genlistlevel > 0; m_genlistlevel-- ) {
            popElement( "li" );
            popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
        }
        m_genlistBulletBuffer.setLength( 0 );
        return null;
    }


    /**
     * Starts a definition list with its first term (dl followed by dt), unless a definition is already in progress.
     *
     * @return The opened dt element, or null if a definition is already in progress.
     */
    private Element handleDefinitionList() {
        if( !m_isdefinition ) {
            m_isdefinition = true;
            startBlockLevel();
            pushElement( new Element( "dl" ) );
            return pushElement( new Element( "dt" ) );
        }
        return null;
    }

    /**
     * Handles the text following an opening bracket. A run of further '[' characters is an escape and is
     * emitted as plain text; otherwise everything up to the closing bracket is collected and handed to
     * handleHyperlinks(). When the bracket is directly followed by '{' the content is scanned as a plugin
     * invocation, in which nested "[{ ... }]" pairs are allowed.
     *
     * @return The current element for escaped or unterminated brackets, otherwise the result of handleHyperlinks().
     * @throws IOException If reading or pushing back a token fails.
     */
    private Element handleOpenbracket() throws IOException {
        final StringBuilder sb = new StringBuilder( 40 );
        final int pos = getPosition();
        int ch = nextToken();
        boolean isPlugin = false;
        if( ch == '[' ) {
            // In WYSIWYG editor mode one extra '[' is emitted in addition to the brackets read here.
            if( m_wysiwygEditorMode ) {
                sb.append( '[' );
            }
            sb.append( ( char )ch );
            while( ( ch = nextToken() ) == '[' ) {
                sb.append( ( char )ch );
            }
        }

        if( ch == '{' ) {
            isPlugin = true;
        }

        pushBack( ch );

        // Anything collected so far is an escaped bracket run: emit it as text and stop.
        if( sb.length() > 0 ) {
            m_plainTextBuf.append( sb );
            return m_currentElement;
        }

        // Find end of hyperlink
        ch = nextToken();
        int nesting = 1; // Check for nested plugins
        while( ch != -1 ) {
            // Peek one token ahead without consuming it.
            final int ch2 = nextToken();
            pushBack( ch2 );
            if( isPlugin ) {
                if( ch == '[' && ch2 == '{' ) {
                    nesting++;
                } else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) {
                    break;
                } else if( ch == '}' && ch2 == ']' ) {
                    // NB: This will be decremented once at the end
                    nesting--;
                }
            } else {
                if( ch == ']' ) {
                    break;
                }
            }

            sb.append( (char) ch );

            ch = nextToken();
        }

        // If the link is never finished, do some tricks to display the rest of the line unchanged.
        if( ch == -1 ) {
            LOG.debug( "Warning: unterminated link detected!" );
            // Escaping mode is switched on only for the duration of this flush.
            m_isEscaping = true;
            m_plainTextBuf.append( sb );
            flushPlainText();
            m_isEscaping = false;
            return m_currentElement;
        }

        return handleHyperlinks( sb.toString(), pos );
    }

    /**
     * Reads the stream until the current brace is closed or stream end.
     * Nested opening/closing characters are tracked and kept in the result; backslashes are dropped.
     *
     * @param opening The character that opens a nested level.
     * @param closing The character that closes a level.
     * @return The content read, without the closing character that ended it.
     * @throws IOException If reading a token fails.
     */
    private String readBraceContent( final char opening, final char closing ) throws IOException {
        final StringBuilder sb = new StringBuilder( 40 );
        // The opening character of the outermost level has already been consumed by the caller.
        int braceLevel = 1;
        int ch;
        while( ( ch = nextToken() ) != -1 ) {
            if( ch == '\\' ) {
                // The backslash itself is skipped; the token after it is processed like any other.
                continue;
            } else if( ch == opening ) {
                braceLevel++;
            } else if( ch == closing ) {
                braceLevel--;
                if( braceLevel == 0 ) {
                    break;
                }
            }
            sb.append( ( char ) ch );
        }
        return sb.toString();
    }


    /**
     * Handles constructs of type %%(style) and %%class
     * @param newLine
     * @return An Element containing the div or span, depending on the situation.
1487 * @throws IOException 1488 */ 1489 private Element handleDiv( final boolean newLine ) throws IOException { 1490 int ch = nextToken(); 1491 Element el = null; 1492 1493 if( ch == '%' ) { 1494 String style = null; 1495 String clazz = null; 1496 1497 ch = nextToken(); 1498 1499 // Style or class? 1500 if( ch == '(' ) { 1501 style = readBraceContent('(',')'); 1502 } else if( Character.isLetter( (char) ch ) ) { 1503 pushBack( ch ); 1504 clazz = readUntil( "( \t\n\r" ); 1505 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 1506 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 1507 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 1508 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 1509 1510 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 1511 //(2) remove any invalid character 1512 if( clazz != null ) { 1513 clazz = clazz.replace( '.', ' ' ) 1514 .replaceAll( "[^\\s-_\\w\\x200-\\x377]+", "" ); 1515 } 1516 ch = nextToken(); 1517 1518 // check for %%class1.class2( style information ) 1519 if( ch == '(' ) { 1520 style = readBraceContent( '(', ')' ); 1521 // Pop out only spaces, so that the upcoming EOL check does not check the next line. 1522 } else if( ch == '\n' || ch == '\r' ) { 1523 pushBack( ch ); 1524 } 1525 } else { 1526 // Anything else stops. 1527 pushBack( ch ); 1528 try { 1529 final Boolean isSpan = m_styleStack.pop(); 1530 if( isSpan == null ) { 1531 // Fail quietly 1532 } else if( isSpan ) { 1533 el = popElement( "span" ); 1534 } else { 1535 el = popElement( "div" ); 1536 } 1537 } catch( final EmptyStackException e ) { 1538 LOG.debug( "Page '" + m_context.getName() + "' closes a %%-block that has not been opened." 
); 1539 return m_currentElement; 1540 } 1541 return el; 1542 } 1543 1544 // Check if there is an attempt to do something nasty 1545 try { 1546 style = StringEscapeUtils.unescapeHtml4(style); 1547 if( style != null && style.contains( "javascript:" ) ) { 1548 LOG.debug( "Attempt to output javascript within CSS: {}", style ); 1549 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1550 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 1551 } 1552 } catch( final NumberFormatException e ) { 1553 // If there are unknown entities, we don't want the parser to stop. 1554 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1555 final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 1556 return addElement( makeError( msg ) ); 1557 } 1558 1559 // Decide if we should open a div or a span? 1560 final String eol = peekAheadLine(); 1561 1562 if( !eol.trim().isEmpty() ) { 1563 // There is stuff after the class 1564 el = new Element("span"); 1565 m_styleStack.push( Boolean.TRUE ); 1566 } else { 1567 startBlockLevel(); 1568 el = new Element("div"); 1569 m_styleStack.push( Boolean.FALSE ); 1570 } 1571 1572 if( style != null ) el.setAttribute("style", style); 1573 if( clazz != null ) el.setAttribute("class", clazz); 1574 return pushElement( el ); 1575 } 1576 pushBack( ch ); 1577 return el; 1578 } 1579 1580 private Element handleSlash( final boolean newLine ) throws IOException { 1581 final int ch = nextToken(); 1582 pushBack( ch ); 1583 if( ch == '%' && !m_styleStack.isEmpty() ) { 1584 return handleDiv( newLine ); 1585 } 1586 1587 return null; 1588 } 1589 1590 private Element handleBar( final boolean newLine ) throws IOException { 1591 Element el; 1592 if( !m_istable && !newLine ) { 1593 return null; 1594 } 1595 1596 // If the bar is in the first column, we will either start a new table or 
continue the old one. 1597 if( newLine ) { 1598 if( !m_istable ) { 1599 startBlockLevel(); 1600 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 1601 m_istable = true; 1602 m_rowNum = 0; 1603 } 1604 1605 m_rowNum++; 1606 final Element tr = ( m_rowNum % 2 != 0 ) 1607 ? new Element("tr").setAttribute("class", "odd") 1608 : new Element("tr"); 1609 el = pushElement( tr ); 1610 } 1611 1612 // Check out which table cell element to start; a header element (th) or a regular element (td). 1613 final int ch = nextToken(); 1614 if( ch == '|' ) { 1615 if( !newLine ) { 1616 el = popElement("th"); 1617 if( el == null ) popElement("td"); 1618 } 1619 el = pushElement( new Element("th") ); 1620 } else { 1621 if( !newLine ) { 1622 el = popElement( "td" ); 1623 if( el == null ) popElement( "th" ); 1624 } 1625 el = pushElement( new Element("td") ); 1626 pushBack( ch ); 1627 } 1628 return el; 1629 } 1630 1631 /** 1632 * Generic escape of next character or entity. 1633 */ 1634 private Element handleTilde() throws IOException { 1635 final int ch = nextToken(); 1636 1637 if( ch == ' ' ) { 1638 if( m_wysiwygEditorMode ) { 1639 m_plainTextBuf.append( "~ " ); 1640 } 1641 return m_currentElement; 1642 } 1643 1644 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 1645 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 1646 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) { 1647 if( m_wysiwygEditorMode ) { 1648 m_plainTextBuf.append( '~' ); 1649 } 1650 m_plainTextBuf.append( ( char ) ch ); 1651 m_plainTextBuf.append( readWhile( "" + ( char ) ch ) ); 1652 return m_currentElement; 1653 } 1654 // No escape. 
1655 pushBack( ch ); 1656 return null; 1657 } 1658 1659 private void fillBuffer( final Element startElement ) throws IOException { 1660 m_currentElement = startElement; 1661 m_newLine = true; 1662 boolean quitReading = false; 1663 disableOutputEscaping(); 1664 while( !quitReading ) { 1665 final int ch = nextToken(); 1666 if( ch == -1 ) { 1667 break; 1668 } 1669 1670 // Check if we're actually ending the preformatted mode. We still must do an entity transformation here. 1671 if( m_isEscaping ) { 1672 if( ch == '}' ) { 1673 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 1674 } else if( ch == -1 ) { 1675 quitReading = true; 1676 } 1677 else if( ch == '\r' ) { 1678 // DOS line feeds we ignore. 1679 } else if( ch == '<' ) { 1680 m_plainTextBuf.append( "<" ); 1681 } else if( ch == '>' ) { 1682 m_plainTextBuf.append( ">" ); 1683 } else if( ch == '&' ) { 1684 m_plainTextBuf.append( "&" ); 1685 } else if( ch == '~' ) { 1686 String braces = readWhile( "}" ); 1687 if( braces.length() >= 3 ) { 1688 m_plainTextBuf.append( "}}}" ); 1689 braces = braces.substring(3); 1690 } else { 1691 m_plainTextBuf.append( (char) ch ); 1692 } 1693 1694 for( int i = braces.length()-1; i >= 0; i-- ) { 1695 pushBack( braces.charAt( i ) ); 1696 } 1697 } else { 1698 m_plainTextBuf.append( (char) ch ); 1699 } 1700 1701 continue; 1702 } 1703 1704 // An empty line stops a list 1705 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) { 1706 m_plainTextBuf.append(unwindGeneralList()); 1707 } 1708 1709 if( m_newLine && ch != '|' && m_istable ) { 1710 popElement( "table" ); 1711 m_istable = false; 1712 } 1713 1714 int skip = IGNORE; 1715 // Do the actual parsing and catch any errors. 
1716 try { 1717 skip = parseToken( ch ); 1718 } catch( final IllegalDataException e ) { 1719 LOG.info( "Page {} contains data which cannot be added to DOM tree: {}", m_context.getPage().getName(), e.getMessage() ); 1720 makeError( "Error: " + cleanupSuspectData( e.getMessage() ) ); 1721 } 1722 1723 // The idea is as follows: If the handler method returns an element (el != null), it is assumed that it 1724 // has been added in the stack. Otherwise, the character is added as is to the plaintext buffer. 1725 // 1726 // For the transition phase, if s != null, it also gets added in the plaintext buffer. 1727 switch( skip ) { 1728 case ELEMENT: 1729 m_newLine = false; 1730 break; 1731 1732 case CHARACTER: 1733 m_plainTextBuf.append( (char) ch ); 1734 m_newLine = false; 1735 break; 1736 1737 case IGNORE: 1738 default: 1739 break; 1740 } 1741 } 1742 1743 closeHeadings(); 1744 popElement( "domroot" ); 1745 } 1746 1747 private String cleanupSuspectData( final String s ) { 1748 final StringBuilder sb = new StringBuilder( s.length() ); 1749 for( int i = 0; i < s.length(); i++ ) { 1750 final char c = s.charAt(i); 1751 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 1752 else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() ); 1753 } 1754 1755 return sb.toString(); 1756 } 1757 1758 /** The token is a plain character. */ 1759 protected static final int CHARACTER = 0; 1760 1761 /** The token is a wikimarkup element. */ 1762 protected static final int ELEMENT = 1; 1763 1764 /** The token is to be ignored. */ 1765 protected static final int IGNORE = 2; 1766 1767 /** 1768 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 1769 * you think this was a wiki markup element, and IGNORE, if you think 1770 * we should ignore this altogether. 1771 * <p> 1772 * To add your own MarkupParser, you can override this method, but it 1773 * is recommended that you call super.parseToken() as well to gain advantage 1774 * of JSPWiki's own markup. 
You can call it at the start of your own 1775 * parseToken() or end - it does not matter. 1776 * 1777 * @param ch The character under investigation 1778 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 1779 * @throws IOException If parsing fails. 1780 */ 1781 protected int parseToken( final int ch ) throws IOException { 1782 Element el = null; 1783 // Now, check the incoming token. 1784 switch( ch ) { 1785 case '\r': 1786 // DOS linefeeds we forget 1787 return IGNORE; 1788 1789 case '\n': 1790 // Close things like headings, etc. 1791 // FIXME: This is not really very fast 1792 closeHeadings(); 1793 1794 popElement( "dl" ); // Close definition lists. 1795 if( m_istable ) { 1796 popElement("tr"); 1797 } 1798 m_isdefinition = false; 1799 if( m_newLine ) { 1800 // Paragraph change. 1801 startBlockLevel(); 1802 // Figure out which elements cannot be enclosed inside a <p></p> pair according to XHTML rules. 1803 final String nextLine = peekAheadLine(); 1804 if( nextLine.isEmpty() || 1805 ( !nextLine.isEmpty() && 1806 !nextLine.startsWith( "{{{" ) && 1807 !nextLine.startsWith( "----" ) && 1808 !nextLine.startsWith( "%%" ) && 1809 "*#!;".indexOf( nextLine.charAt( 0 ) ) == -1 ) ) { 1810 pushElement( new Element( "p" ) ); 1811 m_isOpenParagraph = true; 1812 1813 if( m_restartitalic ) { 1814 pushElement( new Element( "i" ) ); 1815 m_isitalic = true; 1816 m_restartitalic = false; 1817 } 1818 if( m_restartbold ) { 1819 pushElement( new Element( "b" ) ); 1820 m_isbold = true; 1821 m_restartbold = false; 1822 } 1823 } 1824 } else { 1825 m_plainTextBuf.append("\n"); 1826 m_newLine = true; 1827 } 1828 return IGNORE; 1829 1830 case '\\': 1831 el = handleBackslash(); 1832 break; 1833 1834 case '_': 1835 el = handleUnderscore(); 1836 break; 1837 1838 case '\'': 1839 el = handleApostrophe(); 1840 break; 1841 1842 case '{': 1843 el = handleOpenbrace( m_newLine ); 1844 break; 1845 1846 case '}': 1847 el = handleClosebrace(); 1848 break; 1849 1850 case '-': 1851 if( 
m_newLine ) { 1852 el = handleDash(); 1853 } 1854 break; 1855 1856 case '!': 1857 if( m_newLine ) { 1858 el = handleHeading(); 1859 } 1860 break; 1861 1862 case ';': 1863 if( m_newLine ) { 1864 el = handleDefinitionList(); 1865 } 1866 break; 1867 1868 case ':': 1869 if( m_isdefinition ) { 1870 popElement( "dt" ); 1871 el = pushElement( new Element( "dd" ) ); 1872 m_isdefinition = false; 1873 } 1874 break; 1875 1876 case '[': 1877 el = handleOpenbracket(); 1878 break; 1879 1880 case '*': 1881 if( m_newLine ) { 1882 pushBack( '*' ); 1883 el = handleGeneralList(); 1884 } 1885 break; 1886 1887 case '#': 1888 if( m_newLine ) { 1889 pushBack( '#' ); 1890 el = handleGeneralList(); 1891 } 1892 break; 1893 1894 case '|': 1895 el = handleBar( m_newLine ); 1896 break; 1897 1898 case '~': 1899 el = handleTilde(); 1900 break; 1901 1902 case '%': 1903 el = handleDiv( m_newLine ); 1904 break; 1905 1906 case '/': 1907 el = handleSlash( m_newLine ); 1908 break; 1909 1910 default: 1911 break; 1912 } 1913 1914 return el != null ? ELEMENT : CHARACTER; 1915 } 1916 1917 private void closeHeadings() { 1918 if( m_lastHeading != null && !m_wysiwygEditorMode ) { 1919 // Add the hash anchor element at the end of the heading 1920 addElement( new Element("a").setAttribute( "class",HASHLINK ) 1921 .setAttribute( "href","#" + m_lastHeading.m_titleAnchor ) 1922 .setText( "#" ) ); 1923 m_lastHeading = null; 1924 } 1925 popElement( "h2" ); 1926 popElement( "h3" ); 1927 popElement( "h4" ); 1928 } 1929 1930 /** 1931 * Parses the entire document from the Reader given in the constructor or set by {@link #setInputReader(Reader)}. 1932 * 1933 * @return A WikiDocument, ready to be passed to the renderer. 1934 * @throws IOException If parsing cannot be accomplished. 
1935 */ 1936 @Override 1937 public WikiDocument parse() throws IOException { 1938 final WikiDocument d = new WikiDocument( m_context.getPage() ); 1939 d.setContext( m_context ); 1940 final Element rootElement = new Element( "domroot" ); 1941 d.setRootElement( rootElement ); 1942 fillBuffer( rootElement ); 1943 paragraphify( rootElement ); 1944 1945 return d; 1946 } 1947 1948 /** 1949 * Checks out that the first paragraph is correctly installed. 1950 * 1951 * @param rootElement element to be checked. 1952 */ 1953 private void paragraphify( final Element rootElement) { 1954 // Add the paragraph tag to the first paragraph 1955 final List< Content > kids = rootElement.getContent(); 1956 if( rootElement.getChild( "p" ) != null ) { 1957 final ArrayList<Content> ls = new ArrayList<>(); 1958 int idxOfFirstContent = 0; 1959 int count = 0; 1960 1961 for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) { 1962 final Content c = i.next(); 1963 if( c instanceof Element ) { 1964 final String name = ( ( Element )c ).getName(); 1965 if( isBlockLevel( name ) ) { 1966 break; 1967 } 1968 } 1969 1970 if( !( c instanceof ProcessingInstruction ) ) { 1971 ls.add( c ); 1972 if( idxOfFirstContent == 0 ) { 1973 idxOfFirstContent = count; 1974 } 1975 } 1976 } 1977 1978 // If there were any elements, then add a new <p> (unless it would be an empty one) 1979 if( ls.size() > 0 ) { 1980 final Element newel = new Element("p"); 1981 for( final Content c : ls ) { 1982 c.detach(); 1983 newel.addContent( c ); 1984 } 1985 1986 // Make sure there are no empty <p/> tags added. 1987 if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() ) { 1988 rootElement.addContent( idxOfFirstContent, newel ); 1989 } 1990 } 1991 } 1992 } 1993 1994}