001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.commons.text.StringEscapeUtils; 023import org.apache.logging.log4j.LogManager; 024import org.apache.logging.log4j.Logger; 025import org.apache.oro.text.regex.MalformedPatternException; 026import org.apache.oro.text.regex.MatchResult; 027import org.apache.oro.text.regex.Pattern; 028import org.apache.oro.text.regex.PatternCompiler; 029import org.apache.oro.text.regex.PatternMatcher; 030import org.apache.oro.text.regex.Perl5Compiler; 031import org.apache.oro.text.regex.Perl5Matcher; 032import org.apache.wiki.InternalWikiException; 033import org.apache.wiki.StringTransmutator; 034import org.apache.wiki.api.core.Acl; 035import org.apache.wiki.api.core.Context; 036import org.apache.wiki.api.core.ContextEnum; 037import org.apache.wiki.api.core.Page; 038import org.apache.wiki.api.exceptions.PluginException; 039import org.apache.wiki.api.plugin.Plugin; 040import org.apache.wiki.api.spi.Wiki; 041import org.apache.wiki.attachment.AttachmentManager; 042import org.apache.wiki.auth.AuthorizationManager; 043import org.apache.wiki.auth.UserManager; 044import 
org.apache.wiki.auth.WikiSecurityException; 045import org.apache.wiki.auth.acl.AclManager; 046import org.apache.wiki.i18n.InternationalizationManager; 047import org.apache.wiki.preferences.Preferences; 048import org.apache.wiki.util.TextUtil; 049import org.apache.wiki.util.XmlUtil; 050import org.apache.wiki.variables.VariableManager; 051import org.jdom2.Attribute; 052import org.jdom2.Content; 053import org.jdom2.Element; 054import org.jdom2.IllegalDataException; 055import org.jdom2.ProcessingInstruction; 056import org.jdom2.Verifier; 057 058import javax.xml.transform.Result; 059import java.io.IOException; 060import java.io.Reader; 061import java.io.StringReader; 062import java.text.MessageFormat; 063import java.util.ArrayList; 064import java.util.Arrays; 065import java.util.Collection; 066import java.util.EmptyStackException; 067import java.util.HashMap; 068import java.util.Iterator; 069import java.util.List; 070import java.util.Map; 071import java.util.Properties; 072import java.util.ResourceBundle; 073import java.util.Stack; 074 075/** 076 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the heart and soul of JSPWiki : make 077 * sure you test properly anything that is added, or else it breaks down horribly. 
078 * 079 * @since 2.4 080 */ 081public class JSPWikiMarkupParser extends MarkupParser { 082 083 protected static final int READ = 0; 084 protected static final int EDIT = 1; 085 protected static final int EMPTY = 2; // Empty message 086 protected static final int LOCAL = 3; 087 protected static final int LOCALREF = 4; 088 protected static final int IMAGE = 5; 089 protected static final int EXTERNAL = 6; 090 protected static final int INTERWIKI = 7; 091 protected static final int IMAGELINK = 8; 092 protected static final int IMAGEWIKILINK = 9; 093 protected static final int ATTACHMENT = 10; 094 095 private static final Logger LOG = LogManager.getLogger( JSPWikiMarkupParser.class ); 096 097 private boolean m_isbold; 098 private boolean m_isitalic; 099 private boolean m_istable; 100 private boolean m_isPre; 101 private boolean m_isEscaping; 102 private boolean m_isdefinition; 103 private boolean m_isPreBlock; 104 105 /** Contains style information, in multiple forms. */ 106 private final Stack< Boolean > m_styleStack = new Stack<>(); 107 108 // general list handling 109 private int m_genlistlevel; 110 private final StringBuilder m_genlistBulletBuffer = new StringBuilder( 10 ); // stores the # and * pattern 111 private final boolean m_allowPHPWikiStyleLists = true; 112 113 private boolean m_isOpenParagraph; 114 115 /** Parser for extended link functionality. */ 116 private final LinkParser m_linkParser = new LinkParser(); 117 118 /** Keeps track of any plain text that gets put in the Text nodes */ 119 private StringBuilder m_plainTextBuf = new StringBuilder( 20 ); 120 121 private Element m_currentElement; 122 123 /** Keep track of duplicate header names. */ 124 private final Map< String, Integer > m_titleSectionCounter = new HashMap<>(); 125 126 /** If true, then considers CamelCase links as well. 
*/ 127 private boolean m_camelCaseLinks; 128 129 /** If true, then generate special output for wysiwyg editing in certain cases */ 130 private boolean m_wysiwygEditorMode; 131 132 /** If true, consider URIs that have no brackets as well. */ 133 // FIXME: Currently reserved, but not used. 134 private boolean m_plainUris; 135 136 /** If true, all outward links use a small link image. */ 137 private boolean m_useOutlinkImage = true; 138 139 private boolean m_useAttachmentImage = true; 140 141 /** If true, allows raw HTML. */ 142 private boolean m_allowHTML; 143 144 private boolean m_useRelNofollow; 145 146 private final PatternCompiler m_compiler = new Perl5Compiler(); 147 148 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 149 150 private final PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 151 private Pattern m_camelCasePattern; 152 153 private int m_rowNum = 1; 154 155 private Heading m_lastHeading; 156 157 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 158 159 /** 160 * Creates a markup parser. 161 * 162 * @param context The WikiContext which controls the parsing 163 * @param in Where the data is read from. 164 */ 165 public JSPWikiMarkupParser( final Context context, final Reader in ) { 166 super( context, in ); 167 initialize(); 168 } 169 170 // FIXME: parsers should be pooled for better performance. 
171 private void initialize() { 172 initInlineImagePatterns(); 173 174 m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN ); 175 if( m_camelCasePattern == null ) { 176 try { 177 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 178 } catch( final MalformedPatternException e ) { 179 LOG.fatal("Internal error: Someone put in a faulty pattern.",e); 180 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 181 } 182 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 183 } 184 185 // Set the properties. 186 final Properties props = m_engine.getWikiProperties(); 187 final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 188 189 if( cclinks != null ) { 190 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 191 } else { 192 m_camelCaseLinks = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks ); 193 } 194 195 final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE ); 196 if( wysiwygVariable != null ) { 197 m_wysiwygEditorMode = wysiwygVariable; 198 } 199 200 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 201 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 202 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 203 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 204 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 205 206 if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) { 207 disableAccessRules(); 208 } 209 210 m_context.getPage().setHasMetadata(); 211 } 212 213 /** 214 * Calls a transmutator chain. 
215 * 216 * @param list Chain to call 217 * @param text Text that should be passed to the mutate() method of each of the mutators in the chain. 218 * @return The result of the mutation. 219 */ 220 protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) { 221 if( list == null || list.size() == 0 ) { 222 return text; 223 } 224 225 for( final StringTransmutator m : list ) { 226 text = m.mutate( m_context, text ); 227 } 228 229 return text; 230 } 231 232 /** 233 * Calls the heading listeners. 234 * 235 * @param param A Heading object. 236 */ 237 private void callHeadingListenerChain( final Heading param ) { 238 for( final HeadingListener h : m_headingListenerChain ) { 239 h.headingAdded( m_context, param ); 240 } 241 } 242 243 /** 244 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 245 * if you really know what you are doing. 246 * 247 * @param type One of the types above 248 * @param link URL to which to link to 249 * @param text Link text 250 * @param section If a particular section identifier is required. 251 * @return An A element. 252 * @since 2.4.78 253 */ 254 private Element createAnchor( final int type, final String link, String text, String section ) { 255 text = escapeHTMLEntities( text ); 256 section = escapeHTMLEntities( section ); 257 final Element el = new Element( "a" ); 258 el.setAttribute( "class", CLASS_TYPES[ type ] ); 259 el.setAttribute( "href", link + section ); 260 el.addContent( text ); 261 return el; 262 } 263 264 private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) { 265 Element el = null; 266 if( text == null ) { 267 text = link; 268 } 269 text = callMutatorChain( m_linkMutators, text ); 270 section = (section != null) ? ("#"+section) : ""; 271 272 // Make sure we make a link name that can be accepted as a valid URL. 
273 if( link.isEmpty() ) { 274 type = EMPTY; 275 } 276 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 277 278 switch( type ) { 279 case READ: 280 el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section ); 281 break; 282 283 case EDIT: 284 el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" ); 285 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 286 break; 287 288 case EMPTY: 289 el = new Element("u").addContent(text); 290 break; 291 292 // These two are for local references - footnotes and references to footnotes. 293 // We embed the page name (or whatever WikiContext gives us) to make sure the links are unique across Wiki. 294 case LOCALREF: 295 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 296 break; 297 298 case LOCAL: 299 el = new Element( "a" ).setAttribute( "class", CLASS_FOOTNOTE ); 300 el.setAttribute( "name", "ref-" + m_context.getName() + "-" + link.substring( 1 ) ); 301 el.addContent( "[" + text + "]" ); 302 break; 303 304 // With the image, external and interwiki types we need to make sure nobody can put in Javascript or 305 // something else annoying into the links themselves. We do this by preventing a haxor from stopping 306 // the link name short with quotes in fillBuffer(). 
307 case IMAGE: 308 el = new Element( "img" ).setAttribute( "class", "inline" ); 309 el.setAttribute( "src", link ); 310 el.setAttribute( "alt", text ); 311 break; 312 313 case IMAGELINK: 314 el = new Element( "img" ).setAttribute( "class", "inline" ); 315 el.setAttribute( "src", link ); 316 el.setAttribute( "alt", text ); 317 el = createAnchor( IMAGELINK, text, "", "" ).addContent( el ); 318 break; 319 320 case IMAGEWIKILINK: 321 final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text ); 322 el = new Element( "img" ).setAttribute( "class", "inline" ); 323 el.setAttribute( "src", link ); 324 el.setAttribute( "alt", text ); 325 el = createAnchor( IMAGEWIKILINK, pagelink, "", "" ).addContent( el ); 326 break; 327 328 case EXTERNAL: 329 el = createAnchor( EXTERNAL, link, text, section ); 330 if( m_useRelNofollow ) { 331 el.setAttribute( "rel", "nofollow" ); 332 } 333 break; 334 335 case INTERWIKI: 336 el = createAnchor( INTERWIKI, link, text, section ); 337 break; 338 339 case ATTACHMENT: 340 final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link ); 341 final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link ); 342 final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" ); 343 el = createAnchor( ATTACHMENT, attlink, text, "" ); 344 if( m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) { 345 el.setAttribute("download", ""); 346 } 347 348 pushElement( el ); 349 popElement( el.getName() ); 350 351 if( m_useAttachmentImage ) { 352 el = new Element( "img" ).setAttribute( "src", imglink ); 353 el.setAttribute( "border", "0" ); 354 el.setAttribute( "alt", "(info)" ); 355 356 el = new Element( "a" ).setAttribute( "href", infolink ).addContent( el ); 357 el.setAttribute( "class", "infolink" ); 358 } else { 359 el = null; 360 } 361 break; 362 363 default: 364 break; 365 } 366 367 if( el != null 
&& attributes != null ) { 368 while( attributes.hasNext() ) { 369 final Attribute attr = attributes.next(); 370 if( attr != null ) { 371 el.setAttribute( attr ); 372 } 373 } 374 } 375 376 if( el != null ) { 377 flushPlainText(); 378 m_currentElement.addContent( el ); 379 } 380 return el; 381 } 382 383 /** 384 * These are all the HTML 4.01 block-level elements. 385 */ 386 private static final String[] BLOCK_ELEMENTS = { 387 "address", "blockquote", "div", "dl", "fieldset", "form", 388 "h1", "h2", "h3", "h4", "h5", "h6", 389 "hr", "noscript", "ol", "p", "pre", "table", "ul" 390 }; 391 392 private static boolean isBlockLevel( final String name ) { 393 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 394 } 395 396 /** 397 * This method peeks ahead in the stream until EOL and returns the result. It will keep the buffers untouched. 398 * 399 * @return The string from the current position to the end of line. 400 */ 401 // FIXME: Always returns an empty line, even if the stream is full. 402 private String peekAheadLine() throws IOException { 403 final String s = readUntilEOL().toString(); 404 if( s.length() > PUSHBACK_BUFFER_SIZE ) { 405 LOG.warn( "Line is longer than maximum allowed size (" + PUSHBACK_BUFFER_SIZE + " characters. Attempting to recover..." ); 406 pushBack( s.substring( 0, PUSHBACK_BUFFER_SIZE - 1 ) ); 407 } else { 408 try { 409 pushBack( s ); 410 } catch( final IOException e ) { 411 LOG.warn( "Pushback failed: the line is probably too long. Attempting to recover." ); 412 } 413 } 414 return s; 415 } 416 417 private int flushPlainText() { 418 final int numChars = m_plainTextBuf.length(); 419 if( numChars > 0 ) { 420 String buf; 421 422 if( !m_allowHTML ) { 423 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 424 } else { 425 buf = m_plainTextBuf.toString(); 426 } 427 // We must first empty the buffer because the side effect of calling makeCamelCaseLink() is to call this routine. 
428 m_plainTextBuf = new StringBuilder(20); 429 try { 430 // This is the heaviest part of parsing, and therefore we can do some optimization here. 431 // 1) Only when the length of the buffer is big enough, we try to do the match 432 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) { 433 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) { 434 final MatchResult result = m_camelCaseMatcher.getMatch(); 435 final String firstPart = buf.substring( 0, result.beginOffset( 0 ) ); 436 String prefix = result.group( 1 ); 437 if( prefix == null ) { 438 prefix = ""; 439 } 440 441 final String camelCase = result.group(2); 442 final String protocol = result.group(3); 443 String uri = protocol+result.group(4); 444 buf = buf.substring(result.endOffset(0)); 445 446 m_currentElement.addContent( firstPart ); 447 // Check if the user does not wish to do URL or WikiWord expansion 448 if( prefix.endsWith( "~" ) || prefix.indexOf( '[' ) != -1 ) { 449 if( prefix.endsWith( "~" ) ) { 450 if( m_wysiwygEditorMode ) { 451 m_currentElement.addContent( "~" ); 452 } 453 prefix = prefix.substring( 0, prefix.length() - 1 ); 454 } 455 if( camelCase != null ) { 456 m_currentElement.addContent( prefix + camelCase ); 457 } else if( protocol != null ) { 458 m_currentElement.addContent( prefix + uri ); 459 } 460 continue; 461 } 462 463 // Fine, then let's check what kind of link this was and emit the proper elements 464 if( protocol != null ) { 465 final char c = uri.charAt( uri.length() - 1 ); 466 if( c == '.' 
|| c == ',' ) { 467 uri = uri.substring( 0, uri.length() - 1 ); 468 buf = c + buf; 469 } 470 // System.out.println("URI match "+uri); 471 m_currentElement.addContent( prefix ); 472 makeDirectURILink( uri ); 473 } else { 474 // System.out.println("Matched: '"+camelCase+"'"); 475 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 476 // System.out.println("prefix="+prefix); 477 m_currentElement.addContent( prefix ); 478 makeCamelCaseLink( camelCase ); 479 } 480 } 481 m_currentElement.addContent( buf ); 482 } else { 483 // No camelcase asked for, just add the elements 484 m_currentElement.addContent( buf ); 485 } 486 } catch( final IllegalDataException e ) { 487 // Sometimes it's possible that illegal XML chars is added to the data. Here we make sure it does not stop parsing. 488 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 489 } 490 } 491 492 return numChars; 493 } 494 495 /** 496 * Escapes XML entities in a HTML-compatible way (i.e. does not escape entities that are already escaped). 497 * 498 * @param buf 499 * @return An escaped string. 
500 */ 501 private String escapeHTMLEntities( final String buf ) { 502 final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); 503 for( int i = 0; i < buf.length(); i++ ) { 504 final char ch = buf.charAt(i); 505 if( ch == '<' ) { 506 tmpBuf.append("<"); 507 } else if( ch == '>' ) { 508 tmpBuf.append(">"); 509 } else if( ch == '\"' ) { 510 tmpBuf.append("""); 511 } else if( ch == '&' ) { 512 // If the following is an XML entity reference (&#.*;) we'll leave it as it is; otherwise we'll replace it with an & 513 boolean isEntity = false; 514 final StringBuilder entityBuf = new StringBuilder(); 515 if( i < buf.length() -1 ) { 516 for( int j = i; j < buf.length(); j++ ) { 517 final char ch2 = buf.charAt( j ); 518 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) { 519 entityBuf.append(ch2); 520 if( ch2 == ';' ) { 521 isEntity = true; 522 break; 523 } 524 } else { 525 break; 526 } 527 } 528 } 529 530 if( isEntity ) { 531 tmpBuf.append( entityBuf ); 532 i = i + entityBuf.length() - 1; 533 } else { 534 tmpBuf.append( "&" ); 535 } 536 537 } else { 538 tmpBuf.append( ch ); 539 } 540 } 541 542 return tmpBuf.toString(); 543 } 544 545 private Element pushElement( final Element e ) { 546 flushPlainText(); 547 m_currentElement.addContent( e ); 548 m_currentElement = e; 549 550 return e; 551 } 552 553 private Element addElement( final Content e ) { 554 if( e != null ) { 555 flushPlainText(); 556 m_currentElement.addContent( e ); 557 } 558 return m_currentElement; 559 } 560 561 /** 562 * All elements that can be empty by the HTML DTD. 563 */ 564 // Keep sorted. 565 private static final String[] EMPTY_ELEMENTS = { 566 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 567 }; 568 569 /** 570 * Goes through the current element stack and pops all elements until this 571 * element is found - this essentially "closes" and element. 572 * 573 * @param s element to be found. 
574 * @return The new current element, or null, if there was no such element in the entire stack. 575 */ 576 private Element popElement( final String s ) { 577 final int flushedBytes = flushPlainText(); 578 Element currEl = m_currentElement; 579 while( currEl.getParentElement() != null ) { 580 if( currEl.getName().equals( s ) && !currEl.isRootElement() ) { 581 m_currentElement = currEl.getParentElement(); 582 583 // Check if it's okay for this element to be empty. Then we will 584 // trick the JDOM generator into not generating an empty element, 585 // by putting an empty string between the tags. Yes, it's a kludge 586 // but what'cha gonna do about it. :-) 587 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) { 588 currEl.addContent( "" ); 589 } 590 return m_currentElement; 591 } 592 currEl = currEl.getParentElement(); 593 } 594 return null; 595 } 596 597 598 /** 599 * Reads the stream until it meets one of the specified ending characters, or stream end. The ending 600 * character will be left in the stream. 601 */ 602 private String readUntil( final String endChars ) throws IOException { 603 final StringBuilder sb = new StringBuilder( 80 ); 604 int ch = nextToken(); 605 while( ch != -1 ) { 606 if( ch == '\\' ) { 607 ch = nextToken(); 608 if( ch == -1 ) { 609 break; 610 } 611 } else { 612 if( endChars.indexOf( ( char )ch ) != -1 ) { 613 pushBack( ch ); 614 break; 615 } 616 } 617 sb.append( ( char )ch ); 618 ch = nextToken(); 619 } 620 621 return sb.toString(); 622 } 623 624 /** 625 * Reads the stream while the characters that have been specified are 626 * in the stream, returning then the result as a String. 
627 */ 628 private String readWhile( final String endChars ) throws IOException { 629 final StringBuilder sb = new StringBuilder( 80 ); 630 int ch = nextToken(); 631 while( ch != -1 ) { 632 if( endChars.indexOf( ( char ) ch ) == -1 ) { 633 pushBack( ch ); 634 break; 635 } 636 sb.append( ( char ) ch ); 637 ch = nextToken(); 638 } 639 640 return sb.toString(); 641 } 642 643 private JSPWikiMarkupParser m_cleanTranslator; 644 645 /** 646 * Does a lazy init. Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before 647 * the TranslatorReader it is contained by is up. 648 */ 649 private JSPWikiMarkupParser getCleanTranslator() { 650 if( m_cleanTranslator == null ) { 651 final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() ); 652 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 653 m_cleanTranslator.m_allowHTML = true; 654 } 655 656 return m_cleanTranslator; 657 } 658 659 /** 660 * Modifies the "hd" parameter to contain proper values. Because 661 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 662 * % after url encoding with '_'. 663 * <p> 664 * Counts also duplicate headings (= headings with similar name), and 665 * attaches a counter. 
666 */ 667 protected String makeHeadingAnchor( final String baseName, String title, final Heading hd ) { 668 hd.m_titleText = title; 669 title = MarkupParser.wikifyLink( title ); 670 hd.m_titleSection = m_engine.encodeName(title); 671 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) { 672 final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1; 673 m_titleSectionCounter.put( hd.m_titleSection, count ); 674 hd.m_titleSection += "-" + count; 675 } else { 676 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 677 } 678 679 hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection; 680 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 681 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 682 683 return hd.m_titleAnchor; 684 } 685 686 private String makeSectionTitle( String title ) { 687 title = title.trim(); 688 try { 689 final JSPWikiMarkupParser dtr = getCleanTranslator(); 690 dtr.setInputReader( new StringReader( title ) ); 691 final WikiDocument doc = dtr.parse(); 692 doc.setContext( m_context ); 693 694 return XmlUtil.extractTextFromDocument( doc ); 695 } catch( final IOException e ) { 696 LOG.fatal("Title parsing not working", e ); 697 throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e ); 698 } 699 } 700 701 /** 702 * Returns XHTML for the heading. 703 * 704 * @param level The level of the heading. 
@see Heading 705 * @param title the title for the heading 706 * @param hd a List to which heading should be added 707 * @return An Element containing the heading 708 */ 709 public Element makeHeading( final int level, final String title, final Heading hd ) { 710 final Element el; 711 final String pageName = m_context.getPage().getName(); 712 final String outTitle = makeSectionTitle( title ); 713 hd.m_level = level; 714 715 switch( level ) { 716 case Heading.HEADING_SMALL: 717 el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 718 break; 719 720 case Heading.HEADING_MEDIUM: 721 el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 722 break; 723 724 case Heading.HEADING_LARGE: 725 el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 726 break; 727 728 default: 729 throw new InternalWikiException( "Illegal heading type " + level ); 730 } 731 732 return el; 733 } 734 735 /** 736 * When given a link to a WikiName, we just return a proper HTML link for it. The local link mutator 737 * chain is also called. 738 */ 739 private Element makeCamelCaseLink( final String wikiname ) { 740 final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 741 callMutatorChain( m_localLinkMutatorChain, wikiname ); 742 if( matchedLink != null ) { 743 makeLink( READ, matchedLink, wikiname, null, null ); 744 } else { 745 makeLink( EDIT, wikiname, wikiname, null, null ); 746 } 747 748 return m_currentElement; 749 } 750 751 /** Holds the image URL for the duration of this parser */ 752 private String m_outlinkImageURL; 753 754 /** 755 * Returns an element for the external link image (out.png). However, this method caches the URL for the lifetime 756 * of this MarkupParser, because it's commonly used, and we'll end up with possibly hundreds our thousands of 757 * references to it... It's a lot faster, too. 
758 * 759 * @return An element containing the HTML for the outlink image. 760 */ 761 private Element outlinkImage() { 762 Element el = null; 763 if( m_useOutlinkImage ) { 764 if( m_outlinkImageURL == null ) { 765 m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE ); 766 } 767 768 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 769 el.setAttribute( "src", m_outlinkImageURL ); 770 el.setAttribute( "alt","" ); 771 } 772 773 return el; 774 } 775 776 /** 777 * Takes a URL and turns it into a regular wiki link. Unfortunately, because of the way that flushPlainText() 778 * works, it already encodes all the XML entities. But so does WikiContext.getURL(), so we 779 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 780 * <p> 781 * What a crappy problem. 782 * 783 * @param url provided url. 784 * @return An anchor Element containing the link. 785 */ 786 private Element makeDirectURILink( String url ) { 787 final Element result; 788 String last = null; 789 790 if( url.endsWith( "," ) || url.endsWith( "." ) ) { 791 last = url.substring( url.length() - 1 ); 792 url = url.substring( 0, url.length() - 1 ); 793 } 794 795 callMutatorChain( m_externalLinkMutatorChain, url ); 796 797 if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) { 798 result = handleImageLink( StringUtils.replace( url, "&", "&" ), url, false ); 799 } else { 800 result = makeLink( EXTERNAL, StringUtils.replace( url, "&", "&" ), url, null, null ); 801 addElement( outlinkImage() ); 802 } 803 804 if( last != null ) { 805 m_plainTextBuf.append( last ); 806 } 807 808 return result; 809 } 810 811 /** 812 * Image links are handled differently: 813 * 1. If the text is a WikiName of an existing page, it gets linked. 814 * 2. If the text is an external link, then it is inlined. 815 * 3. Otherwise, it becomes an ALT text. 816 * 817 * @param reallink The link to the image. 
818 * @param link Link text portion, may be a link to somewhere else. 819 * @param hasLinkText If true, then the defined link had a link text available. 820 * This means that the link text may be a link to a wiki page, 821 * or an external resource. 822 */ 823 private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) { 824 final String possiblePage = MarkupParser.cleanLink( link ); 825 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) { 826 return makeLink( IMAGELINK, reallink, link, null, null ); 827 } else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) { 828 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 829 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 830 } else { 831 return makeLink( IMAGE, reallink, link, null, null ); 832 } 833 } 834 835 private Element handleAccessRule( String ruleLine ) { 836 if( m_wysiwygEditorMode ) { 837 m_currentElement.addContent( "[" + ruleLine + "]" ); 838 } 839 if( !m_parseAccessRules ) { 840 return m_currentElement; 841 } 842 final Page page = m_context.getRealPage(); 843 // UserDatabase db = m_context.getEngine().getUserDatabase(); 844 845 if( ruleLine.startsWith( "{" ) ) { 846 ruleLine = ruleLine.substring( 1 ); 847 } 848 849 if( ruleLine.endsWith( "}" ) ) { 850 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 851 } 852 853 LOG.debug("page={}, ACL = {}", page.getName(), ruleLine); 854 855 try { 856 final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine ); 857 page.setAcl( acl ); 858 LOG.debug( acl.toString() ); 859 } catch( final WikiSecurityException wse ) { 860 return makeError( wse.getMessage() ); 861 } 862 863 return m_currentElement; 864 } 865 866 /** 867 * Handles metadata setting [{SET foo=bar}] 868 */ 869 private Element handleMetadata( final String link ) { 870 if( m_wysiwygEditorMode ) { 871 m_currentElement.addContent( "[" + link + "]" ); 872 } 873 874 try { 875 final 
String args = link.substring( link.indexOf(' '), link.length()-1 ); 876 final String name = args.substring( 0, args.indexOf('=') ).trim(); 877 String val = args.substring( args.indexOf('=')+1 ).trim(); 878 879 if( val.startsWith("'") ) { 880 val = val.substring( 1 ); 881 } 882 if( val.endsWith("'") ) { 883 val = val.substring( 0, val.length()-1 ); 884 } 885 886 // LOG.debug("SET name='"+name+"', value='"+val+"'."); 887 888 if( !name.isEmpty() && !val.isEmpty() ) { 889 val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val ); 890 m_context.getPage().setAttribute( name, val ); 891 } 892 } catch( final Exception e ) { 893 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 894 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 895 } 896 897 return m_currentElement; 898 } 899 900 /** 901 * Emits a processing instruction that will disable markup escaping. This is 902 * very useful if you want to emit HTML directly into the stream. 903 */ 904 private void disableOutputEscaping() { 905 addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) ); 906 } 907 908 /** 909 * Gobbles up all hyperlinks that are encased in square brackets. 
910 */ 911 private Element handleHyperlinks( String linktext, final int pos ) { 912 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 913 final StringBuilder sb = new StringBuilder( linktext.length() + 80 ); 914 915 if( m_linkParsingOperations.isAccessRule( linktext ) ) { 916 return handleAccessRule( linktext ); 917 } 918 919 if( m_linkParsingOperations.isMetadata( linktext ) ) { 920 return handleMetadata( linktext ); 921 } 922 923 if( m_linkParsingOperations.isPluginLink( linktext ) ) { 924 try { 925 final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 926 927 // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not. 928 if( pluginContent != null ) { 929 addElement( pluginContent ); 930 pluginContent.executeParse( m_context ); 931 } 932 } catch( final PluginException e ) { 933 LOG.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 934 //LOG.info( "Root cause:",e.getRootThrowable() ); 935 if( !m_wysiwygEditorMode ) { 936 final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE ); 937 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 938 m_context.getRealPage().getWiki(), 939 m_context.getRealPage().getName(), 940 e.getMessage() ) ) ); 941 } 942 } 943 return m_currentElement; 944 } 945 946 try { 947 final LinkParser.Link link = m_linkParser.parse( linktext ); 948 linktext = link.getText(); 949 String linkref = link.getReference(); 950 // Yes, we now have the components separated. 951 // linktext = the text the link should have 952 // linkref = the url or page name. 953 // In many cases these are the same. [linktext|linkref]. 
954 if( m_linkParsingOperations.isVariableLink( linktext ) ) { 955 final Content el = new VariableContent( linktext ); 956 addElement( el ); 957 } else if( m_linkParsingOperations.isExternalLink( linkref ) ) { 958 // It's an external link, out of this Wiki 959 callMutatorChain( m_externalLinkMutatorChain, linkref ); 960 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 961 handleImageLink( linkref, linktext, link.hasReference() ); 962 } else { 963 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 964 addElement( outlinkImage() ); 965 } 966 } else if( link.isInterwikiLink() ) { 967 // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved. 968 969 // FIXME: There is an interesting issue here: We probably should 970 // URLEncode the wikiPage, but we can't since some of the 971 // Wikis use slashes (/), which won't survive URLEncoding. 972 // Besides, we don't know which character set the other Wiki 973 // is using, so you'll have to write the entire name as it appears 974 // in the URL. Bugger. 
975 976 final String extWiki = link.getExternalWiki(); 977 final String wikiPage = link.getExternalWikiPage(); 978 if( m_wysiwygEditorMode ) { 979 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 980 } else { 981 String urlReference = m_engine.getInterWikiURL( extWiki ); 982 if( urlReference != null ) { 983 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 984 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 985 986 if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) { 987 handleImageLink( urlReference, linktext, link.hasReference() ); 988 } else { 989 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 990 } 991 if( m_linkParsingOperations.isExternalLink( urlReference ) ) { 992 addElement( outlinkImage() ); 993 } 994 } else { 995 final Object[] args = { escapeHTMLEntities( extWiki ) }; 996 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 997 } 998 } 999 } else if( linkref.startsWith( "#" ) ) { 1000 // It defines a local footnote 1001 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1002 } else if( TextUtil.isNumber( linkref ) ) { 1003 // It defines a reference to a local footnote 1004 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1005 } else { 1006 final int hashMark; 1007 1008 // Internal wiki link, but is it an attachment link? 
1009 String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref ); 1010 if( attachment != null ) { 1011 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1012 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 1013 attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment ); 1014 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1015 } else { 1016 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1017 } 1018 } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) { 1019 // It's an internal Wiki link, but to a named section 1020 final String namedSection = linkref.substring( hashMark + 1 ); 1021 linkref = linkref.substring( 0, hashMark ); 1022 linkref = MarkupParser.cleanLink( linkref ); 1023 callMutatorChain( m_localLinkMutatorChain, linkref ); 1024 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1025 if( matchedLink != null ) { 1026 String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) ); 1027 sectref = sectref.replace( '%', '_' ); 1028 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1029 } else { 1030 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1031 } 1032 } else { 1033 // It's an internal Wiki link 1034 linkref = MarkupParser.cleanLink( linkref ); 1035 callMutatorChain( m_localLinkMutatorChain, linkref ); 1036 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1037 if( matchedLink != null ) { 1038 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1039 } else { 1040 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1041 } 1042 } 1043 } 1044 1045 } catch( final ParseException e ) { 1046 LOG.info( "Parser failure: ", e ); 1047 final Object[] args = { e.getMessage() }; 1048 addElement( makeError( 
MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1049 } 1050 return m_currentElement; 1051 } 1052 1053 /** 1054 * Pushes back any string that has been read. It will obviously be pushed back in a reverse order. 1055 * 1056 * @since 2.1.77 1057 */ 1058 private void pushBack( final String s ) throws IOException { 1059 for( int i = s.length()-1; i >= 0; i-- ) { 1060 pushBack( s.charAt(i) ); 1061 } 1062 } 1063 1064 private Element handleBackslash() throws IOException { 1065 final int ch = nextToken(); 1066 if( ch == '\\' ) { 1067 final int ch2 = nextToken(); 1068 if( ch2 == '\\' ) { 1069 pushElement( new Element( "br" ).setAttribute( "clear", "all" ) ); 1070 return popElement( "br" ); 1071 } 1072 pushBack( ch2 ); 1073 pushElement( new Element( "br" ) ); 1074 return popElement( "br" ); 1075 } 1076 pushBack( ch ); 1077 return null; 1078 } 1079 1080 private Element handleUnderscore() throws IOException { 1081 final int ch = nextToken(); 1082 Element el = null; 1083 if( ch == '_' ) { 1084 if( m_isbold ) { 1085 el = popElement( "b" ); 1086 } else { 1087 el = pushElement( new Element( "b" ) ); 1088 } 1089 m_isbold = !m_isbold; 1090 } else { 1091 pushBack( ch ); 1092 } 1093 1094 return el; 1095 } 1096 1097 1098 /** 1099 * For example: italics. 
1100 */ 1101 private Element handleApostrophe() throws IOException { 1102 final int ch = nextToken(); 1103 Element el = null; 1104 1105 if( ch == '\'' ) { 1106 if( m_isitalic ) { 1107 el = popElement( "i" ); 1108 } else { 1109 el = pushElement( new Element( "i" ) ); 1110 } 1111 m_isitalic = !m_isitalic; 1112 } else { 1113 pushBack( ch ); 1114 } 1115 1116 return el; 1117 } 1118 1119 private Element handleOpenbrace( final boolean isBlock ) throws IOException { 1120 final int ch = nextToken(); 1121 if( ch == '{' ) { 1122 final int ch2 = nextToken(); 1123 if( ch2 == '{' ) { 1124 m_isPre = true; 1125 m_isEscaping = true; 1126 m_isPreBlock = isBlock; 1127 if( isBlock ) { 1128 startBlockLevel(); 1129 return pushElement( new Element( "pre" ) ); 1130 } 1131 1132 return pushElement( new Element( "span" ).setAttribute( "class", "inline-code" ) ); 1133 } 1134 pushBack( ch2 ); 1135 return pushElement( new Element( "tt" ) ); 1136 } 1137 pushBack( ch ); 1138 return null; 1139 } 1140 1141 /** 1142 * Handles both }} and }}} 1143 */ 1144 private Element handleClosebrace() throws IOException { 1145 final int ch2 = nextToken(); 1146 if( ch2 == '}' ) { 1147 final int ch3 = nextToken(); 1148 if( ch3 == '}' ) { 1149 if( m_isPre ) { 1150 if( m_isPreBlock ) { 1151 popElement( "pre" ); 1152 } else { 1153 popElement( "span" ); 1154 } 1155 m_isPre = false; 1156 m_isEscaping = false; 1157 return m_currentElement; 1158 } 1159 m_plainTextBuf.append( "}}}" ); 1160 return m_currentElement; 1161 } 1162 pushBack( ch3 ); 1163 if( !m_isEscaping ) { 1164 return popElement( "tt" ); 1165 } 1166 } 1167 pushBack( ch2 ); 1168 return null; 1169 } 1170 1171 private Element handleDash() throws IOException { 1172 int ch = nextToken(); 1173 if( ch == '-' ) { 1174 final int ch2 = nextToken(); 1175 if( ch2 == '-' ) { 1176 final int ch3 = nextToken(); 1177 if( ch3 == '-' ) { 1178 // Empty away all the rest of the dashes. 1179 // Do not forget to return the first non-match back. 
1180 do { 1181 ch = nextToken(); 1182 } while ( ch == '-' ); 1183 1184 pushBack( ch ); 1185 startBlockLevel(); 1186 pushElement( new Element( "hr" ) ); 1187 return popElement( "hr" ); 1188 } 1189 pushBack( ch3 ); 1190 } 1191 pushBack( ch2 ); 1192 } 1193 pushBack( ch ); 1194 return null; 1195 } 1196 1197 private Element handleHeading() throws IOException { 1198 final Element el; 1199 final int ch = nextToken(); 1200 final Heading hd = new Heading(); 1201 if( ch == '!' ) { 1202 final int ch2 = nextToken(); 1203 if( ch2 == '!' ) { 1204 final String title = peekAheadLine(); 1205 el = makeHeading( Heading.HEADING_LARGE, title, hd ); 1206 } else { 1207 pushBack( ch2 ); 1208 final String title = peekAheadLine(); 1209 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1210 } 1211 } else { 1212 pushBack( ch ); 1213 final String title = peekAheadLine(); 1214 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1215 } 1216 1217 callHeadingListenerChain( hd ); 1218 m_lastHeading = hd; 1219 if( el != null ) { 1220 pushElement( el ); 1221 } 1222 return el; 1223 } 1224 1225 /** 1226 * Reads the stream until the next EOL or EOF. Note that it will also read the EOL from the stream. 1227 */ 1228 private StringBuilder readUntilEOL() throws IOException { 1229 int ch; 1230 final StringBuilder buf = new StringBuilder( 256 ); 1231 while( true ) { 1232 ch = nextToken(); 1233 if( ch == -1 ) { 1234 break; 1235 } 1236 buf.append( (char) ch ); 1237 if( ch == '\n' ) { 1238 break; 1239 } 1240 } 1241 return buf; 1242 } 1243 1244 /** Controls whether italic is restarted after a paragraph shift */ 1245 1246 private boolean m_restartitalic; 1247 private boolean m_restartbold; 1248 1249 private boolean m_newLine; 1250 1251 /** 1252 * Starts a block level element, therefore closing a potential open paragraph tag. 
 * <p>
     * Open {@code i}, {@code b} and {@code tt} elements are popped as well; the italic and bold
     * states are remembered in the restart flags and then cleared.
     */
    private void startBlockLevel() {
        // These may not continue over block level limits in XHTML
        popElement( "i" );
        popElement( "b" );
        popElement( "tt" );
        if( m_isOpenParagraph ) {
            m_isOpenParagraph = false;
            popElement( "p" );
            m_plainTextBuf.append( "\n" ); // Just small beautification
        }
        // Remember the inline state so that it can be restored later, then reset it.
        m_restartitalic = m_isitalic;
        m_restartbold = m_isbold;
        m_isitalic = false;
        m_isbold = false;
    }

    /**
     * Maps a bullet character to the name of the list element it opens.
     *
     * @param c the bullet character: '*' or '#'.
     * @return "ul" for '*', "ol" for '#'.
     * @throws InternalWikiException for any other character.
     */
    private static String getListType( final char c ) {
        if( c == '*' ) {
            return "ul";
        } else if( c == '#' ) {
            return "ol";
        }
        throw new InternalWikiException( "Parser got faulty list type: " + c );
    }

    /**
     * Like original handleOrderedList() and handleUnorderedList(),
     * however handles both ordered ('#') and unordered ('*') mixed together.
     * <p>
     * Reads the run of bullet characters, compares it with the bullet pattern of the previous list
     * line, opens or closes list elements to move from the old nesting to the new one, and finally
     * opens a new {@code li}.
     *
     * @return the current element after the new list item has been pushed.
     * @throws IOException propagated from reading tokens.
     */
    // FIXME: Refactor this; it's a bit messy.
    private Element handleGeneralList() throws IOException {
        startBlockLevel();
        String strBullets = readWhile( "*#" );
        // String strBulletsRaw = strBullets;  // to know what was original before phpwiki style substitution
        final int numBullets = strBullets.length();

        // override the beginning portion of bullet pattern to be like the previous to simulate PHPWiki style lists

        if( m_allowPHPWikiStyleLists ) {
            // only substitute if different
            if( !( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) ) {
                if( numBullets <= m_genlistlevel ) {
                    // Substitute all but the last character (keep the expressed bullet preference)
                    strBullets = ( numBullets > 1 ? m_genlistBulletBuffer.substring( 0, numBullets - 1 ) : "" ) +
                                 strBullets.charAt( numBullets - 1 );
                } else {
                    // Deeper than before: keep the whole previous pattern and add only the new levels.
                    strBullets = m_genlistBulletBuffer + strBullets.substring( m_genlistlevel, numBullets );
                }
            }
        }

        // Check if this is still of the same type
        if( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) {
            if( numBullets > m_genlistlevel ) {
                // The first new list goes directly under the currently open element ...
                pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel++ ) ) ) );
                // ... every further level needs an enclosing list item first.
                for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) {
                    // bullets are growing, get from new bullet list
                    pushElement( new Element( "li" ) );
                    pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel ) ) ) );
                }
            } else if( numBullets < m_genlistlevel ) {
                // Close the previous list item.
                popElement( "li" );
                for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) {
                    // bullets are shrinking, get from old bullet list
                    popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
                    if( m_genlistlevel > 0 ) {
                        popElement( "li" );
                    }
                }
            } else {
                // Same depth as before: just close the previous item.
                if( m_genlistlevel > 0 ) {
                    popElement( "li" );
                }
            }
        } else {
            // The pattern has changed, unwind and restart
            int numEqualBullets;
            final int numCheckBullets;

            // find out how much is the same
            numEqualBullets = 0;
            numCheckBullets = Math.min( numBullets, m_genlistlevel );

            while( numEqualBullets < numCheckBullets ) {
                // if the bullets are equal so far, keep going
                if( strBullets.charAt( numEqualBullets ) == m_genlistBulletBuffer.charAt( numEqualBullets ) )
                    numEqualBullets++;
                // otherwise giveup, we have found how many are equal
                else
                    break;
            }

            //unwind
            for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) {
                popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
                if( m_genlistlevel > numBullets ) {
                    popElement( "li" );
                }
            }

            //rewind
            pushElement( new Element( getListType( strBullets.charAt( numEqualBullets++ ) ) ) );
            for( int i = numEqualBullets; i < numBullets; i++ ) {
                pushElement( new Element( "li" ) );
                pushElement( new Element( getListType( strBullets.charAt( i ) ) ) );
            }
            m_genlistlevel = numBullets;
        }

        // Push a new list item, and eat away any extra whitespace
        pushElement( new Element( "li" ) );
        readWhile( " " );

        // work done, remember the new bullet list (in place of old one)
        m_genlistBulletBuffer.setLength( 0 );
        m_genlistBulletBuffer.append( strBullets );
        return m_currentElement;
    }

    /**
     * Closes every open list level (list item first, then the list element itself) and clears the
     * remembered bullet pattern.
     *
     * @return always {@code null}.
     */
    private Element unwindGeneralList() {
        // unwind
        for( ; m_genlistlevel > 0; m_genlistlevel-- ) {
            popElement( "li" );
            popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
        }
        m_genlistBulletBuffer.setLength( 0 );
        return null;
    }


    /**
     * Opens a definition list ({@code dl}) with its first term ({@code dt}), unless a definition is
     * already in progress.
     *
     * @return the pushed {@code dt} element, or {@code null} if a definition was already open.
     */
    private Element handleDefinitionList() {
        if( !m_isdefinition ) {
            m_isdefinition = true;
            startBlockLevel();
            pushElement( new Element( "dl" ) );
            return pushElement( new Element( "dt" ) );
        }
        return null;
    }

    /**
     * Handles the tokens following an opening square bracket. A run of further opening brackets is
     * copied to the plain-text buffer as literal text; otherwise everything up to the closing
     * bracket is collected and handed to {@link #handleHyperlinks(String, int)}. Content starting
     * with '{' is collected with nesting tracking, so that bracketed plugin invocations inside it
     * do not end the collection early.
     *
     * @return the current element for literal brackets and unterminated links, otherwise the result
     *         of {@code handleHyperlinks}.
     * @throws IOException propagated from reading or pushing back tokens.
     */
    private Element handleOpenbracket() throws IOException {
        final StringBuilder sb = new StringBuilder( 40 );
        final int pos = getPosition();
        int ch = nextToken();
        boolean isPlugin = false;
        if( ch == '[' ) {
            // A second '[' follows: collect the whole run of brackets as literal text.
            // WYSIWYG mode emits one extra bracket in front of the run.
            if( m_wysiwygEditorMode ) {
                sb.append( '[' );
            }
            sb.append( ( char )ch );
            while( ( ch = nextToken() ) == '[' ) {
                sb.append( ( char )ch );
            }
        }

        if( ch == '{' ) {
            isPlugin = true;
        }

        pushBack( ch );

        // Something was collected above, so this was a literal bracket run and not a link.
        if( sb.length() > 0 ) {
            m_plainTextBuf.append( sb );
            return m_currentElement;
        }

        // Find end of hyperlink
        ch = nextToken();
        int nesting = 1; // Check for nested plugins
        while( ch != -1 ) {
            // Peek one token ahead without consuming it.
            final int ch2 = nextToken();
            pushBack( ch2 );
            if( isPlugin ) {
                if( ch == '[' && ch2 == '{' ) {
                    nesting++;
                } else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) {
                    break;
                } else if( ch == '}' && ch2 == ']' ) {
                    // NB: This will be decremented once at the end
                    nesting--;
                }
            } else {
                if( ch == ']' ) {
                    break;
                }
            }

            sb.append( (char) ch );

            ch = nextToken();
        }

        // If the link is never finished, do some tricks to display the rest of the line unchanged.
        if( ch == -1 ) {
            LOG.debug( "Warning: unterminated link detected!" );
            m_isEscaping = true;
            m_plainTextBuf.append( sb );
            flushPlainText();
            m_isEscaping = false;
            return m_currentElement;
        }

        return handleHyperlinks( sb.toString(), pos );
    }

    /**
     * Reads the stream until the current brace is closed or stream end.
     * Backslashes are dropped from the result; nested opening/closing pairs are kept in it.
     *
     * @param opening the character that opens a nested level.
     * @param closing the character that closes a level.
     * @return the content read, without the closing character of the outermost level.
     * @throws IOException propagated from reading tokens.
     */
    private String readBraceContent( final char opening, final char closing ) throws IOException {
        final StringBuilder sb = new StringBuilder( 40 );
        int braceLevel = 1; // the caller has already consumed the outermost opening character
        int ch;
        while( ( ch = nextToken() ) != -1 ) {
            if( ch == '\\' ) {
                continue;
            } else if( ch == opening ) {
                braceLevel++;
            } else if( ch == closing ) {
                braceLevel--;
                if( braceLevel == 0 ) {
                    break;
                }
            }
            sb.append( ( char ) ch );
        }
        return sb.toString();
    }


    /**
     * Handles constructs of type %%(style) and %%class
     * @return An Element containing the div or span, depending on the situation.
1486 * @throws IOException 1487 */ 1488 private Element handleDiv( ) throws IOException { 1489 int ch = nextToken(); 1490 Element el = null; 1491 1492 if( ch == '%' ) { 1493 String style = null; 1494 String clazz = null; 1495 1496 ch = nextToken(); 1497 1498 // Style or class? 1499 if( ch == '(' ) { 1500 style = readBraceContent('(',')'); 1501 } else if( Character.isLetter( (char) ch ) ) { 1502 pushBack( ch ); 1503 clazz = readUntil( "( \t\n\r" ); 1504 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 1505 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 1506 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 1507 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 1508 1509 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 1510 //(2) remove any invalid character 1511 if( clazz != null ) { 1512 clazz = clazz.replace( '.', ' ' ) 1513 .replaceAll( "[^\\s-_\\w\\x200-\\x377]+", "" ); 1514 } 1515 ch = nextToken(); 1516 1517 // check for %%class1.class2( style information ) 1518 if( ch == '(' ) { 1519 style = readBraceContent( '(', ')' ); 1520 // Pop out only spaces, so that the upcoming EOL check does not check the next line. 1521 } else if( ch == '\n' || ch == '\r' ) { 1522 pushBack( ch ); 1523 } 1524 } else { 1525 // Anything else stops. 1526 pushBack( ch ); 1527 try { 1528 final Boolean isSpan = m_styleStack.pop(); 1529 if( isSpan == null ) { 1530 // Fail quietly 1531 } else if( isSpan ) { 1532 el = popElement( "span" ); 1533 } else { 1534 el = popElement( "div" ); 1535 } 1536 } catch( final EmptyStackException e ) { 1537 LOG.debug( "Page '" + m_context.getName() + "' closes a %%-block that has not been opened." 
); 1538 return m_currentElement; 1539 } 1540 return el; 1541 } 1542 1543 // Check if there is an attempt to do something nasty 1544 try { 1545 style = StringEscapeUtils.unescapeHtml4(style); 1546 if( style != null && style.contains( "javascript:" ) ) { 1547 LOG.debug( "Attempt to output javascript within CSS: {}", style ); 1548 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1549 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 1550 } 1551 } catch( final NumberFormatException e ) { 1552 // If there are unknown entities, we don't want the parser to stop. 1553 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1554 final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 1555 return addElement( makeError( msg ) ); 1556 } 1557 1558 // Decide if we should open a div or a span? 1559 final String eol = peekAheadLine(); 1560 1561 if( !eol.trim().isEmpty() ) { 1562 // There is stuff after the class 1563 el = new Element("span"); 1564 m_styleStack.push( Boolean.TRUE ); 1565 } else { 1566 startBlockLevel(); 1567 el = new Element("div"); 1568 m_styleStack.push( Boolean.FALSE ); 1569 } 1570 1571 if( style != null ) el.setAttribute("style", style); 1572 if( clazz != null ) el.setAttribute("class", clazz); 1573 return pushElement( el ); 1574 } 1575 pushBack( ch ); 1576 return el; 1577 } 1578 1579 private Element handleSlash( ) throws IOException { 1580 final int ch = nextToken(); 1581 pushBack( ch ); 1582 if( ch == '%' && !m_styleStack.isEmpty() ) { 1583 return handleDiv(); 1584 } 1585 1586 return null; 1587 } 1588 1589 private Element handleBar( final boolean newLine ) throws IOException { 1590 Element el; 1591 if( !m_istable && !newLine ) { 1592 return null; 1593 } 1594 1595 // If the bar is in the first column, we will either start a new table or continue the old one. 
1596 if( newLine ) { 1597 if( !m_istable ) { 1598 startBlockLevel(); 1599 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 1600 m_istable = true; 1601 m_rowNum = 0; 1602 } 1603 1604 m_rowNum++; 1605 final Element tr = ( m_rowNum % 2 != 0 ) 1606 ? new Element("tr").setAttribute("class", "odd") 1607 : new Element("tr"); 1608 el = pushElement( tr ); 1609 } 1610 1611 // Check out which table cell element to start; a header element (th) or a regular element (td). 1612 final int ch = nextToken(); 1613 if( ch == '|' ) { 1614 if( !newLine ) { 1615 el = popElement("th"); 1616 if( el == null ) popElement("td"); 1617 } 1618 el = pushElement( new Element("th") ); 1619 } else { 1620 if( !newLine ) { 1621 el = popElement( "td" ); 1622 if( el == null ) popElement( "th" ); 1623 } 1624 el = pushElement( new Element("td") ); 1625 pushBack( ch ); 1626 } 1627 return el; 1628 } 1629 1630 /** 1631 * Generic escape of next character or entity. 1632 */ 1633 private Element handleTilde() throws IOException { 1634 final int ch = nextToken(); 1635 1636 if( ch == ' ' ) { 1637 if( m_wysiwygEditorMode ) { 1638 m_plainTextBuf.append( "~ " ); 1639 } 1640 return m_currentElement; 1641 } 1642 1643 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 1644 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 1645 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) { 1646 if( m_wysiwygEditorMode ) { 1647 m_plainTextBuf.append( '~' ); 1648 } 1649 m_plainTextBuf.append( ( char ) ch ); 1650 m_plainTextBuf.append( readWhile( "" + ( char ) ch ) ); 1651 return m_currentElement; 1652 } 1653 // No escape. 
1654 pushBack( ch ); 1655 return null; 1656 } 1657 1658 private void fillBuffer( final Element startElement ) throws IOException { 1659 m_currentElement = startElement; 1660 m_newLine = true; 1661 boolean quitReading = false; 1662 disableOutputEscaping(); 1663 while( !quitReading ) { 1664 final int ch = nextToken(); 1665 if( ch == -1 ) { 1666 break; 1667 } 1668 1669 // Check if we're actually ending the preformatted mode. We still must do an entity transformation here. 1670 if( m_isEscaping ) { 1671 if( ch == '}' ) { 1672 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 1673 } else if( ch == -1 ) { 1674 quitReading = true; 1675 } 1676 else if( ch == '\r' ) { 1677 // DOS line feeds we ignore. 1678 } else if( ch == '<' ) { 1679 m_plainTextBuf.append( "<" ); 1680 } else if( ch == '>' ) { 1681 m_plainTextBuf.append( ">" ); 1682 } else if( ch == '&' ) { 1683 m_plainTextBuf.append( "&" ); 1684 } else if( ch == '~' ) { 1685 String braces = readWhile( "}" ); 1686 if( braces.length() >= 3 ) { 1687 m_plainTextBuf.append( "}}}" ); 1688 braces = braces.substring(3); 1689 } else { 1690 m_plainTextBuf.append( (char) ch ); 1691 } 1692 1693 for( int i = braces.length()-1; i >= 0; i-- ) { 1694 pushBack( braces.charAt( i ) ); 1695 } 1696 } else { 1697 m_plainTextBuf.append( (char) ch ); 1698 } 1699 1700 continue; 1701 } 1702 1703 // An empty line stops a list 1704 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) { 1705 m_plainTextBuf.append(unwindGeneralList()); 1706 } 1707 1708 if( m_newLine && ch != '|' && m_istable ) { 1709 popElement( "table" ); 1710 m_istable = false; 1711 } 1712 1713 int skip = IGNORE; 1714 // Do the actual parsing and catch any errors. 
1715 try { 1716 skip = parseToken( ch ); 1717 } catch( final IllegalDataException e ) { 1718 LOG.info( "Page {} contains data which cannot be added to DOM tree: {}", m_context.getPage().getName(), e.getMessage() ); 1719 makeError( "Error: " + cleanupSuspectData( e.getMessage() ) ); 1720 } 1721 1722 // The idea is as follows: If the handler method returns an element (el != null), it is assumed that it 1723 // has been added in the stack. Otherwise, the character is added as is to the plaintext buffer. 1724 // 1725 // For the transition phase, if s != null, it also gets added in the plaintext buffer. 1726 switch( skip ) { 1727 case ELEMENT: 1728 m_newLine = false; 1729 break; 1730 1731 case CHARACTER: 1732 m_plainTextBuf.append( (char) ch ); 1733 m_newLine = false; 1734 break; 1735 1736 case IGNORE: 1737 default: 1738 break; 1739 } 1740 } 1741 1742 closeHeadings(); 1743 popElement( "domroot" ); 1744 } 1745 1746 private String cleanupSuspectData( final String s ) { 1747 final StringBuilder sb = new StringBuilder( s.length() ); 1748 for( int i = 0; i < s.length(); i++ ) { 1749 final char c = s.charAt(i); 1750 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 1751 else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() ); 1752 } 1753 1754 return sb.toString(); 1755 } 1756 1757 /** The token is a plain character. */ 1758 protected static final int CHARACTER = 0; 1759 1760 /** The token is a wikimarkup element. */ 1761 protected static final int ELEMENT = 1; 1762 1763 /** The token is to be ignored. */ 1764 protected static final int IGNORE = 2; 1765 1766 /** 1767 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 1768 * you think this was a wiki markup element, and IGNORE, if you think 1769 * we should ignore this altogether. 1770 * <p> 1771 * To add your own MarkupParser, you can override this method, but it 1772 * is recommended that you call super.parseToken() as well to gain advantage 1773 * of JSPWiki's own markup. 
You can call it at the start of your own 1774 * parseToken() or end - it does not matter. 1775 * 1776 * @param ch The character under investigation 1777 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 1778 * @throws IOException If parsing fails. 1779 */ 1780 protected int parseToken( final int ch ) throws IOException { 1781 Element el = null; 1782 // Now, check the incoming token. 1783 switch( ch ) { 1784 case '\r': 1785 // DOS linefeeds we forget 1786 return IGNORE; 1787 1788 case '\n': 1789 // Close things like headings, etc. 1790 // FIXME: This is not really very fast 1791 closeHeadings(); 1792 1793 popElement( "dl" ); // Close definition lists. 1794 if( m_istable ) { 1795 popElement("tr"); 1796 } 1797 m_isdefinition = false; 1798 if( m_newLine ) { 1799 // Paragraph change. 1800 startBlockLevel(); 1801 // Figure out which elements cannot be enclosed inside a <p></p> pair according to XHTML rules. 1802 final String nextLine = peekAheadLine(); 1803 if( nextLine.isEmpty() || 1804 ( !nextLine.isEmpty() && 1805 !nextLine.startsWith( "{{{" ) && 1806 !nextLine.startsWith( "----" ) && 1807 !nextLine.startsWith( "%%" ) && 1808 "*#!;".indexOf( nextLine.charAt( 0 ) ) == -1 ) ) { 1809 pushElement( new Element( "p" ) ); 1810 m_isOpenParagraph = true; 1811 1812 if( m_restartitalic ) { 1813 pushElement( new Element( "i" ) ); 1814 m_isitalic = true; 1815 m_restartitalic = false; 1816 } 1817 if( m_restartbold ) { 1818 pushElement( new Element( "b" ) ); 1819 m_isbold = true; 1820 m_restartbold = false; 1821 } 1822 } 1823 } else { 1824 m_plainTextBuf.append("\n"); 1825 m_newLine = true; 1826 } 1827 return IGNORE; 1828 1829 case '\\': 1830 el = handleBackslash(); 1831 break; 1832 1833 case '_': 1834 el = handleUnderscore(); 1835 break; 1836 1837 case '\'': 1838 el = handleApostrophe(); 1839 break; 1840 1841 case '{': 1842 el = handleOpenbrace( m_newLine ); 1843 break; 1844 1845 case '}': 1846 el = handleClosebrace(); 1847 break; 1848 1849 case '-': 1850 if( 
m_newLine ) { 1851 el = handleDash(); 1852 } 1853 break; 1854 1855 case '!': 1856 if( m_newLine ) { 1857 el = handleHeading(); 1858 } 1859 break; 1860 1861 case ';': 1862 if( m_newLine ) { 1863 el = handleDefinitionList(); 1864 } 1865 break; 1866 1867 case ':': 1868 if( m_isdefinition ) { 1869 popElement( "dt" ); 1870 el = pushElement( new Element( "dd" ) ); 1871 m_isdefinition = false; 1872 } 1873 break; 1874 1875 case '[': 1876 el = handleOpenbracket(); 1877 break; 1878 1879 case '*': 1880 if( m_newLine ) { 1881 pushBack( '*' ); 1882 el = handleGeneralList(); 1883 } 1884 break; 1885 1886 case '#': 1887 if( m_newLine ) { 1888 pushBack( '#' ); 1889 el = handleGeneralList(); 1890 } 1891 break; 1892 1893 case '|': 1894 el = handleBar( m_newLine ); 1895 break; 1896 1897 case '~': 1898 el = handleTilde(); 1899 break; 1900 1901 case '%': 1902 el = handleDiv(); 1903 break; 1904 1905 case '/': 1906 el = handleSlash(); 1907 break; 1908 1909 default: 1910 break; 1911 } 1912 1913 return el != null ? ELEMENT : CHARACTER; 1914 } 1915 1916 private void closeHeadings() { 1917 if( m_lastHeading != null && !m_wysiwygEditorMode ) { 1918 // Add the hash anchor element at the end of the heading 1919 addElement( new Element("a").setAttribute( "class",HASHLINK ) 1920 .setAttribute( "href","#" + m_lastHeading.m_titleAnchor ) 1921 .setText( "#" ) ); 1922 m_lastHeading = null; 1923 } 1924 popElement( "h2" ); 1925 popElement( "h3" ); 1926 popElement( "h4" ); 1927 } 1928 1929 /** 1930 * Parses the entire document from the Reader given in the constructor or set by {@link #setInputReader(Reader)}. 1931 * 1932 * @return A WikiDocument, ready to be passed to the renderer. 1933 * @throws IOException If parsing cannot be accomplished. 
1934 */ 1935 @Override 1936 public WikiDocument parse() throws IOException { 1937 final WikiDocument d = new WikiDocument( m_context.getPage() ); 1938 d.setContext( m_context ); 1939 final Element rootElement = new Element( "domroot" ); 1940 d.setRootElement( rootElement ); 1941 fillBuffer( rootElement ); 1942 paragraphify( rootElement ); 1943 1944 return d; 1945 } 1946 1947 /** 1948 * Checks out that the first paragraph is correctly installed. 1949 * 1950 * @param rootElement element to be checked. 1951 */ 1952 private void paragraphify( final Element rootElement) { 1953 // Add the paragraph tag to the first paragraph 1954 final List< Content > kids = rootElement.getContent(); 1955 if( rootElement.getChild( "p" ) != null ) { 1956 final ArrayList<Content> ls = new ArrayList<>(); 1957 int idxOfFirstContent = 0; 1958 int count = 0; 1959 1960 for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) { 1961 final Content c = i.next(); 1962 if( c instanceof Element ) { 1963 final String name = ( ( Element )c ).getName(); 1964 if( isBlockLevel( name ) ) { 1965 break; 1966 } 1967 } 1968 1969 if( !( c instanceof ProcessingInstruction ) ) { 1970 ls.add( c ); 1971 if( idxOfFirstContent == 0 ) { 1972 idxOfFirstContent = count; 1973 } 1974 } 1975 } 1976 1977 // If there were any elements, then add a new <p> (unless it would be an empty one) 1978 if( ls.size() > 0 ) { 1979 final Element newel = new Element("p"); 1980 for( final Content c : ls ) { 1981 c.detach(); 1982 newel.addContent( c ); 1983 } 1984 1985 // Make sure there are no empty <p/> tags added. 1986 if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() ) { 1987 rootElement.addContent( idxOfFirstContent, newel ); 1988 } 1989 } 1990 } 1991 } 1992 1993}