001 /* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.commons.text.StringEscapeUtils; 023import org.apache.logging.log4j.LogManager; 024import org.apache.logging.log4j.Logger; 025import org.apache.oro.text.regex.MalformedPatternException; 026import org.apache.oro.text.regex.MatchResult; 027import org.apache.oro.text.regex.Pattern; 028import org.apache.oro.text.regex.PatternCompiler; 029import org.apache.oro.text.regex.PatternMatcher; 030import org.apache.oro.text.regex.Perl5Compiler; 031import org.apache.oro.text.regex.Perl5Matcher; 032import org.apache.wiki.InternalWikiException; 033import org.apache.wiki.StringTransmutator; 034import org.apache.wiki.api.core.Acl; 035import org.apache.wiki.api.core.Context; 036import org.apache.wiki.api.core.ContextEnum; 037import org.apache.wiki.api.core.Page; 038import org.apache.wiki.api.exceptions.PluginException; 039import org.apache.wiki.api.plugin.Plugin; 040import org.apache.wiki.api.spi.Wiki; 041import org.apache.wiki.attachment.AttachmentManager; 042import org.apache.wiki.auth.AuthorizationManager; 043import org.apache.wiki.auth.UserManager; 044import 
org.apache.wiki.auth.WikiSecurityException; 045import org.apache.wiki.auth.acl.AclManager; 046import org.apache.wiki.i18n.InternationalizationManager; 047import org.apache.wiki.preferences.Preferences; 048import org.apache.wiki.util.TextUtil; 049import org.apache.wiki.util.XmlUtil; 050import org.apache.wiki.variables.VariableManager; 051import org.jdom2.Attribute; 052import org.jdom2.Content; 053import org.jdom2.Element; 054import org.jdom2.IllegalDataException; 055import org.jdom2.ProcessingInstruction; 056import org.jdom2.Verifier; 057 058import javax.xml.transform.Result; 059import java.io.IOException; 060import java.io.Reader; 061import java.io.StringReader; 062import java.text.MessageFormat; 063import java.util.ArrayList; 064import java.util.Arrays; 065import java.util.Collection; 066import java.util.EmptyStackException; 067import java.util.HashMap; 068import java.util.Iterator; 069import java.util.List; 070import java.util.Map; 071import java.util.Properties; 072import java.util.ResourceBundle; 073import java.util.Stack; 074 075/** 076 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the heart and soul of JSPWiki : make 077 * sure you test properly anything that is added, or else it breaks down horribly. 
*
 * @since 2.4
 */
public class JSPWikiMarkupParser extends MarkupParser {

    // Link type codes accepted by makeLink(). createAnchor() also uses the code as an index into CLASS_TYPES.

    /** Link to a wiki page, built with the page-view URL. */
    protected static final int READ = 0;
    /** Link to the page-edit URL; its title comes from the "markupparser.link.create" message. */
    protected static final int EDIT = 1;
    /** Used when the link target is empty; the text is rendered inside a {@code <u>} element. */
    protected static final int EMPTY = 2; // Empty message
    /** Footnote definition: a named anchor "ref-&lt;context name&gt;-&lt;link without its first character&gt;". */
    protected static final int LOCAL = 3;
    /** Reference to a footnote: an anchor pointing at "#ref-&lt;context name&gt;-&lt;link&gt;". */
    protected static final int LOCALREF = 4;
    /** Inline image ({@code <img class="inline">}). */
    protected static final int IMAGE = 5;
    /** Anchor to an external URL; gets rel="nofollow" when m_useRelNofollow is set. */
    protected static final int EXTERNAL = 6;
    /** Anchor to another wiki. */
    protected static final int INTERWIKI = 7;
    /** Inline image wrapped in an anchor whose href is the link text. */
    protected static final int IMAGELINK = 8;
    /** Inline image wrapped in an anchor to the wiki page named by the link text. */
    protected static final int IMAGEWIKILINK = 9;
    /** Anchor to an attachment, optionally followed by an "info" link image. */
    protected static final int ATTACHMENT = 10;

    private static final Logger LOG = LogManager.getLogger( JSPWikiMarkupParser.class );

    // Parsing state flags. m_isEscaping suppresses CamelCase/URI detection in flushPlainText().
    private boolean m_isbold;
    private boolean m_isitalic;
    private boolean m_istable;
    private boolean m_isPre;
    private boolean m_isEscaping;
    private boolean m_isdefinition;
    private boolean m_isPreBlock;

    /** Contains style information, in multiple forms. */
    private final Stack< Boolean > m_styleStack = new Stack<>();

    // general list handling
    private int m_genlistlevel;
    private final StringBuilder m_genlistBulletBuffer = new StringBuilder( 10 ); // stores the # and * pattern
    private final boolean m_allowPHPWikiStyleLists = true;

    private boolean m_isOpenParagraph;

    /** Parser for extended link functionality. */
    private final LinkParser m_linkParser = new LinkParser();

    /** Keeps track of any plain text that gets put in the Text nodes; emptied by flushPlainText(). */
    private StringBuilder m_plainTextBuf = new StringBuilder( 20 );

    /** The element that new content is currently appended to. */
    private Element m_currentElement;

    /** Keep track of duplicate header names. */
    private final Map< String, Integer > m_titleSectionCounter = new HashMap<>();

    /** If true, then considers CamelCase links as well. */
    private boolean m_camelCaseLinks;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean m_wysiwygEditorMode;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean m_plainUris;

    /** If true, all outward links use a small link image. */
    private boolean m_useOutlinkImage = true;

    /** If true, attachment links are followed by a small "info" link image (see the ATTACHMENT case of makeLink()). */
    private boolean m_useAttachmentImage = true;

    /** If true, allows raw HTML. */
    private boolean m_allowHTML;

    /** If true, external links get a rel="nofollow" attribute. */
    private boolean m_useRelNofollow;

    private final PatternCompiler m_compiler = new Perl5Compiler();

    /**
     * Pattern used to find CamelCase words and bare URIs in plain text. As consumed by flushPlainText():
     * group 1 is the non-alphanumeric prefix, group 2 the whole matched word or URI, group 3 the protocol
     * (URIs only) and group 4 the rest of the URI.
     */
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    private final PatternMatcher m_camelCaseMatcher = new Perl5Matcher();
    /** Compiled form of WIKIWORD_REGEX; looked up from / stored into the engine attributes by initialize(). */
    private Pattern m_camelCasePattern;

    private int m_rowNum = 1;

    private Heading m_lastHeading;

    /** Engine attribute key under which the compiled CamelCase pattern is cached. */
    private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";

    /**
     * Creates a markup parser.
     *
     * @param context The WikiContext which controls the parsing
     * @param in Where the data is read from.
     */
    public JSPWikiMarkupParser( final Context context, final Reader in ) {
        super( context, in );
        initialize();
    }

    // FIXME: parsers should be pooled for better performance.
171 private void initialize() { 172 initInlineImagePatterns(); 173 174 m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN ); 175 if( m_camelCasePattern == null ) { 176 try { 177 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 178 } catch( final MalformedPatternException e ) { 179 LOG.fatal("Internal error: Someone put in a faulty pattern.",e); 180 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 181 } 182 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 183 } 184 185 // Set the properties. 186 final Properties props = m_engine.getWikiProperties(); 187 final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 188 189 if( cclinks != null ) { 190 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 191 } else { 192 m_camelCaseLinks = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks ); 193 } 194 195 final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE ); 196 if( wysiwygVariable != null ) { 197 m_wysiwygEditorMode = wysiwygVariable; 198 } 199 200 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 201 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 202 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 203 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 204 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 205 206 if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) { 207 disableAccessRules(); 208 } 209 210 m_context.getPage().setHasMetadata(); 211 } 212 213 /** 214 * Calls a transmutator chain. 
*
 * @param list Chain to call; may be null or empty, in which case the text is returned unchanged.
 * @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
 * @return The result of the mutation.
 */
protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) {
    if( list == null || list.isEmpty()) {
        return text;
    }

    // Each mutator receives the output of the previous one.
    for( final StringTransmutator m : list ) {
        text = m.mutate( m_context, text );
    }

    return text;
}

/**
 * Calls the heading listeners.
 *
 * @param param A Heading object.
 */
private void callHeadingListenerChain( final Heading param ) {
    for( final HeadingListener h : m_headingListenerChain ) {
        h.headingAdded( m_context, param );
    }
}

/**
 * Creates a JDOM anchor element. The link text and the section are passed through
 * TextUtil.escapeHTMLEntities(); the link itself is used as given.
 * <p>
 * NOTE: earlier documentation said this method can be overridden to change the URL creation, but it is private.
 *
 * @param type One of the link type constants; used as an index into CLASS_TYPES for the "class" attribute
 * @param link URL to which to link to
 * @param text Link text
 * @param section If a particular section identifier is required; appended to the link to form the href.
 * @return An 'A' element.
 * @since 2.4.78
 */
private Element createAnchor( final int type, final String link, String text, String section ) {
    text = TextUtil.escapeHTMLEntities( text );
    section = TextUtil.escapeHTMLEntities( section );
    final Element el = new Element( "a" );
    el.setAttribute( "class", CLASS_TYPES[ type ] );
    el.setAttribute( "href", link + section );
    el.addContent( text );
    return el;
}

/**
 * Builds the element for a link of the given type, applies any extra attributes to it, flushes the pending
 * plain text and appends the element to the current element.
 *
 * @param type One of the link type constants; forced to EMPTY when the link is an empty string.
 * @param link The link target (page name, URL or footnote id, depending on the type).
 * @param text The link text; when null, the link itself is used. It is run through the m_linkMutators chain.
 * @param section Optional section name; when non-null it becomes "#section". Only the READ, EXTERNAL and
 *                INTERWIKI cases pass it on to the anchor.
 * @param attributes Extra attributes to set on the resulting element; may be null.
 * @return The element that was appended, or null when nothing was appended by this method's final step
 *         (unknown type, or ATTACHMENT with the attachment image disabled).
 */
private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) {
    Element el = null;
    if( text == null ) {
        text = link;
    }
    text = callMutatorChain( m_linkMutators, text );
    section = (section != null) ? ("#"+section) : "";

    // Make sure we make a link name that can be accepted as a valid URL.
    if( link.isEmpty() ) {
        type = EMPTY;
    }
    final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );

    switch( type ) {
        case READ:
            el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section );
            break;

        case EDIT:
            el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" );
            el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );
            break;

        case EMPTY:
            el = new Element("u").addContent(text);
            break;

        // These two are for local references - footnotes and references to footnotes.
        // We embed the page name (or whatever WikiContext gives us) to make sure the links are unique across Wiki.
        case LOCALREF:
            el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
            break;

        case LOCAL:
            el = new Element( "a" ).setAttribute( "class", CLASS_FOOTNOTE );
            // The first character of the link is dropped (callers pass links starting with '#').
            el.setAttribute( "name", "ref-" + m_context.getName() + "-" + link.substring( 1 ) );
            if( !m_allowHTML ) {
                el.addContent( "[" + TextUtil.escapeHTMLEntities( text ) + "]" );
            } else {
                el.addContent( "[" + text + "]" );
            }
            break;

        // With the image, external and interwiki types we need to make sure nobody can put in Javascript or
        // something else annoying into the links themselves. We do this by preventing a haxor from stopping
        // the link name short with quotes in fillBuffer().
        case IMAGE:
            el = new Element( "img" ).setAttribute( "class", "inline" );
            el.setAttribute( "src", link );
            el.setAttribute( "alt", text );
            break;

        case IMAGELINK:
            el = new Element( "img" ).setAttribute( "class", "inline" );
            el.setAttribute( "src", link );
            el.setAttribute( "alt", text );
            // The image is wrapped in an anchor whose href is the link text.
            el = createAnchor( IMAGELINK, text, "", "" ).addContent( el );
            break;

        case IMAGEWIKILINK:
            final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text );
            el = new Element( "img" ).setAttribute( "class", "inline" );
            el.setAttribute( "src", link );
            el.setAttribute( "alt", text );
            // The image is wrapped in an anchor to the wiki page named by the link text.
            el = createAnchor( IMAGEWIKILINK, pagelink, "", "" ).addContent( el );
            break;

        case EXTERNAL:
            el = createAnchor( EXTERNAL, link, text, section );
            if( m_useRelNofollow ) {
                el.setAttribute( "rel", "nofollow" );
            }
            break;

        case INTERWIKI:
            el = createAnchor( INTERWIKI, link, text, section );
            break;

        case ATTACHMENT:
            final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link );
            final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link );
            final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" );
            el = createAnchor( ATTACHMENT, attlink, text, "" );
            if( m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) {
                el.setAttribute("download", "");
            }

            // Push and immediately pop: this appends the attachment anchor to the tree right away,
            // so that "el" can be reused below for the optional info link.
            pushElement( el );
            popElement( el.getName() );

            if( m_useAttachmentImage ) {
                el = new Element( "img" ).setAttribute( "src", imglink );
                el.setAttribute( "border", "0" );
                el.setAttribute( "alt", "(info)" );

                el = new Element( "a" ).setAttribute( "href", infolink ).addContent( el );
                el.setAttribute( "class", "infolink" );
            } else {
                el = null;
            }
            break;

        default:
            break;
    }

    // Note: in the ATTACHMENT case "el" is the info link (or null) at this point, so the extra
    // attributes are applied to the info link, not to the attachment anchor.
    if( el != null && attributes != null ) {
        while( attributes.hasNext() ) {
            final Attribute attr = attributes.next();
            if( attr != null ) {
                el.setAttribute( attr );
            }
        }
    }

    if( el != null ) {
        flushPlainText();
        m_currentElement.addContent( el );
    }
    return el;
}

/**
 * These are all the HTML 4.01 block-level elements.
 */
// Keep sorted: isBlockLevel() does a binary search on this array.
private static final String[] BLOCK_ELEMENTS = {
    "address", "blockquote", "div", "dl", "fieldset", "form",
    "h1", "h2", "h3", "h4", "h5", "h6",
    "hr", "noscript", "ol", "p", "pre", "table", "ul"
};

/**
 * Tells whether the given element name is one of the BLOCK_ELEMENTS.
 *
 * @param name element name to look up (compared as-is against the lower-case entries of BLOCK_ELEMENTS).
 * @return true, if the name is listed in BLOCK_ELEMENTS.
 */
private static boolean isBlockLevel( final String name ) {
    return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
}

/**
 * This method peeks ahead in the stream until EOL and returns the result. It will keep the buffers untouched.
 * <p>
 * The line that was read is pushed back. When it is longer than PUSHBACK_BUFFER_SIZE only its first
 * PUSHBACK_BUFFER_SIZE - 1 characters are pushed back, although the whole line is still returned.
 *
 * @return The string from the current position to the end of line.
 * @throws IOException if reading fails, or if pushing back the truncated line fails.
 */
// FIXME: Always returns an empty line, even if the stream is full.
private String peekAheadLine() throws IOException {
    final String s = readUntilEOL().toString();
    if( s.length() > PUSHBACK_BUFFER_SIZE ) {
        LOG.warn( "Line is longer than maximum allowed size (" + PUSHBACK_BUFFER_SIZE + " characters. Attempting to recover..." );
        pushBack( s.substring( 0, PUSHBACK_BUFFER_SIZE - 1 ) );
    } else {
        try {
            pushBack( s );
        } catch( final IOException e ) {
            // Deliberately best-effort: the peeked line is still returned to the caller.
            LOG.warn( "Pushback failed: the line is probably too long. Attempting to recover." );
        }
    }
    return s;
}

/**
 * Moves the buffered plain text into the current element. Unless raw HTML is allowed, the text is
 * HTML-escaped first. When CamelCase links are enabled (and we are not escaping), CamelCase words and bare
 * URIs found in the text are turned into links on the way; a "~" directly before a match, or a "[" in its
 * prefix, suppresses the link.
 *
 * @return The number of characters that were in the plain text buffer (counted before escaping).
 */
private int flushPlainText() {
    final int numChars = m_plainTextBuf.length();
    if( numChars > 0 ) {
        String buf;

        if( !m_allowHTML ) {
            buf = TextUtil.escapeHTMLEntities( m_plainTextBuf.toString() );
        } else {
            buf = m_plainTextBuf.toString();
        }
        // We must first empty the buffer because the side effect of calling makeCamelCaseLink() is to call this routine.
        m_plainTextBuf = new StringBuilder(20);
        try {
            // This is the heaviest part of parsing, and therefore we can do some optimization here.
            // 1) Only when the length of the buffer is big enough, we try to do the match
            if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) {
                // Each iteration consumes the text up to and including one match; "buf" is the unprocessed rest.
                while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) {
                    final MatchResult result = m_camelCaseMatcher.getMatch();
                    final String firstPart = buf.substring( 0, result.beginOffset( 0 ) );
                    String prefix = result.group( 1 );
                    if( prefix == null ) {
                        prefix = "";
                    }

                    // Group 2 spans the whole alternation of WIKIWORD_REGEX (CamelCase word or URI);
                    // groups 3 and 4 are only set for URIs.
                    final String camelCase = result.group(2);
                    final String protocol = result.group(3);
                    String uri = protocol+result.group(4);
                    buf = buf.substring(result.endOffset(0));

                    m_currentElement.addContent( firstPart );
                    // Check if the user does not wish to do URL or WikiWord expansion
                    if( prefix.endsWith( "~" ) || prefix.indexOf( '[' ) != -1 ) {
                        if( prefix.endsWith( "~" ) ) {
                            // The escaping tilde itself is only kept for the wysiwyg editor.
                            if( m_wysiwygEditorMode ) {
                                m_currentElement.addContent( "~" );
                            }
                            prefix = prefix.substring( 0, prefix.length() - 1 );
                        }
                        if( camelCase != null ) {
                            m_currentElement.addContent( prefix + camelCase );
                        } else if( protocol != null ) {
                            m_currentElement.addContent( prefix + uri );
                        }
                        continue;
                    }

                    // Fine, then let's check what kind of link this was and emit the proper elements
                    if( protocol != null ) {
                        // A trailing '.' or ',' is treated as punctuation: give it back to the remaining text.
                        final char c = uri.charAt( uri.length() - 1 );
                        if( c == '.' || c == ',' ) {
                            uri = uri.substring( 0, uri.length() - 1 );
                            buf = c + buf;
                        }
                        m_currentElement.addContent( prefix );
                        makeDirectURILink( uri );
                    } else {
                        m_currentElement.addContent( prefix );
                        makeCamelCaseLink( camelCase );
                    }
                }
                m_currentElement.addContent( buf );
            } else {
                // No camelcase asked for, just add the elements
                m_currentElement.addContent( buf );
            }
        } catch( final IllegalDataException e ) {
            // Sometimes it's possible that illegal XML chars is added to the data. Here we make sure it does not stop parsing.
            m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
        }
    }

    return numChars;
}

/**
 * Flushes the pending plain text, appends the given element to the current element and makes it the
 * new current element.
 *
 * @param e the element to open.
 * @return the element that was given.
 */
private Element pushElement( final Element e ) {
    flushPlainText();
    m_currentElement.addContent( e );
    m_currentElement = e;

    return e;
}

/**
 * Flushes the pending plain text and appends the given content to the current element, without changing
 * which element is current. Does nothing when the content is null.
 *
 * @param e the content to append; may be null.
 * @return the current element.
 */
private Element addElement( final Content e ) {
    if( e != null ) {
        flushPlainText();
        m_currentElement.addContent( e );
    }
    return m_currentElement;
}

/**
 * All elements that can be empty by the HTML DTD.
 */
// Keep sorted.
private static final String[] EMPTY_ELEMENTS = {
    "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param"
};

/**
 * Goes through the current element stack and pops all elements until this
 * element is found - this essentially "closes" an element.
 *
 * @param s element to be found.
 * @return The new current element, or null, if there was no such element in the entire stack.
528 */ 529 private Element popElement( final String s ) { 530 final int flushedBytes = flushPlainText(); 531 Element currEl = m_currentElement; 532 while( currEl.getParentElement() != null ) { 533 if( currEl.getName().equals( s ) && !currEl.isRootElement() ) { 534 m_currentElement = currEl.getParentElement(); 535 536 // Check if it's okay for this element to be empty. Then we will 537 // trick the JDOM generator into not generating an empty element, 538 // by putting an empty string between the tags. Yes, it's a kludge 539 // but what'cha gonna do about it. :-) 540 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) { 541 currEl.addContent( "" ); 542 } 543 return m_currentElement; 544 } 545 currEl = currEl.getParentElement(); 546 } 547 return null; 548 } 549 550 551 /** 552 * Reads the stream until it meets one of the specified ending characters, or stream end. The ending 553 * character will be left in the stream. 554 */ 555 private String readUntil( final String endChars ) throws IOException { 556 final StringBuilder sb = new StringBuilder( 80 ); 557 int ch = nextToken(); 558 while( ch != -1 ) { 559 if( ch == '\\' ) { 560 ch = nextToken(); 561 if( ch == -1 ) { 562 break; 563 } 564 } else { 565 if( endChars.indexOf( ( char )ch ) != -1 ) { 566 pushBack( ch ); 567 break; 568 } 569 } 570 sb.append( ( char )ch ); 571 ch = nextToken(); 572 } 573 574 return sb.toString(); 575 } 576 577 /** 578 * Reads the stream while the characters that have been specified are 579 * in the stream, returning then the result as a String. 
580 */ 581 private String readWhile( final String endChars ) throws IOException { 582 final StringBuilder sb = new StringBuilder( 80 ); 583 int ch = nextToken(); 584 while( ch != -1 ) { 585 if( endChars.indexOf( ( char ) ch ) == -1 ) { 586 pushBack( ch ); 587 break; 588 } 589 sb.append( ( char ) ch ); 590 ch = nextToken(); 591 } 592 593 return sb.toString(); 594 } 595 596 private JSPWikiMarkupParser m_cleanTranslator; 597 598 /** 599 * Does a lazy init. Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before 600 * the TranslatorReader it is contained by is up. 601 */ 602 private JSPWikiMarkupParser getCleanTranslator() { 603 if( m_cleanTranslator == null ) { 604 final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() ); 605 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 606 m_cleanTranslator.m_allowHTML = true; 607 } 608 609 return m_cleanTranslator; 610 } 611 612 /** 613 * Modifies the "hd" parameter to contain proper values. Because 614 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 615 * % after url encoding with '_'. 616 * <p> 617 * Counts also duplicate headings (= headings with similar name), and 618 * attaches a counter. 
619 */ 620 protected String makeHeadingAnchor( final String baseName, String title, final Heading hd ) { 621 hd.m_titleText = title; 622 title = MarkupParser.wikifyLink( title ); 623 hd.m_titleSection = m_engine.encodeName(title); 624 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) { 625 final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1; 626 m_titleSectionCounter.put( hd.m_titleSection, count ); 627 hd.m_titleSection += "-" + count; 628 } else { 629 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 630 } 631 632 hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection; 633 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 634 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 635 636 return hd.m_titleAnchor; 637 } 638 639 private String makeSectionTitle( String title ) { 640 title = title.trim(); 641 try { 642 final JSPWikiMarkupParser dtr = getCleanTranslator(); 643 dtr.setInputReader( new StringReader( title ) ); 644 final WikiDocument doc = dtr.parse(); 645 doc.setContext( m_context ); 646 647 return XmlUtil.extractTextFromDocument( doc ); 648 } catch( final IOException e ) { 649 LOG.fatal("Title parsing not working", e ); 650 throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e ); 651 } 652 } 653 654 /** 655 * Returns XHTML for the heading. 656 * 657 * @param level The level of the heading. 
@see Heading 658 * @param title the title for the heading 659 * @param hd a List to which heading should be added 660 * @return An Element containing the heading 661 */ 662 public Element makeHeading( final int level, final String title, final Heading hd ) { 663 final Element el; 664 final String pageName = m_context.getPage().getName(); 665 final String outTitle = makeSectionTitle( title ); 666 hd.m_level = level; 667 668 switch( level ) { 669 case Heading.HEADING_SMALL: 670 el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 671 break; 672 673 case Heading.HEADING_MEDIUM: 674 el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 675 break; 676 677 case Heading.HEADING_LARGE: 678 el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 679 break; 680 681 default: 682 throw new InternalWikiException( "Illegal heading type " + level ); 683 } 684 685 return el; 686 } 687 688 /** 689 * When given a link to a WikiName, we just return a proper HTML link for it. The local link mutator 690 * chain is also called. 691 */ 692 private Element makeCamelCaseLink( final String wikiname ) { 693 final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 694 callMutatorChain( m_localLinkMutatorChain, wikiname ); 695 if( matchedLink != null ) { 696 makeLink( READ, matchedLink, wikiname, null, null ); 697 } else { 698 makeLink( EDIT, wikiname, wikiname, null, null ); 699 } 700 701 return m_currentElement; 702 } 703 704 /** Holds the image URL for the duration of this parser */ 705 private String m_outlinkImageURL; 706 707 /** 708 * Returns an element for the external link image (out.png). However, this method caches the URL for the lifetime 709 * of this MarkupParser, because it's commonly used, and we'll end up with possibly hundreds our thousands of 710 * references to it... It's a lot faster, too. 
711 * 712 * @return An element containing the HTML for the outlink image. 713 */ 714 private Element outlinkImage() { 715 Element el = null; 716 if( m_useOutlinkImage ) { 717 if( m_outlinkImageURL == null ) { 718 m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE ); 719 } 720 721 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 722 el.setAttribute( "src", m_outlinkImageURL ); 723 el.setAttribute( "alt","" ); 724 } 725 726 return el; 727 } 728 729 /** 730 * Takes a URL and turns it into a regular wiki link. Unfortunately, because of the way that flushPlainText() 731 * works, it already encodes all the XML entities. But so does WikiContext.getURL(), so we 732 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 733 * <p> 734 * What a crappy problem. 735 * 736 * @param url provided url. 737 * @return An anchor Element containing the link. 738 */ 739 private Element makeDirectURILink( String url ) { 740 final Element result; 741 String last = null; 742 743 if( url.endsWith( "," ) || url.endsWith( "." ) ) { 744 last = url.substring( url.length() - 1 ); 745 url = url.substring( 0, url.length() - 1 ); 746 } 747 748 callMutatorChain( m_externalLinkMutatorChain, url ); 749 750 if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) { 751 result = handleImageLink( StringUtils.replace( url, "&", "&" ), url, false ); 752 } else { 753 result = makeLink( EXTERNAL, StringUtils.replace( url, "&", "&" ), url, null, null ); 754 addElement( outlinkImage() ); 755 } 756 757 if( last != null ) { 758 m_plainTextBuf.append( last ); 759 } 760 761 return result; 762 } 763 764 /** 765 * Image links are handled differently: 766 * 1. If the text is a WikiName of an existing page, it gets linked. 767 * 2. If the text is an external link, then it is inlined. 768 * 3. Otherwise, it becomes an ALT text. 769 * 770 * @param reallink The link to the image. 
771 * @param link Link text portion, may be a link to somewhere else. 772 * @param hasLinkText If true, then the defined link had a link text available. 773 * This means that the link text may be a link to a wiki page, 774 * or an external resource. 775 */ 776 private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) { 777 final String possiblePage = MarkupParser.cleanLink( link ); 778 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) { 779 return makeLink( IMAGELINK, reallink, link, null, null ); 780 } else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) { 781 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 782 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 783 } else { 784 return makeLink( IMAGE, reallink, link, null, null ); 785 } 786 } 787 788 private Element handleAccessRule( String ruleLine ) { 789 if( m_wysiwygEditorMode ) { 790 m_currentElement.addContent( "[" + ruleLine + "]" ); 791 } 792 if( !m_parseAccessRules ) { 793 return m_currentElement; 794 } 795 final Page page = m_context.getRealPage(); 796 // UserDatabase db = m_context.getEngine().getUserDatabase(); 797 798 if( ruleLine.startsWith( "{" ) ) { 799 ruleLine = ruleLine.substring( 1 ); 800 } 801 802 if( ruleLine.endsWith( "}" ) ) { 803 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 804 } 805 806 LOG.debug("page={}, ACL = {}", page.getName(), ruleLine); 807 808 try { 809 final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine ); 810 page.setAcl( acl ); 811 LOG.debug( acl.toString() ); 812 } catch( final WikiSecurityException wse ) { 813 return makeError( wse.getMessage() ); 814 } 815 816 return m_currentElement; 817 } 818 819 /** 820 * Handles metadata setting [{SET foo=bar}] 821 */ 822 private Element handleMetadata( final String link ) { 823 if( m_wysiwygEditorMode ) { 824 m_currentElement.addContent( "[" + link + "]" ); 825 } 826 827 try { 828 final 
String args = link.substring( link.indexOf(' '), link.length()-1 ); 829 final String name = args.substring( 0, args.indexOf('=') ).trim(); 830 String val = args.substring( args.indexOf('=')+1 ).trim(); 831 832 if( val.startsWith("'") ) { 833 val = val.substring( 1 ); 834 } 835 if( val.endsWith("'") ) { 836 val = val.substring( 0, val.length()-1 ); 837 } 838 839 // LOG.debug("SET name='"+name+"', value='"+val+"'."); 840 841 if( !name.isEmpty() && !val.isEmpty() ) { 842 val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val ); 843 m_context.getPage().setAttribute( name, val ); 844 } 845 } catch( final Exception e ) { 846 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 847 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 848 } 849 850 return m_currentElement; 851 } 852 853 /** 854 * Emits a processing instruction that will disable markup escaping. This is 855 * very useful if you want to emit HTML directly into the stream. 856 */ 857 private void disableOutputEscaping() { 858 addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) ); 859 } 860 861 /** 862 * Gobbles up all hyperlinks that are encased in square brackets. 
863 */ 864 private Element handleHyperlinks( String linktext, final int pos ) { 865 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 866 final StringBuilder sb = new StringBuilder( linktext.length() + 80 ); 867 868 if( m_linkParsingOperations.isAccessRule( linktext ) ) { 869 return handleAccessRule( linktext ); 870 } 871 872 if( m_linkParsingOperations.isMetadata( linktext ) ) { 873 return handleMetadata( linktext ); 874 } 875 876 if( m_linkParsingOperations.isPluginLink( linktext ) ) { 877 try { 878 final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 879 880 // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not. 881 if( pluginContent != null ) { 882 addElement( pluginContent ); 883 pluginContent.executeParse( m_context ); 884 } 885 } catch( final PluginException e ) { 886 LOG.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 887 //LOG.info( "Root cause:",e.getRootThrowable() ); 888 if( !m_wysiwygEditorMode ) { 889 final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE ); 890 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 891 m_context.getRealPage().getWiki(), 892 m_context.getRealPage().getName(), 893 e.getMessage() ) ) ); 894 } 895 } 896 return m_currentElement; 897 } 898 899 try { 900 final LinkParser.Link link = m_linkParser.parse( linktext ); 901 linktext = link.getText(); 902 String linkref = link.getReference(); 903 // Yes, we now have the components separated. 904 // linktext = the text the link should have 905 // linkref = the url or page name. 906 // In many cases these are the same. [linktext|linkref]. 
907 if( m_linkParsingOperations.isVariableLink( linktext ) ) { 908 final Content el = new VariableContent( linktext ); 909 addElement( el ); 910 } else if( m_linkParsingOperations.isExternalLink( linkref ) ) { 911 // It's an external link, out of this Wiki 912 callMutatorChain( m_externalLinkMutatorChain, linkref ); 913 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 914 handleImageLink( linkref, linktext, link.hasReference() ); 915 } else { 916 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 917 addElement( outlinkImage() ); 918 } 919 } else if( link.isInterwikiLink() ) { 920 // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved. 921 922 // FIXME: There is an interesting issue here: We probably should 923 // URLEncode the wikiPage, but we can't since some of the 924 // Wikis use slashes (/), which won't survive URLEncoding. 925 // Besides, we don't know which character set the other Wiki 926 // is using, so you'll have to write the entire name as it appears 927 // in the URL. Bugger. 
928 929 final String extWiki = link.getExternalWiki(); 930 final String wikiPage = link.getExternalWikiPage(); 931 if( m_wysiwygEditorMode ) { 932 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 933 } else { 934 String urlReference = m_engine.getInterWikiURL( extWiki ); 935 if( urlReference != null ) { 936 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 937 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 938 939 if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) { 940 handleImageLink( urlReference, linktext, link.hasReference() ); 941 } else { 942 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 943 } 944 if( m_linkParsingOperations.isExternalLink( urlReference ) ) { 945 addElement( outlinkImage() ); 946 } 947 } else { 948 final Object[] args = { TextUtil.escapeHTMLEntities( extWiki ) }; 949 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 950 } 951 } 952 } else if( linkref.startsWith( "#" ) ) { 953 // It defines a local footnote 954 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 955 } else if( TextUtil.isNumber( linkref ) ) { 956 // It defines a reference to a local footnote 957 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 958 } else { 959 final int hashMark; 960 961 // Internal wiki link, but is it an attachment link? 
962 String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref ); 963 if( attachment != null ) { 964 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 965 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 966 attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment ); 967 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 968 } else { 969 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 970 } 971 } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) { 972 // It's an internal Wiki link, but to a named section 973 final String namedSection = linkref.substring( hashMark + 1 ); 974 linkref = linkref.substring( 0, hashMark ); 975 linkref = MarkupParser.cleanLink( linkref ); 976 callMutatorChain( m_localLinkMutatorChain, linkref ); 977 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 978 if( matchedLink != null ) { 979 String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) ); 980 sectref = sectref.replace( '%', '_' ); 981 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 982 } else { 983 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 984 } 985 } else { 986 // It's an internal Wiki link 987 linkref = MarkupParser.cleanLink( linkref ); 988 callMutatorChain( m_localLinkMutatorChain, linkref ); 989 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 990 if( matchedLink != null ) { 991 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 992 } else { 993 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 994 } 995 } 996 } 997 998 } catch( final ParseException e ) { 999 LOG.info( "Parser failure: ", e ); 1000 final Object[] args = { e.getMessage() }; 1001 addElement( makeError( MessageFormat.format( rb.getString( 
"markupparser.error.parserfailure" ), args ) ) ); 1002 } 1003 return m_currentElement; 1004 } 1005 1006 /** 1007 * Pushes back any string that has been read. It will obviously be pushed back in a reverse order. 1008 * 1009 * @since 2.1.77 1010 */ 1011 private void pushBack( final String s ) throws IOException { 1012 for( int i = s.length()-1; i >= 0; i-- ) { 1013 pushBack( s.charAt(i) ); 1014 } 1015 } 1016 1017 private Element handleBackslash() throws IOException { 1018 final int ch = nextToken(); 1019 if( ch == '\\' ) { 1020 final int ch2 = nextToken(); 1021 if( ch2 == '\\' ) { 1022 pushElement( new Element( "br" ).setAttribute( "clear", "all" ) ); 1023 return popElement( "br" ); 1024 } 1025 pushBack( ch2 ); 1026 pushElement( new Element( "br" ) ); 1027 return popElement( "br" ); 1028 } 1029 pushBack( ch ); 1030 return null; 1031 } 1032 1033 private Element handleUnderscore() throws IOException { 1034 final int ch = nextToken(); 1035 Element el = null; 1036 if( ch == '_' ) { 1037 if( m_isbold ) { 1038 el = popElement( "b" ); 1039 } else { 1040 el = pushElement( new Element( "b" ) ); 1041 } 1042 m_isbold = !m_isbold; 1043 } else { 1044 pushBack( ch ); 1045 } 1046 1047 return el; 1048 } 1049 1050 1051 /** 1052 * For example: italics. 
1053 */ 1054 private Element handleApostrophe() throws IOException { 1055 final int ch = nextToken(); 1056 Element el = null; 1057 1058 if( ch == '\'' ) { 1059 if( m_isitalic ) { 1060 el = popElement( "i" ); 1061 } else { 1062 el = pushElement( new Element( "i" ) ); 1063 } 1064 m_isitalic = !m_isitalic; 1065 } else { 1066 pushBack( ch ); 1067 } 1068 1069 return el; 1070 } 1071 1072 private Element handleOpenbrace( final boolean isBlock ) throws IOException { 1073 final int ch = nextToken(); 1074 if( ch == '{' ) { 1075 final int ch2 = nextToken(); 1076 if( ch2 == '{' ) { 1077 m_isPre = true; 1078 m_isEscaping = true; 1079 m_isPreBlock = isBlock; 1080 if( isBlock ) { 1081 startBlockLevel(); 1082 return pushElement( new Element( "pre" ) ); 1083 } 1084 1085 return pushElement( new Element( "span" ).setAttribute( "class", "inline-code" ) ); 1086 } 1087 pushBack( ch2 ); 1088 return pushElement( new Element( "tt" ) ); 1089 } 1090 pushBack( ch ); 1091 return null; 1092 } 1093 1094 /** 1095 * Handles both }} and }}} 1096 */ 1097 private Element handleClosebrace() throws IOException { 1098 final int ch2 = nextToken(); 1099 if( ch2 == '}' ) { 1100 final int ch3 = nextToken(); 1101 if( ch3 == '}' ) { 1102 if( m_isPre ) { 1103 if( m_isPreBlock ) { 1104 popElement( "pre" ); 1105 } else { 1106 popElement( "span" ); 1107 } 1108 m_isPre = false; 1109 m_isEscaping = false; 1110 return m_currentElement; 1111 } 1112 m_plainTextBuf.append( "}}}" ); 1113 return m_currentElement; 1114 } 1115 pushBack( ch3 ); 1116 if( !m_isEscaping ) { 1117 return popElement( "tt" ); 1118 } 1119 } 1120 pushBack( ch2 ); 1121 return null; 1122 } 1123 1124 private Element handleDash() throws IOException { 1125 int ch = nextToken(); 1126 if( ch == '-' ) { 1127 final int ch2 = nextToken(); 1128 if( ch2 == '-' ) { 1129 final int ch3 = nextToken(); 1130 if( ch3 == '-' ) { 1131 // Empty away all the rest of the dashes. 1132 // Do not forget to return the first non-match back. 
1133 do { 1134 ch = nextToken(); 1135 } while ( ch == '-' ); 1136 1137 pushBack( ch ); 1138 startBlockLevel(); 1139 pushElement( new Element( "hr" ) ); 1140 return popElement( "hr" ); 1141 } 1142 pushBack( ch3 ); 1143 } 1144 pushBack( ch2 ); 1145 } 1146 pushBack( ch ); 1147 return null; 1148 } 1149 1150 private Element handleHeading() throws IOException { 1151 final Element el; 1152 final int ch = nextToken(); 1153 final Heading hd = new Heading(); 1154 if( ch == '!' ) { 1155 final int ch2 = nextToken(); 1156 if( ch2 == '!' ) { 1157 final String title = peekAheadLine(); 1158 el = makeHeading( Heading.HEADING_LARGE, title, hd ); 1159 } else { 1160 pushBack( ch2 ); 1161 final String title = peekAheadLine(); 1162 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1163 } 1164 } else { 1165 pushBack( ch ); 1166 final String title = peekAheadLine(); 1167 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1168 } 1169 1170 callHeadingListenerChain( hd ); 1171 m_lastHeading = hd; 1172 if( el != null ) { 1173 pushElement( el ); 1174 } 1175 return el; 1176 } 1177 1178 /** 1179 * Reads the stream until the next EOL or EOF. Note that it will also read the EOL from the stream. 1180 */ 1181 private StringBuilder readUntilEOL() throws IOException { 1182 int ch; 1183 final StringBuilder buf = new StringBuilder( 256 ); 1184 while( true ) { 1185 ch = nextToken(); 1186 if( ch == -1 ) { 1187 break; 1188 } 1189 buf.append( (char) ch ); 1190 if( ch == '\n' ) { 1191 break; 1192 } 1193 } 1194 return buf; 1195 } 1196 1197 /** Controls whether italic is restarted after a paragraph shift */ 1198 1199 private boolean m_restartitalic; 1200 private boolean m_restartbold; 1201 1202 private boolean m_newLine; 1203 1204 /** 1205 * Starts a block level element, therefore closing a potential open paragraph tag. 
*/
    private void startBlockLevel() {
        // These may not continue over block level limits in XHTML
        popElement( "i" );
        popElement( "b" );
        popElement( "tt" );
        if( m_isOpenParagraph ) {
            m_isOpenParagraph = false;
            popElement( "p" );
            m_plainTextBuf.append( "\n" ); // Just small beautification
        }
        // Remember the inline state so that it can be restored when the next paragraph opens.
        m_restartitalic = m_isitalic;
        m_restartbold   = m_isbold;
        m_isitalic = false;
        m_isbold   = false;
    }

    /**
     * Maps a bullet character to the list element name.
     *
     * @param c the bullet character, '*' or '#'
     * @return "ul" for '*', "ol" for '#'
     * @throws InternalWikiException for any other character
     */
    private static String getListType( final char c ) {
        if( c == '*' ) {
            return "ul";
        } else if( c == '#' ) {
            return "ol";
        }
        throw new InternalWikiException( "Parser got faulty list type: " + c );
    }
    /**
     * Like original handleOrderedList() and handleUnorderedList(),
     * however handles both ordered ('#') and unordered ('*') mixed together.
     * <p>
     * Compares the bullet run read from the input with the remembered one ({@code m_genlistBulletBuffer})
     * and the current depth ({@code m_genlistlevel}), opens or closes list elements accordingly and
     * finally opens a new {@code li}.
     *
     * @return the current element after the new list item has been pushed
     * @throws IOException if reading from the input fails
     */
    // FIXME: Refactor this; it's a bit messy.
    private Element handleGeneralList() throws IOException {
        startBlockLevel();
        String strBullets = readWhile( "*#" );
        // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
        // NB: numBullets is the length of the run as typed; it is not recomputed after the substitution below.
        final int numBullets = strBullets.length();

        // override the beginning portion of bullet pattern to be like the previous to simulate PHPWiki style lists

        if( m_allowPHPWikiStyleLists ) {
            // only substitute if different
            if( !( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) ) {
                if( numBullets <= m_genlistlevel ) {
                    // Substitute all but the last character (keep the expressed bullet preference)
                    strBullets = ( numBullets > 1 ? m_genlistBulletBuffer.substring( 0, numBullets - 1 ) : "" ) +
                                 strBullets.charAt( numBullets - 1 );
                } else {
                    strBullets = m_genlistBulletBuffer + strBullets.substring( m_genlistlevel, numBullets );
                }
            }
        }

        // Check if this is still of the same type
        if( strBullets.substring( 0, Math.min( numBullets, m_genlistlevel ) ).equals( m_genlistBulletBuffer.substring( 0, Math.min( numBullets, m_genlistlevel ) ) ) ) {
            if( numBullets > m_genlistlevel ) {
                // Deeper than before: open the first new list directly, every further level inside a new item.
                pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel++ ) ) ) );
                for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) {
                    // bullets are growing, get from new bullet list
                    pushElement( new Element( "li" ) );
                    pushElement( new Element( getListType( strBullets.charAt( m_genlistlevel ) ) ) );
                }
            } else if( numBullets < m_genlistlevel ) {
                // Close the previous list item.
                popElement( "li" );
                for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) {
                    // bullets are shrinking, get from old bullet list
                    popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
                    if( m_genlistlevel > 0 ) {
                        popElement( "li" );
                    }
                }
            } else {
                // Same depth: just close the previous item.
                if( m_genlistlevel > 0 ) {
                    popElement( "li" );
                }
            }
        } else {
            // The pattern has changed, unwind and restart
            int numEqualBullets;
            final int numCheckBullets;

            // find out how much is the same
            numEqualBullets = 0;
            numCheckBullets = Math.min( numBullets, m_genlistlevel );

            while( numEqualBullets < numCheckBullets ) {
                // if the bullets are equal so far, keep going
                if( strBullets.charAt( numEqualBullets ) == m_genlistBulletBuffer.charAt( numEqualBullets ) )
                    numEqualBullets++;
                // otherwise giveup, we have found how many are equal
                else
                    break;
            }

            //unwind
            for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) {
                popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
                if( m_genlistlevel > numBullets ) {
                    popElement( "li" );
                }
            }

            //rewind
            pushElement( new Element( getListType( strBullets.charAt( numEqualBullets++ ) ) ) );
            for( int i = numEqualBullets; i < numBullets; i++ ) {
                pushElement( new Element( "li" ) );
                pushElement( new Element( getListType( strBullets.charAt( i ) ) ) );
            }
            m_genlistlevel = numBullets;
        }

        // Push a new list item, and eat away any extra whitespace
        pushElement( new Element( "li" ) );
        readWhile( " " );

        // work done, remember the new bullet list (in place of old one)
        m_genlistBulletBuffer.setLength( 0 );
        m_genlistBulletBuffer.append( strBullets );
        return m_currentElement;
    }

    /**
     * Closes every open list level, innermost first, and forgets the remembered bullet pattern.
     *
     * @return always {@code null}
     */
    private Element unwindGeneralList() {
        // unwind
        for( ; m_genlistlevel > 0; m_genlistlevel-- ) {
            popElement( "li" );
            popElement( getListType( m_genlistBulletBuffer.charAt( m_genlistlevel - 1 ) ) );
        }
        m_genlistBulletBuffer.setLength( 0 );
        return null;
    }


    /**
     * Opens a definition list ({@code dl}) and its term ({@code dt}) unless one is already open.
     *
     * @return the pushed {@code dt} element, or {@code null} if a definition is already in progress
     */
    private Element handleDefinitionList() {
        if( !m_isdefinition ) {
            m_isdefinition = true;
            startBlockLevel();
            pushElement( new Element( "dl" ) );
            return pushElement( new Element( "dt" ) );
        }
        return null;
    }

    /**
     * Handles an opening square bracket: either an escaped bracket run, which is emitted as text,
     * or a link/plugin body, which is collected up to its closing bracket and passed to
     * {@code handleHyperlinks}.
     *
     * @return the current element for escaped brackets and unterminated links, otherwise whatever
     *         {@code handleHyperlinks} returns
     * @throws IOException if reading or pushing back a token fails
     */
    private Element handleOpenbracket() throws IOException {
        final StringBuilder sb = new StringBuilder( 40 );
        final int pos = getPosition();
        int ch = nextToken();
        boolean isPlugin = false;
        // A second '[' escapes the bracket; collect the whole run of brackets as plain text.
        if( ch == '[' ) {
            if( m_wysiwygEditorMode ) {
                sb.append( '[' );
            }
            sb.append( ( char )ch );
            while( ( ch = nextToken() ) == '[' ) {
                sb.append( ( char )ch );
            }
        }

        if( ch == '{' ) {
            isPlugin = true;
        }

        pushBack( ch );

        if( sb.length() > 0 ) {
            m_plainTextBuf.append( sb );
            return m_currentElement;
        }

        // Find end of hyperlink
        ch = nextToken();
        int nesting = 1;    // Check for nested plugins
        while( ch != -1 ) {
            // Peek one token ahead without consuming it.
            final int ch2 = nextToken();
            pushBack( ch2 );
            if( isPlugin ) {
                if( ch == '[' && ch2 == '{' ) {
                    nesting++;
                } else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) {
                    break;
                } else if( ch == '}' && ch2 == ']' ) {
                    // NB: This will be decremented once at the end
                    nesting--;
                }
            } else {
                if( ch == ']' ) {
                    break;
                }
            }

            sb.append( (char) ch );

            ch = nextToken();
        }

        // If the link is never finished, do some tricks to display the rest of the line unchanged.
        if( ch == -1 ) {
            LOG.debug( "Warning: unterminated link detected!" );
            m_isEscaping = true;
            m_plainTextBuf.append( sb );
            flushPlainText();
            m_isEscaping = false;
            return m_currentElement;
        }

        return handleHyperlinks( sb.toString(), pos );
    }

    /**
     * Reads the stream until the current brace is closed or stream end.
     * Backslashes are dropped from the result; nested opening/closing pairs are kept.
     *
     * @param opening the character that increases the nesting level
     * @param closing the character that decreases the nesting level
     * @return the text read, without the final closing character
     * @throws IOException if reading a token fails
     */
    private String readBraceContent( final char opening, final char closing ) throws IOException {
        final StringBuilder sb = new StringBuilder( 40 );
        // The opening brace has already been consumed by the caller, hence level 1.
        int braceLevel = 1;
        int ch;
        while( ( ch = nextToken() ) != -1 ) {
            if( ch == '\\' ) {
                continue;
            } else if( ch == opening ) {
                braceLevel++;
            } else if( ch == closing ) {
                braceLevel--;
                if( braceLevel == 0 ) {
                    break;
                }
            }
            sb.append( ( char ) ch );
        }
        return sb.toString();
    }


    /**
     * Handles constructs of type %%(style) and %%class
     * @return An Element containing the div or span, depending on the situation.
1439 * @throws IOException 1440 */ 1441 private Element handleDiv( ) throws IOException { 1442 int ch = nextToken(); 1443 Element el = null; 1444 1445 if( ch == '%' ) { 1446 String style = null; 1447 String clazz = null; 1448 1449 ch = nextToken(); 1450 1451 // Style or class? 1452 if( ch == '(' ) { 1453 style = readBraceContent('(',')'); 1454 } else if( Character.isLetter( (char) ch ) ) { 1455 pushBack( ch ); 1456 clazz = readUntil( "( \t\n\r" ); 1457 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 1458 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 1459 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 1460 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 1461 1462 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 1463 //(2) remove any invalid character 1464 if( clazz != null ) { 1465 clazz = clazz.replace( '.', ' ' ) 1466 .replaceAll( "[^\\s-_\\w\\x200-\\x377]+", "" ); 1467 } 1468 ch = nextToken(); 1469 1470 // check for %%class1.class2( style information ) 1471 if( ch == '(' ) { 1472 style = readBraceContent( '(', ')' ); 1473 // Pop out only spaces, so that the upcoming EOL check does not check the next line. 1474 } else if( ch == '\n' || ch == '\r' ) { 1475 pushBack( ch ); 1476 } 1477 } else { 1478 // Anything else stops. 1479 pushBack( ch ); 1480 try { 1481 final Boolean isSpan = m_styleStack.pop(); 1482 if( isSpan == null ) { 1483 // Fail quietly 1484 } else if( isSpan ) { 1485 el = popElement( "span" ); 1486 } else { 1487 el = popElement( "div" ); 1488 } 1489 } catch( final EmptyStackException e ) { 1490 LOG.debug( "Page '" + m_context.getName() + "' closes a %%-block that has not been opened." 
); 1491 return m_currentElement; 1492 } 1493 return el; 1494 } 1495 1496 // Check if there is an attempt to do something nasty 1497 try { 1498 style = StringEscapeUtils.unescapeHtml4(style); 1499 if( style != null && style.contains( "javascript:" ) ) { 1500 LOG.debug( "Attempt to output javascript within CSS: {}", style ); 1501 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1502 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 1503 } 1504 } catch( final NumberFormatException e ) { 1505 // If there are unknown entities, we don't want the parser to stop. 1506 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1507 final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 1508 return addElement( makeError( msg ) ); 1509 } 1510 1511 // Decide if we should open a div or a span? 1512 final String eol = peekAheadLine(); 1513 1514 if( !eol.trim().isEmpty() ) { 1515 // There is stuff after the class 1516 el = new Element("span"); 1517 m_styleStack.push( Boolean.TRUE ); 1518 } else { 1519 startBlockLevel(); 1520 el = new Element("div"); 1521 m_styleStack.push( Boolean.FALSE ); 1522 } 1523 1524 if( style != null ) el.setAttribute("style", style); 1525 if( clazz != null ) el.setAttribute("class", clazz); 1526 return pushElement( el ); 1527 } 1528 pushBack( ch ); 1529 return el; 1530 } 1531 1532 private Element handleSlash( ) throws IOException { 1533 final int ch = nextToken(); 1534 pushBack( ch ); 1535 if( ch == '%' && !m_styleStack.isEmpty() ) { 1536 return handleDiv(); 1537 } 1538 1539 return null; 1540 } 1541 1542 private Element handleBar( final boolean newLine ) throws IOException { 1543 Element el; 1544 if( !m_istable && !newLine ) { 1545 return null; 1546 } 1547 1548 // If the bar is in the first column, we will either start a new table or continue the old one. 
1549 if( newLine ) { 1550 if( !m_istable ) { 1551 startBlockLevel(); 1552 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 1553 m_istable = true; 1554 m_rowNum = 0; 1555 } 1556 1557 m_rowNum++; 1558 final Element tr = ( m_rowNum % 2 != 0 ) 1559 ? new Element("tr").setAttribute("class", "odd") 1560 : new Element("tr"); 1561 el = pushElement( tr ); 1562 } 1563 1564 // Check out which table cell element to start; a header element (th) or a regular element (td). 1565 final int ch = nextToken(); 1566 if( ch == '|' ) { 1567 if( !newLine ) { 1568 el = popElement("th"); 1569 if( el == null ) popElement("td"); 1570 } 1571 el = pushElement( new Element("th") ); 1572 } else { 1573 if( !newLine ) { 1574 el = popElement( "td" ); 1575 if( el == null ) popElement( "th" ); 1576 } 1577 el = pushElement( new Element("td") ); 1578 pushBack( ch ); 1579 } 1580 return el; 1581 } 1582 1583 /** 1584 * Generic escape of next character or entity. 1585 */ 1586 private Element handleTilde() throws IOException { 1587 final int ch = nextToken(); 1588 1589 if( ch == ' ' ) { 1590 if( m_wysiwygEditorMode ) { 1591 m_plainTextBuf.append( "~ " ); 1592 } 1593 return m_currentElement; 1594 } 1595 1596 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 1597 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 1598 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) { 1599 if( m_wysiwygEditorMode ) { 1600 m_plainTextBuf.append( '~' ); 1601 } 1602 m_plainTextBuf.append( ( char ) ch ); 1603 m_plainTextBuf.append( readWhile( "" + ( char ) ch ) ); 1604 return m_currentElement; 1605 } 1606 // No escape. 
1607 pushBack( ch ); 1608 return null; 1609 } 1610 1611 private void fillBuffer( final Element startElement ) throws IOException { 1612 m_currentElement = startElement; 1613 m_newLine = true; 1614 boolean quitReading = false; 1615 disableOutputEscaping(); 1616 while( !quitReading ) { 1617 final int ch = nextToken(); 1618 if( ch == -1 ) { 1619 break; 1620 } 1621 1622 // Check if we're actually ending the preformatted mode. We still must do an entity transformation here. 1623 if( m_isEscaping ) { 1624 if( ch == '}' ) { 1625 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 1626 } else if( ch == -1 ) { 1627 quitReading = true; 1628 } 1629 else if( ch == '\r' ) { 1630 // DOS line feeds we ignore. 1631 } else if( ch == '<' ) { 1632 m_plainTextBuf.append( "<" ); 1633 } else if( ch == '>' ) { 1634 m_plainTextBuf.append( ">" ); 1635 } else if( ch == '&' ) { 1636 m_plainTextBuf.append( "&" ); 1637 } else if( ch == '~' ) { 1638 String braces = readWhile( "}" ); 1639 if( braces.length() >= 3 ) { 1640 m_plainTextBuf.append( "}}}" ); 1641 braces = braces.substring(3); 1642 } else { 1643 m_plainTextBuf.append( (char) ch ); 1644 } 1645 1646 for( int i = braces.length()-1; i >= 0; i-- ) { 1647 pushBack( braces.charAt( i ) ); 1648 } 1649 } else { 1650 m_plainTextBuf.append( (char) ch ); 1651 } 1652 1653 continue; 1654 } 1655 1656 // An empty line stops a list 1657 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) { 1658 m_plainTextBuf.append(unwindGeneralList()); 1659 } 1660 1661 if( m_newLine && ch != '|' && m_istable ) { 1662 popElement( "table" ); 1663 m_istable = false; 1664 } 1665 1666 int skip = IGNORE; 1667 // Do the actual parsing and catch any errors. 
1668 try { 1669 skip = parseToken( ch ); 1670 } catch( final IllegalDataException e ) { 1671 LOG.info( "Page {} contains data which cannot be added to DOM tree: {}", m_context.getPage().getName(), e.getMessage() ); 1672 makeError( "Error: " + cleanupSuspectData( e.getMessage() ) ); 1673 } 1674 1675 // The idea is as follows: If the handler method returns an element (el != null), it is assumed that it 1676 // has been added in the stack. Otherwise, the character is added as is to the plaintext buffer. 1677 // 1678 // For the transition phase, if s != null, it also gets added in the plaintext buffer. 1679 switch( skip ) { 1680 case ELEMENT: 1681 m_newLine = false; 1682 break; 1683 1684 case CHARACTER: 1685 m_plainTextBuf.append( (char) ch ); 1686 m_newLine = false; 1687 break; 1688 1689 case IGNORE: 1690 default: 1691 break; 1692 } 1693 } 1694 1695 closeHeadings(); 1696 popElement( "domroot" ); 1697 } 1698 1699 private String cleanupSuspectData( final String s ) { 1700 final StringBuilder sb = new StringBuilder( s.length() ); 1701 for( int i = 0; i < s.length(); i++ ) { 1702 final char c = s.charAt(i); 1703 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 1704 else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() ); 1705 } 1706 1707 return sb.toString(); 1708 } 1709 1710 /** The token is a plain character. */ 1711 protected static final int CHARACTER = 0; 1712 1713 /** The token is a wikimarkup element. */ 1714 protected static final int ELEMENT = 1; 1715 1716 /** The token is to be ignored. */ 1717 protected static final int IGNORE = 2; 1718 1719 /** 1720 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 1721 * you think this was a wiki markup element, and IGNORE, if you think 1722 * we should ignore this altogether. 1723 * <p> 1724 * To add your own MarkupParser, you can override this method, but it 1725 * is recommended that you call super.parseToken() as well to gain advantage 1726 * of JSPWiki's own markup. 
You can call it at the start of your own 1727 * parseToken() or end - it does not matter. 1728 * 1729 * @param ch The character under investigation 1730 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 1731 * @throws IOException If parsing fails. 1732 */ 1733 protected int parseToken( final int ch ) throws IOException { 1734 Element el = null; 1735 // Now, check the incoming token. 1736 switch( ch ) { 1737 case '\r': 1738 // DOS linefeeds we forget 1739 return IGNORE; 1740 1741 case '\n': 1742 // Close things like headings, etc. 1743 // FIXME: This is not really very fast 1744 closeHeadings(); 1745 1746 popElement( "dl" ); // Close definition lists. 1747 if( m_istable ) { 1748 popElement("tr"); 1749 } 1750 m_isdefinition = false; 1751 if( m_newLine ) { 1752 // Paragraph change. 1753 startBlockLevel(); 1754 // Figure out which elements cannot be enclosed inside a <p></p> pair according to XHTML rules. 1755 final String nextLine = peekAheadLine(); 1756 if( nextLine.isEmpty() || 1757 ( !nextLine.isEmpty() && 1758 !nextLine.startsWith( "{{{" ) && 1759 !nextLine.startsWith( "----" ) && 1760 !nextLine.startsWith( "%%" ) && 1761 "*#!;".indexOf( nextLine.charAt( 0 ) ) == -1 ) ) { 1762 pushElement( new Element( "p" ) ); 1763 m_isOpenParagraph = true; 1764 1765 if( m_restartitalic ) { 1766 pushElement( new Element( "i" ) ); 1767 m_isitalic = true; 1768 m_restartitalic = false; 1769 } 1770 if( m_restartbold ) { 1771 pushElement( new Element( "b" ) ); 1772 m_isbold = true; 1773 m_restartbold = false; 1774 } 1775 } 1776 } else { 1777 m_plainTextBuf.append("\n"); 1778 m_newLine = true; 1779 } 1780 return IGNORE; 1781 1782 case '\\': 1783 el = handleBackslash(); 1784 break; 1785 1786 case '_': 1787 el = handleUnderscore(); 1788 break; 1789 1790 case '\'': 1791 el = handleApostrophe(); 1792 break; 1793 1794 case '{': 1795 el = handleOpenbrace( m_newLine ); 1796 break; 1797 1798 case '}': 1799 el = handleClosebrace(); 1800 break; 1801 1802 case '-': 1803 if( 
m_newLine ) { 1804 el = handleDash(); 1805 } 1806 break; 1807 1808 case '!': 1809 if( m_newLine ) { 1810 el = handleHeading(); 1811 } 1812 break; 1813 1814 case ';': 1815 if( m_newLine ) { 1816 el = handleDefinitionList(); 1817 } 1818 break; 1819 1820 case ':': 1821 if( m_isdefinition ) { 1822 popElement( "dt" ); 1823 el = pushElement( new Element( "dd" ) ); 1824 m_isdefinition = false; 1825 } 1826 break; 1827 1828 case '[': 1829 el = handleOpenbracket(); 1830 break; 1831 1832 case '*': 1833 if( m_newLine ) { 1834 pushBack( '*' ); 1835 el = handleGeneralList(); 1836 } 1837 break; 1838 1839 case '#': 1840 if( m_newLine ) { 1841 pushBack( '#' ); 1842 el = handleGeneralList(); 1843 } 1844 break; 1845 1846 case '|': 1847 el = handleBar( m_newLine ); 1848 break; 1849 1850 case '~': 1851 el = handleTilde(); 1852 break; 1853 1854 case '%': 1855 el = handleDiv(); 1856 break; 1857 1858 case '/': 1859 el = handleSlash(); 1860 break; 1861 1862 default: 1863 break; 1864 } 1865 1866 return el != null ? ELEMENT : CHARACTER; 1867 } 1868 1869 private void closeHeadings() { 1870 if( m_lastHeading != null && !m_wysiwygEditorMode ) { 1871 // Add the hash anchor element at the end of the heading 1872 addElement( new Element("a").setAttribute( "class",HASHLINK ) 1873 .setAttribute( "href","#" + m_lastHeading.m_titleAnchor ) 1874 .setText( "#" ) ); 1875 m_lastHeading = null; 1876 } 1877 popElement( "h2" ); 1878 popElement( "h3" ); 1879 popElement( "h4" ); 1880 } 1881 1882 /** 1883 * Parses the entire document from the Reader given in the constructor or set by {@link #setInputReader(Reader)}. 1884 * 1885 * @return A WikiDocument, ready to be passed to the renderer. 1886 * @throws IOException If parsing cannot be accomplished. 
1887 */ 1888 @Override 1889 public WikiDocument parse() throws IOException { 1890 final WikiDocument d = new WikiDocument( m_context.getPage() ); 1891 d.setContext( m_context ); 1892 final Element rootElement = new Element( "domroot" ); 1893 d.setRootElement( rootElement ); 1894 fillBuffer( rootElement ); 1895 paragraphify( rootElement ); 1896 1897 return d; 1898 } 1899 1900 /** 1901 * Checks out that the first paragraph is correctly installed. 1902 * 1903 * @param rootElement element to be checked. 1904 */ 1905 private void paragraphify( final Element rootElement) { 1906 // Add the paragraph tag to the first paragraph 1907 final List< Content > kids = rootElement.getContent(); 1908 if( rootElement.getChild( "p" ) != null ) { 1909 final ArrayList<Content> ls = new ArrayList<>(); 1910 int idxOfFirstContent = 0; 1911 int count = 0; 1912 1913 for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) { 1914 final Content c = i.next(); 1915 if( c instanceof Element ) { 1916 final String name = ( ( Element )c ).getName(); 1917 if( isBlockLevel( name ) ) { 1918 break; 1919 } 1920 } 1921 1922 if( !( c instanceof ProcessingInstruction ) ) { 1923 ls.add( c ); 1924 if( idxOfFirstContent == 0 ) { 1925 idxOfFirstContent = count; 1926 } 1927 } 1928 } 1929 1930 // If there were any elements, then add a new <p> (unless it would be an empty one) 1931 if(!ls.isEmpty()) { 1932 final Element newel = new Element("p"); 1933 for( final Content c : ls ) { 1934 c.detach(); 1935 newel.addContent( c ); 1936 } 1937 1938 // Make sure there are no empty <p/> tags added. 1939 if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() ) { 1940 rootElement.addContent( idxOfFirstContent, newel ); 1941 } 1942 } 1943 } 1944 } 1945 1946}