001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import java.io.IOException; 022import java.io.Reader; 023import java.io.StringReader; 024import java.text.MessageFormat; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.EmptyStackException; 029import java.util.HashMap; 030import java.util.Iterator; 031import java.util.List; 032import java.util.Map; 033import java.util.Properties; 034import java.util.ResourceBundle; 035import java.util.Stack; 036 037import javax.xml.transform.Result; 038 039import org.apache.commons.lang.StringEscapeUtils; 040import org.apache.commons.lang.StringUtils; 041import org.apache.log4j.Logger; 042import org.apache.oro.text.regex.MalformedPatternException; 043import org.apache.oro.text.regex.MatchResult; 044import org.apache.oro.text.regex.Pattern; 045import org.apache.oro.text.regex.PatternCompiler; 046import org.apache.oro.text.regex.PatternMatcher; 047import org.apache.oro.text.regex.Perl5Compiler; 048import org.apache.oro.text.regex.Perl5Matcher; 049import org.apache.wiki.InternalWikiException; 050import org.apache.wiki.StringTransmutator; 051import org.apache.wiki.WikiContext; 052import org.apache.wiki.WikiPage; 053import org.apache.wiki.api.exceptions.PluginException; 054import org.apache.wiki.api.plugin.WikiPlugin; 055import org.apache.wiki.auth.WikiSecurityException; 056import org.apache.wiki.auth.acl.Acl; 057import org.apache.wiki.i18n.InternationalizationManager; 058import org.apache.wiki.preferences.Preferences; 059import org.apache.wiki.render.CleanTextRenderer; 060import org.apache.wiki.render.RenderingManager; 061import org.apache.wiki.util.TextUtil; 062import org.jdom2.Attribute; 063import org.jdom2.Content; 064import org.jdom2.Element; 065import org.jdom2.IllegalDataException; 066import org.jdom2.ProcessingInstruction; 067import org.jdom2.Verifier; 068 069/** 070 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 071 * heart and soul of JSPWiki : make sure you test properly anything that is added, 072 * or else it breaks down horribly. 073 * 074 * @since 2.4 075 */ 076public class JSPWikiMarkupParser extends MarkupParser { 077 078 protected static final int READ = 0; 079 protected static final int EDIT = 1; 080 protected static final int EMPTY = 2; // Empty message 081 protected static final int LOCAL = 3; 082 protected static final int LOCALREF = 4; 083 protected static final int IMAGE = 5; 084 protected static final int EXTERNAL = 6; 085 protected static final int INTERWIKI = 7; 086 protected static final int IMAGELINK = 8; 087 protected static final int IMAGEWIKILINK = 9; 088 protected static final int ATTACHMENT = 10; 089 090 private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class ); 091 092 private boolean m_isbold = false; 093 private boolean m_isitalic = false; 094 private boolean m_istable = false; 095 private boolean m_isPre = false; 096 private boolean m_isEscaping = false; 097 private boolean m_isdefinition = false; 098 private boolean m_isPreBlock = false; 099 100 /** Contains style information, in multiple forms. */ 101 private Stack<Boolean> m_styleStack = new Stack<>(); 102 103 // general list handling 104 private int m_genlistlevel = 0; 105 private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern 106 private boolean m_allowPHPWikiStyleLists = true; 107 108 private boolean m_isOpenParagraph = false; 109 110 /** Parser for extended link functionality. */ 111 private LinkParser m_linkParser = new LinkParser(); 112 113 /** Keeps track of any plain text that gets put in the Text nodes */ 114 private StringBuilder m_plainTextBuf = new StringBuilder(20); 115 116 private Element m_currentElement; 117 118 /** Keep track of duplicate header names. */ 119 private Map<String, Integer> m_titleSectionCounter = new HashMap<>(); 120 121 /** If true, consider CamelCase hyperlinks as well. */ 122 public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks"; 123 124 /** If true, all hyperlinks are translated as well, regardless whether they 125 are surrounded by brackets. */ 126 public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris"; 127 128 /** If true, all outward attachment info links have a small link image appended. */ 129 public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage"; 130 131 /** If true, then considers CamelCase links as well. */ 132 private boolean m_camelCaseLinks = false; 133 134 /** If true, then generate special output for wysiwyg editing in certain cases */ 135 private boolean m_wysiwygEditorMode = false; 136 137 /** If true, consider URIs that have no brackets as well. */ 138 // FIXME: Currently reserved, but not used. 139 private boolean m_plainUris = false; 140 141 /** If true, all outward links use a small link image. */ 142 private boolean m_useOutlinkImage = true; 143 144 private boolean m_useAttachmentImage = true; 145 146 /** If true, allows raw HTML. */ 147 private boolean m_allowHTML = false; 148 149 private boolean m_useRelNofollow = false; 150 151 private PatternCompiler m_compiler = new Perl5Compiler(); 152 153 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 154 155 private PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 156 private Pattern m_camelCasePattern; 157 158 private int m_rowNum = 1; 159 160 private Heading m_lastHeading = null; 161 162 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 163 164 /** 165 * Creates a markup parser. 166 * 167 * @param context The WikiContext which controls the parsing 168 * @param in Where the data is read from. 169 */ 170 public JSPWikiMarkupParser( WikiContext context, Reader in ) 171 { 172 super( context, in ); 173 initialize(); 174 } 175 176 // FIXME: parsers should be pooled for better performance. 177 private void initialize() 178 { 179 initInlineImagePatterns(); 180 181 m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN ); 182 if( m_camelCasePattern == null ) 183 { 184 try 185 { 186 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX, 187 Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 188 } 189 catch( MalformedPatternException e ) 190 { 191 log.fatal("Internal error: Someone put in a faulty pattern.",e); 192 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 193 } 194 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 195 } 196 // 197 // Set the properties. 198 // 199 Properties props = m_engine.getWikiProperties(); 200 201 String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 202 203 if( cclinks != null ) 204 { 205 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 206 } 207 else 208 { 209 m_camelCaseLinks = TextUtil.getBooleanProperty( props, 210 PROP_CAMELCASELINKS, 211 m_camelCaseLinks ); 212 } 213 214 Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE ); 215 if( wysiwygVariable != null ) 216 { 217 m_wysiwygEditorMode = wysiwygVariable.booleanValue(); 218 } 219 220 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 221 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 222 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 223 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 224 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 225 226 if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null ) 227 { 228 disableAccessRules(); 229 } 230 231 m_context.getPage().setHasMetadata(); 232 } 233 234 /** 235 * Calls a transmutator chain. 236 * 237 * @param list Chain to call 238 * @param text Text that should be passed to the mutate() method of each of the mutators in the chain. 239 * @return The result of the mutation. 240 */ 241 protected String callMutatorChain( Collection< StringTransmutator > list, String text ) 242 { 243 if( list == null || list.size() == 0 ) 244 { 245 return text; 246 } 247 248 for( Iterator< StringTransmutator > i = list.iterator(); i.hasNext(); ) 249 { 250 StringTransmutator m = i.next(); 251 252 text = m.mutate( m_context, text ); 253 } 254 255 return text; 256 } 257 258 /** 259 * Calls the heading listeners. 260 * 261 * @param param A Heading object. 262 */ 263 protected void callHeadingListenerChain( Heading param ) 264 { 265 List< HeadingListener > list = m_headingListenerChain; 266 267 for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); ) 268 { 269 HeadingListener h = i.next(); 270 271 h.headingAdded( m_context, param ); 272 } 273 } 274 275 /** 276 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 277 * if you really know what you are doing. 278 * 279 * @param type One of the types above 280 * @param link URL to which to link to 281 * @param text Link text 282 * @param section If a particular section identifier is required. 283 * @return An A element. 284 * @since 2.4.78 285 */ 286 protected Element createAnchor(int type, String link, String text, String section) 287 { 288 text = escapeHTMLEntities( text ); 289 section = escapeHTMLEntities( section ); 290 Element el = new Element("a"); 291 el.setAttribute("class",CLASS_TYPES[type]); 292 el.setAttribute("href",link+section); 293 el.addContent(text); 294 return el; 295 } 296 297 private Element makeLink( int type, String link, String text, String section, Iterator< Attribute > attributes ) 298 { 299 Element el = null; 300 301 if( text == null ) text = link; 302 303 text = callMutatorChain( m_linkMutators, text ); 304 305 section = (section != null) ? ("#"+section) : ""; 306 307 // Make sure we make a link name that can be accepted 308 // as a valid URL. 309 310 if( link.length() == 0 ) 311 { 312 type = EMPTY; 313 } 314 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 315 316 switch(type) 317 { 318 case READ: 319 el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section ); 320 break; 321 322 case EDIT: 323 el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" ); 324 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 325 326 break; 327 328 case EMPTY: 329 el = new Element("u").addContent(text); 330 break; 331 332 // 333 // These two are for local references - footnotes and 334 // references to footnotes. 335 // We embed the page name (or whatever WikiContext gives us) 336 // to make sure the links are unique across Wiki. 337 // 338 case LOCALREF: 339 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 340 break; 341 342 case LOCAL: 343 el = new Element("a").setAttribute("class",CLASS_FOOTNOTE); 344 el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1)); 345 el.addContent("["+text+"]"); 346 break; 347 348 // 349 // With the image, external and interwiki types we need to 350 // make sure nobody can put in Javascript or something else 351 // annoying into the links themselves. We do this by preventing 352 // a haxor from stopping the link name short with quotes in 353 // fillBuffer(). 354 // 355 case IMAGE: 356 el = new Element("img").setAttribute("class","inline"); 357 el.setAttribute("src",link); 358 el.setAttribute("alt",text); 359 break; 360 361 case IMAGELINK: 362 el = new Element("img").setAttribute("class","inline"); 363 el.setAttribute("src",link); 364 el.setAttribute("alt",text); 365 el = createAnchor(IMAGELINK,text,"","").addContent(el); 366 break; 367 368 case IMAGEWIKILINK: 369 String pagelink = m_context.getURL(WikiContext.VIEW,text); 370 el = new Element("img").setAttribute("class","inline"); 371 el.setAttribute("src",link); 372 el.setAttribute("alt",text); 373 el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el); 374 break; 375 376 case EXTERNAL: 377 el = createAnchor( EXTERNAL, link, text, section ); 378 if( m_useRelNofollow ) el.setAttribute("rel","nofollow"); 379 break; 380 381 case INTERWIKI: 382 el = createAnchor( INTERWIKI, link, text, section ); 383 break; 384 385 case ATTACHMENT: 386 String attlink = m_context.getURL( WikiContext.ATTACH, 387 link ); 388 389 String infolink = m_context.getURL( WikiContext.INFO, 390 link ); 391 392 String imglink = m_context.getURL( WikiContext.NONE, 393 "images/attachment_small.png" ); 394 395 el = createAnchor( ATTACHMENT, attlink, text, "" ); 396 397 if( m_engine.getAttachmentManager().forceDownload( attlink ) ) 398 { 399 el.setAttribute("download", ""); 400 } 401 402 pushElement(el); 403 popElement(el.getName()); 404 405 if( m_useAttachmentImage ) 406 { 407 el = new Element("img").setAttribute("src",imglink); 408 el.setAttribute("border","0"); 409 el.setAttribute("alt","(info)"); 410 411 el = new Element("a").setAttribute("href",infolink).addContent(el); 412 el.setAttribute("class","infolink"); 413 } 414 else 415 { 416 el = null; 417 } 418 break; 419 420 default: 421 break; 422 } 423 424 if( el != null && attributes != null ) 425 { 426 while( attributes.hasNext() ) 427 { 428 Attribute attr = attributes.next(); 429 if( attr != null ) 430 { 431 el.setAttribute(attr); 432 } 433 } 434 } 435 436 if( el != null ) 437 { 438 flushPlainText(); 439 m_currentElement.addContent( el ); 440 } 441 return el; 442 } 443 444 445 /** 446 * Figures out if a link is an off-site link. This recognizes 447 * the most common protocols by checking how it starts. 448 * 449 * @param link The link to check. 450 * @return true, if this is a link outside of this wiki. 451 * @since 2.4 452 * @deprecated - use {@link LinkParsingOperations#isExternalLink(String)} instead. 453 */ 454 @Deprecated 455 public static boolean isExternalLink( String link ) 456 { 457 return new LinkParsingOperations(null).isExternalLink( link ); 458 } 459 460 /** 461 * Returns true if the link is really command to insert 462 * a plugin. 463 * <P> 464 * Currently we just check if the link starts with "{INSERT", 465 * or just plain "{" but not "{$". 466 * 467 * @param link Link text, i.e. the contents of text between []. 468 * @return True, if this link seems to be a command to insert a plugin here. 469 * @deprecated Use {@link LinkParsingOperations#isPluginLink(String)} instead, 470 */ 471 @Deprecated 472 public static boolean isPluginLink( String link ) 473 { 474 return new LinkParsingOperations( null ).isPluginLink( link ); 475 } 476 477 /** 478 * These are all of the HTML 4.01 block-level elements. 479 */ 480 private static final String[] BLOCK_ELEMENTS = { 481 "address", "blockquote", "div", "dl", "fieldset", "form", 482 "h1", "h2", "h3", "h4", "h5", "h6", 483 "hr", "noscript", "ol", "p", "pre", "table", "ul" 484 }; 485 486 private static boolean isBlockLevel( String name ) 487 { 488 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 489 } 490 491 /** 492 * This method peeks ahead in the stream until EOL and returns the result. 493 * It will keep the buffers untouched. 494 * 495 * @return The string from the current position to the end of line. 496 */ 497 498 // FIXME: Always returns an empty line, even if the stream is full. 499 private String peekAheadLine() 500 throws IOException 501 { 502 String s = readUntilEOL().toString(); 503 504 if( s.length() > PUSHBACK_BUFFER_SIZE ) 505 { 506 log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover..."); 507 pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) ); 508 } 509 else 510 { 511 try 512 { 513 pushBack( s ); 514 } 515 catch( IOException e ) 516 { 517 log.warn("Pushback failed: the line is probably too long. Attempting to recover."); 518 } 519 } 520 return s; 521 } 522 523 private int flushPlainText() 524 { 525 int numChars = m_plainTextBuf.length(); 526 527 if( numChars > 0 ) 528 { 529 String buf; 530 531 if( !m_allowHTML ) 532 { 533 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 534 } 535 else 536 { 537 buf = m_plainTextBuf.toString(); 538 } 539 // 540 // We must first empty the buffer because the side effect of 541 // calling makeCamelCaseLink() is to call this routine. 542 // 543 544 m_plainTextBuf = new StringBuilder(20); 545 546 try 547 { 548 // 549 // This is the heaviest part of parsing, and therefore we can 550 // do some optimization here. 551 // 552 // 1) Only when the length of the buffer is big enough, we try to do the match 553 // 554 555 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) 556 { 557 // System.out.println("Buffer="+buf); 558 559 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) 560 { 561 MatchResult result = m_camelCaseMatcher.getMatch(); 562 563 String firstPart = buf.substring(0,result.beginOffset(0)); 564 String prefix = result.group(1); 565 566 if( prefix == null ) prefix = ""; 567 568 String camelCase = result.group(2); 569 String protocol = result.group(3); 570 String uri = protocol+result.group(4); 571 buf = buf.substring(result.endOffset(0)); 572 573 m_currentElement.addContent( firstPart ); 574 575 // 576 // Check if the user does not wish to do URL or WikiWord expansion 577 // 578 if( prefix.endsWith("~") || prefix.indexOf('[') != -1 ) 579 { 580 if( prefix.endsWith("~") ) 581 { 582 if( m_wysiwygEditorMode ) 583 { 584 m_currentElement.addContent( "~" ); 585 } 586 prefix = prefix.substring(0,prefix.length()-1); 587 } 588 if( camelCase != null ) 589 { 590 m_currentElement.addContent( prefix+camelCase ); 591 } 592 else if( protocol != null ) 593 { 594 m_currentElement.addContent( prefix+uri ); 595 } 596 continue; 597 } 598 599 // 600 // Fine, then let's check what kind of a link this was 601 // and emit the proper elements 602 // 603 if( protocol != null ) 604 { 605 char c = uri.charAt(uri.length()-1); 606 if( c == '.' || c == ',' ) 607 { 608 uri = uri.substring(0,uri.length()-1); 609 buf = c + buf; 610 } 611 // System.out.println("URI match "+uri); 612 m_currentElement.addContent( prefix ); 613 makeDirectURILink( uri ); 614 } 615 else 616 { 617 // System.out.println("Matched: '"+camelCase+"'"); 618 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 619 // System.out.println("prefix="+prefix); 620 m_currentElement.addContent( prefix ); 621 622 makeCamelCaseLink( camelCase ); 623 } 624 } 625 626 m_currentElement.addContent( buf ); 627 } 628 else 629 { 630 // 631 // No camelcase asked for, just add the elements 632 // 633 m_currentElement.addContent( buf ); 634 } 635 } 636 catch( IllegalDataException e ) 637 { 638 // 639 // Sometimes it's possible that illegal XML chars is added to the data. 640 // Here we make sure it does not stop parsing. 641 // 642 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 643 } 644 } 645 646 return numChars; 647 } 648 649 /** 650 * Escapes XML entities in a HTML-compatible way (i.e. does not escape 651 * entities that are already escaped). 652 * 653 * @param buf 654 * @return An escaped string. 655 */ 656 private String escapeHTMLEntities(String buf) 657 { 658 StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); 659 660 for( int i = 0; i < buf.length(); i++ ) 661 { 662 char ch = buf.charAt(i); 663 664 if( ch == '<' ) 665 { 666 tmpBuf.append("<"); 667 } 668 else if( ch == '>' ) 669 { 670 tmpBuf.append(">"); 671 } 672 else if( ch == '\"' ) 673 { 674 tmpBuf.append("""); 675 } 676 else if( ch == '&' ) 677 { 678 // 679 // If the following is an XML entity reference (&#.*;) we'll 680 // leave it as it is; otherwise we'll replace it with an & 681 // 682 683 boolean isEntity = false; 684 StringBuilder entityBuf = new StringBuilder(); 685 686 if( i < buf.length() -1 ) 687 { 688 for( int j = i; j < buf.length(); j++ ) 689 { 690 char ch2 = buf.charAt(j); 691 692 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) 693 { 694 entityBuf.append(ch2); 695 696 if( ch2 == ';' ) 697 { 698 isEntity = true; 699 break; 700 } 701 } 702 else 703 { 704 break; 705 } 706 } 707 } 708 709 if( isEntity ) 710 { 711 tmpBuf.append( entityBuf ); 712 i = i + entityBuf.length() - 1; 713 } 714 else 715 { 716 tmpBuf.append("&"); 717 } 718 719 } 720 else 721 { 722 tmpBuf.append( ch ); 723 } 724 } 725 726 return tmpBuf.toString(); 727 } 728 729 private Element pushElement( Element e ) 730 { 731 flushPlainText(); 732 m_currentElement.addContent( e ); 733 m_currentElement = e; 734 735 return e; 736 } 737 738 private Element addElement( Content e ) 739 { 740 if( e != null ) 741 { 742 flushPlainText(); 743 m_currentElement.addContent( e ); 744 } 745 return m_currentElement; 746 } 747 748 /** 749 * All elements that can be empty by the HTML DTD. 750 */ 751 // Keep sorted. 752 private static final String[] EMPTY_ELEMENTS = { 753 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 754 }; 755 756 /** 757 * Goes through the current element stack and pops all elements until this 758 * element is found - this essentially "closes" and element. 759 * 760 * @param s 761 * @return The new current element, or null, if there was no such element in the entire stack. 762 */ 763 private Element popElement( String s ) 764 { 765 int flushedBytes = flushPlainText(); 766 767 Element currEl = m_currentElement; 768 769 while( currEl.getParentElement() != null ) 770 { 771 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 772 { 773 m_currentElement = currEl.getParentElement(); 774 775 // 776 // Check if it's okay for this element to be empty. Then we will 777 // trick the JDOM generator into not generating an empty element, 778 // by putting an empty string between the tags. Yes, it's a kludge 779 // but what'cha gonna do about it. :-) 780 // 781 782 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 783 { 784 currEl.addContent(""); 785 } 786 787 return m_currentElement; 788 } 789 790 currEl = currEl.getParentElement(); 791 } 792 793 return null; 794 } 795 796 797 /** 798 * Reads the stream until it meets one of the specified 799 * ending characters, or stream end. The ending character will be left 800 * in the stream. 801 */ 802 private String readUntil( String endChars ) 803 throws IOException 804 { 805 StringBuilder sb = new StringBuilder( 80 ); 806 int ch = nextToken(); 807 808 while( ch != -1 ) 809 { 810 if( ch == '\\' ) 811 { 812 ch = nextToken(); 813 if( ch == -1 ) 814 { 815 break; 816 } 817 } 818 else 819 { 820 if( endChars.indexOf((char)ch) != -1 ) 821 { 822 pushBack( ch ); 823 break; 824 } 825 } 826 sb.append( (char) ch ); 827 ch = nextToken(); 828 } 829 830 return sb.toString(); 831 } 832 833 /** 834 * Reads the stream while the characters that have been specified are 835 * in the stream, returning then the result as a String. 836 */ 837 private String readWhile( String endChars ) 838 throws IOException 839 { 840 StringBuilder sb = new StringBuilder( 80 ); 841 int ch = nextToken(); 842 843 while( ch != -1 ) 844 { 845 if( endChars.indexOf((char)ch) == -1 ) 846 { 847 pushBack( ch ); 848 break; 849 } 850 851 sb.append( (char) ch ); 852 ch = nextToken(); 853 } 854 855 return sb.toString(); 856 } 857 858 private JSPWikiMarkupParser m_cleanTranslator; 859 860 /** 861 * Does a lazy init. Otherwise, we would get into a situation 862 * where HTMLRenderer would try and boot a TranslatorReader before 863 * the TranslatorReader it is contained by is up. 864 */ 865 private JSPWikiMarkupParser getCleanTranslator() 866 { 867 if( m_cleanTranslator == null ) 868 { 869 WikiContext dummyContext = new WikiContext( m_engine, 870 m_context.getHttpRequest(), 871 m_context.getPage() ); 872 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 873 874 m_cleanTranslator.m_allowHTML = true; 875 } 876 877 return m_cleanTranslator; 878 } 879 /** 880 * Modifies the "hd" parameter to contain proper values. Because 881 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 882 * % after url encoding with '_'. 883 * <p> 884 * Counts also duplicate headings (= headings with similar name), and 885 * attaches a counter. 886 */ 887 private String makeHeadingAnchor( String baseName, String title, Heading hd ) 888 { 889 hd.m_titleText = title; 890 title = MarkupParser.wikifyLink( title ); 891 892 hd.m_titleSection = m_engine.encodeName(title); 893 894 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) 895 { 896 Integer count = m_titleSectionCounter.get( hd.m_titleSection ); 897 count = count + 1; 898 m_titleSectionCounter.put( hd.m_titleSection, count ); 899 hd.m_titleSection += "-" + count; 900 } 901 else 902 { 903 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 904 } 905 906 hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+ 907 "-"+hd.m_titleSection; 908 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 909 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 910 911 return hd.m_titleAnchor; 912 } 913 914 private String makeSectionTitle( String title ) 915 { 916 title = title.trim(); 917 String outTitle; 918 919 try 920 { 921 JSPWikiMarkupParser dtr = getCleanTranslator(); 922 dtr.setInputReader( new StringReader(title) ); 923 924 CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse()); 925 926 outTitle = ctt.getString(); 927 } 928 catch( IOException e ) 929 { 930 log.fatal("CleanTranslator not working", e); 931 throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e); 932 } 933 934 return outTitle; 935 } 936 937 /** 938 * Returns XHTML for the heading. 939 * 940 * @param level The level of the heading. @see Heading 941 * @param title the title for the heading 942 * @param hd a List to which heading should be added 943 * @return An Element containing the heading 944 */ 945 public Element makeHeading( int level, String title, Heading hd ) 946 { 947 Element el = null; 948 949 String pageName = m_context.getPage().getName(); 950 951 String outTitle = makeSectionTitle( title ); 952 953 hd.m_level = level; 954 955 switch( level ) 956 { 957 case Heading.HEADING_SMALL: 958 el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 959 break; 960 961 case Heading.HEADING_MEDIUM: 962 el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 963 break; 964 965 case Heading.HEADING_LARGE: 966 el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 967 break; 968 969 default: 970 throw new InternalWikiException("Illegal heading type "+level); 971 } 972 973 974 return el; 975 } 976 977 /** 978 * When given a link to a WikiName, we just return 979 * a proper HTML link for it. The local link mutator 980 * chain is also called. 981 */ 982 private Element makeCamelCaseLink( String wikiname ) 983 { 984 String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 985 986 callMutatorChain( m_localLinkMutatorChain, wikiname ); 987 988 if( matchedLink != null ) { 989 makeLink( READ, matchedLink, wikiname, null, null ); 990 } else { 991 makeLink( EDIT, wikiname, wikiname, null, null ); 992 } 993 994 return m_currentElement; 995 } 996 997 /** Holds the image URL for the duration of this parser */ 998 private String m_outlinkImageURL = null; 999 1000 /** 1001 * Returns an element for the external link image (out.png). However, 1002 * this method caches the URL for the lifetime of this MarkupParser, 1003 * because it's commonly used, and we'll end up with possibly hundreds 1004 * our thousands of references to it... It's a lot faster, too. 1005 * 1006 * @return An element containing the HTML for the outlink image. 1007 */ 1008 private Element outlinkImage() 1009 { 1010 Element el = null; 1011 1012 if( m_useOutlinkImage ) 1013 { 1014 if( m_outlinkImageURL == null ) 1015 { 1016 m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE ); 1017 } 1018 1019 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 1020 el.setAttribute( "src", m_outlinkImageURL ); 1021 el.setAttribute( "alt","" ); 1022 } 1023 1024 return el; 1025 } 1026 1027 /** 1028 * Takes an URL and turns it into a regular wiki link. Unfortunately, 1029 * because of the way that flushPlainText() works, it already encodes 1030 * all of the XML entities. But so does WikiContext.getURL(), so we 1031 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 1032 * <p> 1033 * What a crappy problem. 1034 * 1035 * @param url 1036 * @return An anchor Element containing the link. 1037 */ 1038 private Element makeDirectURILink( String url ) 1039 { 1040 Element result; 1041 String last = null; 1042 1043 if( url.endsWith(",") || url.endsWith(".") ) 1044 { 1045 last = url.substring( url.length()-1 ); 1046 url = url.substring( 0, url.length()-1 ); 1047 } 1048 1049 callMutatorChain( m_externalLinkMutatorChain, url ); 1050 1051 if( m_linkParsingOperations.isImageLink( url ) ) 1052 { 1053 result = handleImageLink( StringUtils.replace(url,"&","&"), url, false ); 1054 } 1055 else 1056 { 1057 result = makeLink( EXTERNAL, StringUtils.replace(url,"&","&"), url, null, null ); 1058 addElement( outlinkImage() ); 1059 } 1060 1061 if( last != null ) 1062 { 1063 m_plainTextBuf.append(last); 1064 } 1065 1066 return result; 1067 } 1068 1069 /** 1070 * Image links are handled differently: 1071 * 1. If the text is a WikiName of an existing page, 1072 * it gets linked. 1073 * 2. If the text is an external link, then it is inlined. 1074 * 3. Otherwise it becomes an ALT text. 1075 * 1076 * @param reallink The link to the image. 1077 * @param link Link text portion, may be a link to somewhere else. 1078 * @param hasLinkText If true, then the defined link had a link text available. 1079 * This means that the link text may be a link to a wiki page, 1080 * or an external resource. 1081 */ 1082 1083 // FIXME: isExternalLink() is called twice. 1084 private Element handleImageLink( String reallink, String link, boolean hasLinkText ) 1085 { 1086 String possiblePage = MarkupParser.cleanLink( link ); 1087 1088 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) 1089 { 1090 return makeLink( IMAGELINK, reallink, link, null, null ); 1091 } 1092 else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) 1093 { 1094 // System.out.println("Orig="+link+", Matched: "+matchedLink); 1095 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 1096 1097 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 1098 } 1099 else 1100 { 1101 return makeLink( IMAGE, reallink, link, null, null ); 1102 } 1103 } 1104 1105 private Element handleAccessRule( String ruleLine ) 1106 { 1107 if( m_wysiwygEditorMode ) 1108 { 1109 m_currentElement.addContent( "[" + ruleLine + "]" ); 1110 } 1111 1112 if( !m_parseAccessRules ) return m_currentElement; 1113 Acl acl; 1114 WikiPage page = m_context.getRealPage(); 1115 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1116 1117 if( ruleLine.startsWith( "{" ) ) 1118 ruleLine = ruleLine.substring( 1 ); 1119 if( ruleLine.endsWith( "}" ) ) 1120 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1121 1122 if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine); 1123 1124 try 1125 { 1126 acl = m_engine.getAclManager().parseAcl( page, ruleLine ); 1127 1128 page.setAcl( acl ); 1129 1130 if( log.isDebugEnabled() ) log.debug( acl.toString() ); 1131 } 1132 catch( WikiSecurityException wse ) 1133 { 1134 return makeError( wse.getMessage() ); 1135 } 1136 1137 return m_currentElement; 1138 } 1139 1140 /** 1141 * Handles metadata setting [{SET foo=bar}] 1142 */ 1143 private Element handleMetadata( String link ) 1144 { 1145 if( m_wysiwygEditorMode ) 1146 { 1147 m_currentElement.addContent( "[" + link + "]" ); 1148 } 1149 1150 try 1151 { 1152 String args = link.substring( link.indexOf(' '), link.length()-1 ); 1153 1154 String name = args.substring( 0, args.indexOf('=') ); 1155 String val = args.substring( args.indexOf('=')+1, args.length() ); 1156 1157 name = name.trim(); 1158 val = val.trim(); 1159 1160 if( val.startsWith("'") ) val = val.substring( 1 ); 1161 if( val.endsWith("'") ) val = val.substring( 0, val.length()-1 ); 1162 1163 // log.debug("SET name='"+name+"', value='"+val+"'."); 1164 1165 if( name.length() > 0 && val.length() > 0 ) 1166 { 1167 val = m_engine.getVariableManager().expandVariables( m_context, 1168 val ); 1169 1170 m_context.getPage().setAttribute( name, val ); 1171 } 1172 } 1173 catch( Exception e ) 1174 { 1175 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1176 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1177 } 1178 1179 return m_currentElement; 1180 } 1181 1182 /** 1183 * Emits a processing instruction that will disable markup escaping. This is 1184 * very useful if you want to emit HTML directly into the stream. 1185 * 1186 */ 1187 private void disableOutputEscaping() 1188 { 1189 addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") ); 1190 } 1191 1192 /** 1193 * Gobbles up all hyperlinks that are encased in square brackets. 1194 */ 1195 private Element handleHyperlinks( String linktext, int pos ) 1196 { 1197 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1198 1199 StringBuilder sb = new StringBuilder(linktext.length()+80); 1200 1201 if( m_linkParsingOperations.isAccessRule( linktext ) ) 1202 { 1203 return handleAccessRule( linktext ); 1204 } 1205 1206 if( m_linkParsingOperations.isMetadata( linktext ) ) 1207 { 1208 return handleMetadata( linktext ); 1209 } 1210 1211 if( m_linkParsingOperations.isPluginLink( linktext ) ) 1212 { 1213 try 1214 { 1215 PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1216 // 1217 // This might sometimes fail, especially if there is something which looks 1218 // like a plugin invocation but is really not. 1219 // 1220 if( pluginContent != null ) 1221 { 1222 addElement( pluginContent ); 1223 1224 pluginContent.executeParse( m_context ); 1225 } 1226 } 1227 catch( PluginException e ) 1228 { 1229 log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 1230 //log.info( "Root cause:",e.getRootThrowable() ); 1231 if( !m_wysiwygEditorMode ) 1232 { 1233 ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1234 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 1235 m_context.getRealPage().getWiki(), 1236 m_context.getRealPage().getName(), 1237 e.getMessage() ) ) ); 1238 } 1239 } 1240 1241 return m_currentElement; 1242 } 1243 1244 try 1245 { 1246 LinkParser.Link link = m_linkParser.parse(linktext); 1247 linktext = link.getText(); 1248 String linkref = link.getReference(); 1249 1250 // 1251 // Yes, we now have the components separated. 1252 // linktext = the text the link should have 1253 // linkref = the url or page name. 1254 // 1255 // In many cases these are the same. [linktext|linkref]. 1256 // 1257 if( m_linkParsingOperations.isVariableLink( linktext ) ) 1258 { 1259 Content el = new VariableContent(linktext); 1260 1261 addElement( el ); 1262 } 1263 else if( m_linkParsingOperations.isExternalLink( linkref ) ) 1264 { 1265 // It's an external link, out of this Wiki 1266 1267 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1268 1269 if( m_linkParsingOperations.isImageLink( linkref ) ) 1270 { 1271 handleImageLink( linkref, linktext, link.hasReference() ); 1272 } 1273 else 1274 { 1275 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1276 addElement( outlinkImage() ); 1277 } 1278 } 1279 else if( link.isInterwikiLink() ) 1280 { 1281 // It's an interwiki link 1282 // InterWiki links also get added to external link chain 1283 // after the links have been resolved. 1284 1285 // FIXME: There is an interesting issue here: We probably should 1286 // URLEncode the wikiPage, but we can't since some of the 1287 // Wikis use slashes (/), which won't survive URLEncoding. 1288 // Besides, we don't know which character set the other Wiki 1289 // is using, so you'll have to write the entire name as it appears 1290 // in the URL. Bugger. 1291 1292 String extWiki = link.getExternalWiki(); 1293 String wikiPage = link.getExternalWikiPage(); 1294 1295 if( m_wysiwygEditorMode ) 1296 { 1297 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1298 } 1299 else 1300 { 1301 String urlReference = m_engine.getInterWikiURL( extWiki ); 1302 1303 if( urlReference != null ) 1304 { 1305 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1306 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1307 1308 if( m_linkParsingOperations.isImageLink(urlReference) ) 1309 { 1310 handleImageLink( urlReference, linktext, link.hasReference() ); 1311 } 1312 else 1313 { 1314 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1315 } 1316 1317 if( m_linkParsingOperations.isExternalLink(urlReference) ) 1318 { 1319 addElement( outlinkImage() ); 1320 } 1321 } 1322 else 1323 { 1324 Object[] args = { escapeHTMLEntities(extWiki) }; 1325 1326 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1327 } 1328 } 1329 } 1330 else if( linkref.startsWith("#") ) 1331 { 1332 // It defines a local footnote 1333 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1334 } 1335 else if( TextUtil.isNumber( linkref ) ) 1336 { 1337 // It defines a reference to a local footnote 1338 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1339 } 1340 else 1341 { 1342 int hashMark = -1; 1343 1344 // 1345 // Internal wiki link, but is it an attachment link? 1346 // 1347 String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref ); 1348 if( attachment != null ) 1349 { 1350 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1351 1352 if( m_linkParsingOperations.isImageLink( linkref ) ) 1353 { 1354 attachment = m_context.getURL( WikiContext.ATTACH, attachment ); 1355 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1356 } 1357 else 1358 { 1359 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1360 } 1361 } 1362 else if( (hashMark = linkref.indexOf('#')) != -1 ) 1363 { 1364 // It's an internal Wiki link, but to a named section 1365 1366 String namedSection = linkref.substring( hashMark+1 ); 1367 linkref = linkref.substring( 0, hashMark ); 1368 1369 linkref = MarkupParser.cleanLink( linkref ); 1370 1371 callMutatorChain( m_localLinkMutatorChain, linkref ); 1372 1373 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1374 if( matchedLink != null ) { 1375 String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection)); 1376 sectref = sectref.replace('%', '_'); 1377 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1378 } else { 1379 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1380 } 1381 } 1382 else 1383 { 1384 // It's an internal Wiki link 1385 linkref = MarkupParser.cleanLink( linkref ); 1386 1387 callMutatorChain( m_localLinkMutatorChain, linkref ); 1388 1389 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1390 if( matchedLink != null ) { 1391 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1392 } else { 1393 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1394 } 1395 } 1396 } 1397 } 1398 catch( ParseException e ) 1399 { 1400 log.info("Parser failure: ",e); 1401 Object[] args = { e.getMessage() }; 1402 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1403 } 1404 1405 return m_currentElement; 1406 } 1407 1408 /** 1409 * Pushes back any string that has been read. It will obviously 1410 * be pushed back in a reverse order. 1411 * 1412 * @since 2.1.77 1413 */ 1414 private void pushBack( String s ) 1415 throws IOException 1416 { 1417 for( int i = s.length()-1; i >= 0; i-- ) 1418 { 1419 pushBack( s.charAt(i) ); 1420 } 1421 } 1422 1423 private Element handleBackslash() 1424 throws IOException 1425 { 1426 int ch = nextToken(); 1427 1428 if( ch == '\\' ) 1429 { 1430 int ch2 = nextToken(); 1431 1432 if( ch2 == '\\' ) 1433 { 1434 pushElement( new Element("br").setAttribute("clear","all")); 1435 return popElement("br"); 1436 } 1437 1438 pushBack( ch2 ); 1439 1440 pushElement( new Element("br") ); 1441 return popElement("br"); 1442 } 1443 1444 pushBack( ch ); 1445 1446 return null; 1447 } 1448 1449 private Element handleUnderscore() 1450 throws IOException 1451 { 1452 int ch = nextToken(); 1453 Element el = null; 1454 1455 if( ch == '_' ) 1456 { 1457 if( m_isbold ) 1458 { 1459 el = popElement("b"); 1460 } 1461 else 1462 { 1463 el = pushElement( new Element("b") ); 1464 } 1465 m_isbold = !m_isbold; 1466 } 1467 else 1468 { 1469 pushBack( ch ); 1470 } 1471 1472 return el; 1473 } 1474 1475 1476 /** 1477 * For example: italics. 1478 */ 1479 private Element handleApostrophe() 1480 throws IOException 1481 { 1482 int ch = nextToken(); 1483 Element el = null; 1484 1485 if( ch == '\'' ) 1486 { 1487 if( m_isitalic ) 1488 { 1489 el = popElement("i"); 1490 } 1491 else 1492 { 1493 el = pushElement( new Element("i") ); 1494 } 1495 m_isitalic = !m_isitalic; 1496 } 1497 else 1498 { 1499 pushBack( ch ); 1500 } 1501 1502 return el; 1503 } 1504 1505 private Element handleOpenbrace( boolean isBlock ) 1506 throws IOException 1507 { 1508 int ch = nextToken(); 1509 1510 if( ch == '{' ) 1511 { 1512 int ch2 = nextToken(); 1513 1514 if( ch2 == '{' ) 1515 { 1516 m_isPre = true; 1517 m_isEscaping = true; 1518 m_isPreBlock = isBlock; 1519 1520 if( isBlock ) 1521 { 1522 startBlockLevel(); 1523 return pushElement( new Element("pre") ); 1524 } 1525 1526 return pushElement( new Element("span").setAttribute("class","inline-code") ); 1527 } 1528 1529 pushBack( ch2 ); 1530 1531 return pushElement( new Element("tt") ); 1532 } 1533 1534 pushBack( ch ); 1535 1536 return null; 1537 } 1538 1539 /** 1540 * Handles both }} and }}} 1541 */ 1542 private Element handleClosebrace() 1543 throws IOException 1544 { 1545 int ch2 = nextToken(); 1546 1547 if( ch2 == '}' ) 1548 { 1549 int ch3 = nextToken(); 1550 1551 if( ch3 == '}' ) 1552 { 1553 if( m_isPre ) 1554 { 1555 if( m_isPreBlock ) 1556 { 1557 popElement( "pre" ); 1558 } 1559 else 1560 { 1561 popElement( "span" ); 1562 } 1563 1564 m_isPre = false; 1565 m_isEscaping = false; 1566 return m_currentElement; 1567 } 1568 1569 m_plainTextBuf.append("}}}"); 1570 return m_currentElement; 1571 } 1572 1573 pushBack( ch3 ); 1574 1575 if( !m_isEscaping ) 1576 { 1577 return popElement("tt"); 1578 } 1579 } 1580 1581 pushBack( ch2 ); 1582 1583 return null; 1584 } 1585 1586 private Element handleDash() 1587 throws IOException 1588 { 1589 int ch = nextToken(); 1590 1591 if( ch == '-' ) 1592 { 1593 int ch2 = nextToken(); 1594 1595 if( ch2 == '-' ) 1596 { 1597 int ch3 = nextToken(); 1598 1599 if( ch3 == '-' ) 1600 { 1601 // Empty away all the rest of the dashes. 1602 // Do not forget to return the first non-match back. 1603 do 1604 { 1605 ch = nextToken(); 1606 } 1607 while ( ch == '-' ); 1608 1609 pushBack(ch); 1610 startBlockLevel(); 1611 pushElement( new Element("hr") ); 1612 return popElement( "hr" ); 1613 } 1614 1615 pushBack( ch3 ); 1616 } 1617 pushBack( ch2 ); 1618 } 1619 1620 pushBack( ch ); 1621 1622 return null; 1623 } 1624 1625 private Element handleHeading() 1626 throws IOException 1627 { 1628 Element el = null; 1629 1630 int ch = nextToken(); 1631 1632 Heading hd = new Heading(); 1633 1634 if( ch == '!' ) 1635 { 1636 int ch2 = nextToken(); 1637 1638 if( ch2 == '!' ) 1639 { 1640 String title = peekAheadLine(); 1641 1642 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1643 } 1644 else 1645 { 1646 pushBack( ch2 ); 1647 String title = peekAheadLine(); 1648 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1649 } 1650 } 1651 else 1652 { 1653 pushBack( ch ); 1654 String title = peekAheadLine(); 1655 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1656 } 1657 1658 callHeadingListenerChain( hd ); 1659 1660 m_lastHeading = hd; 1661 1662 if( el != null ) pushElement(el); 1663 1664 return el; 1665 } 1666 1667 /** 1668 * Reads the stream until the next EOL or EOF. Note that it will also read the 1669 * EOL from the stream. 1670 */ 1671 private StringBuilder readUntilEOL() 1672 throws IOException 1673 { 1674 int ch; 1675 StringBuilder buf = new StringBuilder( 256 ); 1676 1677 while( true ) 1678 { 1679 ch = nextToken(); 1680 1681 if( ch == -1 ) 1682 break; 1683 1684 buf.append( (char) ch ); 1685 1686 if( ch == '\n' ) 1687 break; 1688 } 1689 return buf; 1690 } 1691 1692 /** Controls whether italic is restarted after a paragraph shift */ 1693 1694 private boolean m_restartitalic = false; 1695 private boolean m_restartbold = false; 1696 1697 private boolean m_newLine; 1698 1699 /** 1700 * Starts a block level element, therefore closing 1701 * a potential open paragraph tag. 1702 */ 1703 private void startBlockLevel() 1704 { 1705 // These may not continue over block level limits in XHTML 1706 1707 popElement("i"); 1708 popElement("b"); 1709 popElement("tt"); 1710 1711 if( m_isOpenParagraph ) 1712 { 1713 m_isOpenParagraph = false; 1714 popElement("p"); 1715 m_plainTextBuf.append("\n"); // Just small beautification 1716 } 1717 1718 m_restartitalic = m_isitalic; 1719 m_restartbold = m_isbold; 1720 1721 m_isitalic = false; 1722 m_isbold = false; 1723 } 1724 1725 private static String getListType( char c ) 1726 { 1727 if( c == '*' ) 1728 { 1729 return "ul"; 1730 } 1731 else if( c == '#' ) 1732 { 1733 return "ol"; 1734 } 1735 throw new InternalWikiException("Parser got faulty list type: "+c); 1736 } 1737 /** 1738 * Like original handleOrderedList() and handleUnorderedList() 1739 * however handles both ordered ('#') and unordered ('*') mixed together. 1740 */ 1741 1742 // FIXME: Refactor this; it's a bit messy. 1743 1744 private Element handleGeneralList() 1745 throws IOException 1746 { 1747 startBlockLevel(); 1748 1749 String strBullets = readWhile( "*#" ); 1750 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution 1751 int numBullets = strBullets.length(); 1752 1753 // override the beginning portion of bullet pattern to be like the previous 1754 // to simulate PHPWiki style lists 1755 1756 if(m_allowPHPWikiStyleLists) 1757 { 1758 // only substitute if different 1759 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1760 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) 1761 { 1762 if(numBullets <= m_genlistlevel) 1763 { 1764 // Substitute all but the last character (keep the expressed bullet preference) 1765 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") 1766 + strBullets.substring(numBullets-1, numBullets); 1767 } 1768 else 1769 { 1770 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); 1771 } 1772 } 1773 } 1774 1775 // 1776 // Check if this is still of the same type 1777 // 1778 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1779 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) 1780 { 1781 if( numBullets > m_genlistlevel ) 1782 { 1783 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); 1784 1785 for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) 1786 { 1787 // bullets are growing, get from new bullet list 1788 pushElement( new Element("li") ); 1789 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); 1790 } 1791 } 1792 else if( numBullets < m_genlistlevel ) 1793 { 1794 // Close the previous list item. 1795 // buf.append( m_renderer.closeListItem() ); 1796 popElement( "li" ); 1797 1798 for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) 1799 { 1800 // bullets are shrinking, get from old bullet list 1801 1802 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1803 if( m_genlistlevel > 0 ) 1804 { 1805 popElement( "li" ); 1806 } 1807 1808 } 1809 } 1810 else 1811 { 1812 if( m_genlistlevel > 0 ) 1813 { 1814 popElement( "li" ); 1815 } 1816 } 1817 } 1818 else 1819 { 1820 // 1821 // The pattern has changed, unwind and restart 1822 // 1823 int numEqualBullets; 1824 int numCheckBullets; 1825 1826 // find out how much is the same 1827 numEqualBullets = 0; 1828 numCheckBullets = Math.min(numBullets,m_genlistlevel); 1829 1830 while( numEqualBullets < numCheckBullets ) 1831 { 1832 // if the bullets are equal so far, keep going 1833 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) 1834 numEqualBullets++; 1835 // otherwise giveup, we have found how many are equal 1836 else 1837 break; 1838 } 1839 1840 //unwind 1841 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) 1842 { 1843 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); 1844 if( m_genlistlevel > numBullets ) 1845 { 1846 popElement("li"); 1847 } 1848 } 1849 1850 //rewind 1851 1852 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); 1853 for(int i = numEqualBullets; i < numBullets; i++) 1854 { 1855 pushElement( new Element("li") ); 1856 pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); 1857 } 1858 m_genlistlevel = numBullets; 1859 } 1860 1861 // 1862 // Push a new list item, and eat away any extra whitespace 1863 // 1864 pushElement( new Element("li") ); 1865 readWhile(" "); 1866 1867 // work done, remember the new bullet list (in place of old one) 1868 m_genlistBulletBuffer.setLength(0); 1869 m_genlistBulletBuffer.append(strBullets); 1870 1871 return m_currentElement; 1872 } 1873 1874 private Element unwindGeneralList() 1875 { 1876 //unwind 1877 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 1878 { 1879 popElement( "li" ); 1880 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1881 } 1882 1883 m_genlistBulletBuffer.setLength(0); 1884 1885 return null; 1886 } 1887 1888 1889 private Element handleDefinitionList() 1890 throws IOException 1891 { 1892 if( !m_isdefinition ) 1893 { 1894 m_isdefinition = true; 1895 1896 startBlockLevel(); 1897 1898 pushElement( new Element("dl") ); 1899 return pushElement( new Element("dt") ); 1900 } 1901 1902 return null; 1903 } 1904 1905 private Element handleOpenbracket() 1906 throws IOException 1907 { 1908 StringBuilder sb = new StringBuilder(40); 1909 int pos = getPosition(); 1910 int ch = nextToken(); 1911 boolean isPlugin = false; 1912 1913 if( ch == '[' ) 1914 { 1915 if( m_wysiwygEditorMode ) 1916 { 1917 sb.append( '[' ); 1918 } 1919 1920 sb.append( (char)ch ); 1921 1922 while( (ch = nextToken()) == '[' ) 1923 { 1924 sb.append( (char)ch ); 1925 } 1926 } 1927 1928 1929 if( ch == '{' ) 1930 { 1931 isPlugin = true; 1932 } 1933 1934 pushBack( ch ); 1935 1936 if( sb.length() > 0 ) 1937 { 1938 m_plainTextBuf.append( sb ); 1939 return m_currentElement; 1940 } 1941 1942 // 1943 // Find end of hyperlink 1944 // 1945 1946 ch = nextToken(); 1947 int nesting = 1; // Check for nested plugins 1948 1949 while( ch != -1 ) 1950 { 1951 int ch2 = nextToken(); pushBack(ch2); 1952 1953 if( isPlugin ) 1954 { 1955 if( ch == '[' && ch2 == '{' ) 1956 { 1957 nesting++; 1958 } 1959 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 1960 { 1961 break; 1962 } 1963 else if( ch == '}' && ch2 == ']' ) 1964 { 1965 // NB: This will be decremented once at the end 1966 nesting--; 1967 } 1968 } 1969 else 1970 { 1971 if( ch == ']' ) 1972 { 1973 break; 1974 } 1975 } 1976 1977 sb.append( (char) ch ); 1978 1979 ch = nextToken(); 1980 } 1981 1982 // 1983 // If the link is never finished, do some tricks to display the rest of the line 1984 // unchanged. 1985 // 1986 if( ch == -1 ) 1987 { 1988 log.debug("Warning: unterminated link detected!"); 1989 m_isEscaping = true; 1990 m_plainTextBuf.append( sb ); 1991 flushPlainText(); 1992 m_isEscaping = false; 1993 return m_currentElement; 1994 } 1995 1996 return handleHyperlinks( sb.toString(), pos ); 1997 } 1998 1999 /** 2000 * Reads the stream until the current brace is closed or stream end. 2001 */ 2002 private String readBraceContent( char opening, char closing ) 2003 throws IOException 2004 { 2005 StringBuilder sb = new StringBuilder(40); 2006 int braceLevel = 1; 2007 int ch; 2008 while(( ch = nextToken() ) != -1 ) 2009 { 2010 if( ch == '\\' ) 2011 { 2012 continue; 2013 } 2014 else if ( ch == opening ) 2015 { 2016 braceLevel++; 2017 } 2018 else if ( ch == closing ) 2019 { 2020 braceLevel--; 2021 if (braceLevel==0) 2022 { 2023 break; 2024 } 2025 } 2026 sb.append( (char)ch ); 2027 } 2028 return sb.toString(); 2029 } 2030 2031 2032 /** 2033 * Handles constructs of type %%(style) and %%class 2034 * @param newLine 2035 * @return An Element containing the div or span, depending on the situation. 2036 * @throws IOException 2037 */ 2038 private Element handleDiv( boolean newLine ) 2039 throws IOException 2040 { 2041 int ch = nextToken(); 2042 Element el = null; 2043 2044 if( ch == '%' ) 2045 { 2046 String style = null; 2047 String clazz = null; 2048 2049 ch = nextToken(); 2050 2051 // 2052 // Style or class? 2053 // 2054 if( ch == '(' ) 2055 { 2056 style = readBraceContent('(',')'); 2057 } 2058 else if( Character.isLetter( (char) ch ) ) 2059 { 2060 pushBack( ch ); 2061 clazz = readUntil( "( \t\n\r" ); 2062 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 2063 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 2064 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 2065 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 2066 2067 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 2068 //(2) remove any invalid character 2069 if( clazz != null){ 2070 2071 clazz = clazz.replace('.', ' ') 2072 .replaceAll("[^\\s-_\\w\\x200-\\x377]+",""); 2073 2074 } 2075 ch = nextToken(); 2076 2077 //check for %%class1.class2( style information ) 2078 if( ch == '(' ) 2079 { 2080 style = readBraceContent('(',')'); 2081 } 2082 // 2083 // Pop out only spaces, so that the upcoming EOL check does not check the 2084 // next line. 2085 // 2086 else if( ch == '\n' || ch == '\r' ) 2087 { 2088 pushBack(ch); 2089 } 2090 } 2091 else 2092 { 2093 // 2094 // Anything else stops. 2095 // 2096 2097 pushBack(ch); 2098 2099 try 2100 { 2101 Boolean isSpan = m_styleStack.pop(); 2102 2103 if( isSpan == null ) 2104 { 2105 // Fail quietly 2106 } 2107 else if( isSpan.booleanValue() ) 2108 { 2109 el = popElement( "span" ); 2110 } 2111 else 2112 { 2113 el = popElement( "div" ); 2114 } 2115 } 2116 catch( EmptyStackException e ) 2117 { 2118 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 2119 return m_currentElement; 2120 } 2121 2122 return el; 2123 } 2124 2125 // 2126 // Check if there is an attempt to do something nasty 2127 // 2128 2129 try 2130 { 2131 style = StringEscapeUtils.unescapeHtml(style); 2132 if( style != null && style.indexOf("javascript:") != -1 ) 2133 { 2134 log.debug("Attempt to output javascript within CSS:"+style); 2135 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2136 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 2137 } 2138 } 2139 catch( NumberFormatException e ) 2140 { 2141 // 2142 // If there are unknown entities, we don't want the parser to stop. 2143 // 2144 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2145 String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 2146 return addElement( makeError( msg ) ); 2147 } 2148 2149 // 2150 // Decide if we should open a div or a span? 2151 // 2152 String eol = peekAheadLine(); 2153 2154 if( eol.trim().length() > 0 ) 2155 { 2156 // There is stuff after the class 2157 2158 el = new Element("span"); 2159 2160 m_styleStack.push( Boolean.TRUE ); 2161 } 2162 else 2163 { 2164 startBlockLevel(); 2165 el = new Element("div"); 2166 m_styleStack.push( Boolean.FALSE ); 2167 } 2168 2169 if( style != null ) el.setAttribute("style", style); 2170 if( clazz != null ) el.setAttribute("class", clazz); 2171 el = pushElement( el ); 2172 2173 return el; 2174 } 2175 2176 pushBack(ch); 2177 2178 return el; 2179 } 2180 2181 private Element handleSlash( boolean newLine ) 2182 throws IOException 2183 { 2184 int ch = nextToken(); 2185 2186 pushBack(ch); 2187 if( ch == '%' && !m_styleStack.isEmpty() ) 2188 { 2189 return handleDiv( newLine ); 2190 } 2191 2192 return null; 2193 } 2194 2195 private Element handleBar( boolean newLine ) 2196 throws IOException 2197 { 2198 Element el = null; 2199 2200 if( !m_istable && !newLine ) 2201 { 2202 return null; 2203 } 2204 2205 // 2206 // If the bar is in the first column, we will either start 2207 // a new table or continue the old one. 2208 // 2209 2210 if( newLine ) 2211 { 2212 if( !m_istable ) 2213 { 2214 startBlockLevel(); 2215 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2216 m_istable = true; 2217 m_rowNum = 0; 2218 } 2219 2220 m_rowNum++; 2221 Element tr = ( m_rowNum % 2 != 0 ) 2222 ? new Element("tr").setAttribute("class", "odd") 2223 : new Element("tr"); 2224 el = pushElement( tr ); 2225 } 2226 2227 // 2228 // Check out which table cell element to start; 2229 // a header element (th) or a regular element (td). 2230 // 2231 int ch = nextToken(); 2232 2233 if( ch == '|' ) 2234 { 2235 if( !newLine ) 2236 { 2237 el = popElement("th"); 2238 if( el == null ) popElement("td"); 2239 } 2240 el = pushElement( new Element("th") ); 2241 } 2242 else 2243 { 2244 if( !newLine ) 2245 { 2246 el = popElement("td"); 2247 if( el == null ) popElement("th"); 2248 } 2249 2250 el = pushElement( new Element("td") ); 2251 2252 pushBack( ch ); 2253 } 2254 2255 return el; 2256 } 2257 2258 /** 2259 * Generic escape of next character or entity. 2260 */ 2261 private Element handleTilde() 2262 throws IOException 2263 { 2264 int ch = nextToken(); 2265 2266 if( ch == ' ' ) 2267 { 2268 if( m_wysiwygEditorMode ) 2269 { 2270 m_plainTextBuf.append( "~ " ); 2271 } 2272 return m_currentElement; 2273 } 2274 2275 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2276 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2277 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2278 { 2279 if( m_wysiwygEditorMode ) 2280 { 2281 m_plainTextBuf.append( '~' ); 2282 } 2283 2284 m_plainTextBuf.append( (char)ch ); 2285 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2286 return m_currentElement; 2287 } 2288 2289 // No escape. 2290 pushBack( ch ); 2291 2292 return null; 2293 } 2294 2295 private void fillBuffer( Element startElement ) 2296 throws IOException 2297 { 2298 m_currentElement = startElement; 2299 2300 boolean quitReading = false; 2301 m_newLine = true; 2302 disableOutputEscaping(); 2303 2304 while(!quitReading) 2305 { 2306 int ch = nextToken(); 2307 2308 if( ch == -1 ) break; 2309 2310 // 2311 // Check if we're actually ending the preformatted mode. 2312 // We still must do an entity transformation here. 2313 // 2314 if( m_isEscaping ) 2315 { 2316 if( ch == '}' ) 2317 { 2318 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2319 } 2320 else if( ch == -1 ) 2321 { 2322 quitReading = true; 2323 } 2324 else if( ch == '\r' ) 2325 { 2326 // DOS line feeds we ignore. 2327 } 2328 else if( ch == '<' ) 2329 { 2330 m_plainTextBuf.append( "<" ); 2331 } 2332 else if( ch == '>' ) 2333 { 2334 m_plainTextBuf.append( ">" ); 2335 } 2336 else if( ch == '&' ) 2337 { 2338 m_plainTextBuf.append( "&" ); 2339 } 2340 else if( ch == '~' ) 2341 { 2342 String braces = readWhile("}"); 2343 if( braces.length() >= 3 ) 2344 { 2345 m_plainTextBuf.append("}}}"); 2346 2347 braces = braces.substring(3); 2348 } 2349 else 2350 { 2351 m_plainTextBuf.append( (char) ch ); 2352 } 2353 2354 for( int i = braces.length()-1; i >= 0; i-- ) 2355 { 2356 pushBack(braces.charAt(i)); 2357 } 2358 } 2359 else 2360 { 2361 m_plainTextBuf.append( (char) ch ); 2362 } 2363 2364 continue; 2365 } 2366 2367 // 2368 // An empty line stops a list 2369 // 2370 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2371 { 2372 m_plainTextBuf.append(unwindGeneralList()); 2373 } 2374 2375 if( m_newLine && ch != '|' && m_istable ) 2376 { 2377 popElement("table"); 2378 m_istable = false; 2379 } 2380 2381 int skip = IGNORE; 2382 2383 // 2384 // Do the actual parsing and catch any errors. 2385 // 2386 try 2387 { 2388 skip = parseToken( ch ); 2389 } 2390 catch( IllegalDataException e ) 2391 { 2392 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2393 2394 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2395 } 2396 2397 // 2398 // The idea is as follows: If the handler method returns 2399 // an element (el != null), it is assumed that it has been 2400 // added in the stack. Otherwise the character is added 2401 // as is to the plaintext buffer. 2402 // 2403 // For the transition phase, if s != null, it also gets 2404 // added in the plaintext buffer. 2405 // 2406 2407 switch( skip ) 2408 { 2409 case ELEMENT: 2410 m_newLine = false; 2411 break; 2412 2413 case CHARACTER: 2414 m_plainTextBuf.append( (char) ch ); 2415 m_newLine = false; 2416 break; 2417 2418 case IGNORE: 2419 default: 2420 break; 2421 } 2422 } 2423 2424 closeHeadings(); 2425 popElement("domroot"); 2426 } 2427 2428 private String cleanupSuspectData( String s ) 2429 { 2430 StringBuilder sb = new StringBuilder( s.length() ); 2431 2432 for( int i = 0; i < s.length(); i++ ) 2433 { 2434 char c = s.charAt(i); 2435 2436 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2437 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); 2438 } 2439 2440 return sb.toString(); 2441 } 2442 2443 /** The token is a plain character. */ 2444 protected static final int CHARACTER = 0; 2445 2446 /** The token is a wikimarkup element. */ 2447 protected static final int ELEMENT = 1; 2448 2449 /** The token is to be ignored. */ 2450 protected static final int IGNORE = 2; 2451 2452 /** 2453 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2454 * you think this was a wiki markup element, and IGNORE, if you think 2455 * we should ignore this altogether. 2456 * <p> 2457 * To add your own MarkupParser, you can override this method, but it 2458 * is recommended that you call super.parseToken() as well to gain advantage 2459 * of JSPWiki's own markup. You can call it at the start of your own 2460 * parseToken() or end - it does not matter. 2461 * 2462 * @param ch The character under investigation 2463 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2464 * @throws IOException If parsing fails. 2465 */ 2466 protected int parseToken( int ch ) 2467 throws IOException 2468 { 2469 Element el = null; 2470 2471 // 2472 // Now, check the incoming token. 2473 // 2474 switch( ch ) 2475 { 2476 case '\r': 2477 // DOS linefeeds we forget 2478 return IGNORE; 2479 2480 case '\n': 2481 // 2482 // Close things like headings, etc. 2483 // 2484 2485 // FIXME: This is not really very fast 2486 2487 closeHeadings(); 2488 2489 popElement("dl"); // Close definition lists. 2490 if( m_istable ) 2491 { 2492 popElement("tr"); 2493 } 2494 2495 m_isdefinition = false; 2496 2497 if( m_newLine ) 2498 { 2499 // Paragraph change. 2500 startBlockLevel(); 2501 2502 // 2503 // Figure out which elements cannot be enclosed inside 2504 // a <p></p> pair according to XHTML rules. 2505 // 2506 String nextLine = peekAheadLine(); 2507 if( nextLine.length() == 0 || 2508 (nextLine.length() > 0 && 2509 !nextLine.startsWith("{{{") && 2510 !nextLine.startsWith("----") && 2511 !nextLine.startsWith("%%") && 2512 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2513 { 2514 pushElement( new Element("p") ); 2515 m_isOpenParagraph = true; 2516 2517 if( m_restartitalic ) 2518 { 2519 pushElement( new Element("i") ); 2520 m_isitalic = true; 2521 m_restartitalic = false; 2522 } 2523 if( m_restartbold ) 2524 { 2525 pushElement( new Element("b") ); 2526 m_isbold = true; 2527 m_restartbold = false; 2528 } 2529 } 2530 } 2531 else 2532 { 2533 m_plainTextBuf.append("\n"); 2534 m_newLine = true; 2535 } 2536 return IGNORE; 2537 2538 2539 case '\\': 2540 el = handleBackslash(); 2541 break; 2542 2543 case '_': 2544 el = handleUnderscore(); 2545 break; 2546 2547 case '\'': 2548 el = handleApostrophe(); 2549 break; 2550 2551 case '{': 2552 el = handleOpenbrace( m_newLine ); 2553 break; 2554 2555 case '}': 2556 el = handleClosebrace(); 2557 break; 2558 2559 case '-': 2560 if( m_newLine ) 2561 el = handleDash(); 2562 2563 break; 2564 2565 case '!': 2566 if( m_newLine ) 2567 { 2568 el = handleHeading(); 2569 } 2570 break; 2571 2572 case ';': 2573 if( m_newLine ) 2574 { 2575 el = handleDefinitionList(); 2576 } 2577 break; 2578 2579 case ':': 2580 if( m_isdefinition ) 2581 { 2582 popElement("dt"); 2583 el = pushElement( new Element("dd") ); 2584 m_isdefinition = false; 2585 } 2586 break; 2587 2588 case '[': 2589 el = handleOpenbracket(); 2590 break; 2591 2592 case '*': 2593 if( m_newLine ) 2594 { 2595 pushBack('*'); 2596 el = handleGeneralList(); 2597 } 2598 break; 2599 2600 case '#': 2601 if( m_newLine ) 2602 { 2603 pushBack('#'); 2604 el = handleGeneralList(); 2605 } 2606 break; 2607 2608 case '|': 2609 el = handleBar( m_newLine ); 2610 break; 2611 2612 case '~': 2613 el = handleTilde(); 2614 break; 2615 2616 case '%': 2617 el = handleDiv( m_newLine ); 2618 break; 2619 2620 case '/': 2621 el = handleSlash( m_newLine ); 2622 break; 2623 2624 default: 2625 break; 2626 } 2627 2628 return el != null ? ELEMENT : CHARACTER; 2629 } 2630 2631 private void closeHeadings() 2632 { 2633 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2634 { 2635 // Add the hash anchor element at the end of the heading 2636 addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2637 m_lastHeading = null; 2638 } 2639 popElement("h2"); 2640 popElement("h3"); 2641 popElement("h4"); 2642 } 2643 2644 /** 2645 * Parses the entire document from the Reader given in the constructor or 2646 * set by {@link #setInputReader(Reader)}. 2647 * 2648 * @return A WikiDocument, ready to be passed to the renderer. 2649 * @throws IOException If parsing cannot be accomplished. 2650 */ 2651 @Override 2652 public WikiDocument parse() 2653 throws IOException 2654 { 2655 WikiDocument d = new WikiDocument( m_context.getPage() ); 2656 d.setContext( m_context ); 2657 2658 Element rootElement = new Element("domroot"); 2659 2660 d.setRootElement( rootElement ); 2661 2662 fillBuffer( rootElement ); 2663 2664 paragraphify(rootElement); 2665 2666 return d; 2667 } 2668 2669 /** 2670 * Checks out that the first paragraph is correctly installed. 2671 * 2672 * @param rootElement 2673 */ 2674 private void paragraphify(Element rootElement) 2675 { 2676 // 2677 // Add the paragraph tag to the first paragraph 2678 // 2679 List< Content > kids = rootElement.getContent(); 2680 2681 if( rootElement.getChild("p") != null ) 2682 { 2683 ArrayList<Content> ls = new ArrayList<>(); 2684 int idxOfFirstContent = 0; 2685 int count = 0; 2686 2687 for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) 2688 { 2689 Content c = i.next(); 2690 if( c instanceof Element ) 2691 { 2692 String name = ( ( Element )c ).getName(); 2693 if( isBlockLevel( name ) ) break; 2694 } 2695 2696 if( !(c instanceof ProcessingInstruction) ) 2697 { 2698 ls.add( c ); 2699 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2700 } 2701 } 2702 2703 // 2704 // If there were any elements, then add a new <p> (unless it would 2705 // be an empty one) 2706 // 2707 if( ls.size() > 0 ) 2708 { 2709 Element newel = new Element("p"); 2710 2711 for( Iterator< Content > i = ls.iterator(); i.hasNext(); ) 2712 { 2713 Content c = i.next(); 2714 2715 c.detach(); 2716 newel.addContent(c); 2717 } 2718 2719 // 2720 // Make sure there are no empty <p/> tags added. 2721 // 2722 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) 2723 rootElement.addContent(idxOfFirstContent, newel); 2724 } 2725 } 2726 } 2727 2728}