001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import java.io.IOException; 022import java.io.Reader; 023import java.io.StringReader; 024import java.text.MessageFormat; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.EmptyStackException; 029import java.util.HashMap; 030import java.util.Iterator; 031import java.util.List; 032import java.util.Map; 033import java.util.Properties; 034import java.util.ResourceBundle; 035import java.util.Stack; 036 037import javax.xml.transform.Result; 038 039import org.apache.commons.lang.StringEscapeUtils; 040import org.apache.commons.lang.StringUtils; 041import org.apache.log4j.Logger; 042import org.apache.oro.text.regex.MalformedPatternException; 043import org.apache.oro.text.regex.MatchResult; 044import org.apache.oro.text.regex.Pattern; 045import org.apache.oro.text.regex.PatternCompiler; 046import org.apache.oro.text.regex.PatternMatcher; 047import org.apache.oro.text.regex.Perl5Compiler; 048import org.apache.oro.text.regex.Perl5Matcher; 049import org.apache.wiki.InternalWikiException; 050import org.apache.wiki.StringTransmutator; 051import 
org.apache.wiki.WikiContext; 052import org.apache.wiki.WikiPage; 053import org.apache.wiki.api.exceptions.PluginException; 054import org.apache.wiki.api.plugin.WikiPlugin; 055import org.apache.wiki.auth.WikiSecurityException; 056import org.apache.wiki.auth.acl.Acl; 057import org.apache.wiki.i18n.InternationalizationManager; 058import org.apache.wiki.preferences.Preferences; 059import org.apache.wiki.render.CleanTextRenderer; 060import org.apache.wiki.render.RenderingManager; 061import org.apache.wiki.util.TextUtil; 062import org.jdom2.Attribute; 063import org.jdom2.Content; 064import org.jdom2.Element; 065import org.jdom2.IllegalDataException; 066import org.jdom2.ProcessingInstruction; 067import org.jdom2.Verifier; 068 069/** 070 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 071 * heart and soul of JSPWiki : make sure you test properly anything that is added, 072 * or else it breaks down horribly. 073 * 074 * @since 2.4 075 */ 076public class JSPWikiMarkupParser extends MarkupParser { 077 078 protected static final int READ = 0; 079 protected static final int EDIT = 1; 080 protected static final int EMPTY = 2; // Empty message 081 protected static final int LOCAL = 3; 082 protected static final int LOCALREF = 4; 083 protected static final int IMAGE = 5; 084 protected static final int EXTERNAL = 6; 085 protected static final int INTERWIKI = 7; 086 protected static final int IMAGELINK = 8; 087 protected static final int IMAGEWIKILINK = 9; 088 protected static final int ATTACHMENT = 10; 089 090 private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class ); 091 092 private boolean m_isbold = false; 093 private boolean m_isitalic = false; 094 private boolean m_istable = false; 095 private boolean m_isPre = false; 096 private boolean m_isEscaping = false; 097 private boolean m_isdefinition = false; 098 private boolean m_isPreBlock = false; 099 100 /** Contains style information, in multiple forms. 
*/ 101 private Stack<Boolean> m_styleStack = new Stack<>(); 102 103 // general list handling 104 private int m_genlistlevel = 0; 105 private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern 106 private boolean m_allowPHPWikiStyleLists = true; 107 108 private boolean m_isOpenParagraph = false; 109 110 /** Parser for extended link functionality. */ 111 private LinkParser m_linkParser = new LinkParser(); 112 113 /** Keeps track of any plain text that gets put in the Text nodes */ 114 private StringBuilder m_plainTextBuf = new StringBuilder(20); 115 116 private Element m_currentElement; 117 118 /** Keep track of duplicate header names. */ 119 private Map<String, Integer> m_titleSectionCounter = new HashMap<>(); 120 121 /** If true, consider CamelCase hyperlinks as well. */ 122 public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks"; 123 124 /** If true, all hyperlinks are translated as well, regardless whether they 125 are surrounded by brackets. */ 126 public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris"; 127 128 /** If true, all outward attachment info links have a small link image appended. */ 129 public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage"; 130 131 /** If true, then considers CamelCase links as well. */ 132 private boolean m_camelCaseLinks = false; 133 134 /** If true, then generate special output for wysiwyg editing in certain cases */ 135 private boolean m_wysiwygEditorMode = false; 136 137 /** If true, consider URIs that have no brackets as well. */ 138 // FIXME: Currently reserved, but not used. 139 private boolean m_plainUris = false; 140 141 /** If true, all outward links use a small link image. */ 142 private boolean m_useOutlinkImage = true; 143 144 private boolean m_useAttachmentImage = true; 145 146 /** If true, allows raw HTML. 
*/ 147 private boolean m_allowHTML = false; 148 149 private boolean m_useRelNofollow = false; 150 151 private PatternCompiler m_compiler = new Perl5Compiler(); 152 153 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 154 155 private PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 156 private Pattern m_camelCasePattern; 157 158 private int m_rowNum = 1; 159 160 private Heading m_lastHeading = null; 161 162 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 163 164 /** 165 * Creates a markup parser. 166 * 167 * @param context The WikiContext which controls the parsing 168 * @param in Where the data is read from. 169 */ 170 public JSPWikiMarkupParser( WikiContext context, Reader in ) 171 { 172 super( context, in ); 173 initialize(); 174 } 175 176 // FIXME: parsers should be pooled for better performance. 177 private void initialize() 178 { 179 initInlineImagePatterns(); 180 181 m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN ); 182 if( m_camelCasePattern == null ) 183 { 184 try 185 { 186 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX, 187 Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 188 } 189 catch( MalformedPatternException e ) 190 { 191 log.fatal("Internal error: Someone put in a faulty pattern.",e); 192 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 193 } 194 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 195 } 196 // 197 // Set the properties. 
198 // 199 Properties props = m_engine.getWikiProperties(); 200 201 String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 202 203 if( cclinks != null ) 204 { 205 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 206 } 207 else 208 { 209 m_camelCaseLinks = TextUtil.getBooleanProperty( props, 210 PROP_CAMELCASELINKS, 211 m_camelCaseLinks ); 212 } 213 214 Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE ); 215 if( wysiwygVariable != null ) 216 { 217 m_wysiwygEditorMode = wysiwygVariable.booleanValue(); 218 } 219 220 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 221 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 222 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 223 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 224 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 225 226 if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null ) 227 { 228 disableAccessRules(); 229 } 230 231 m_context.getPage().setHasMetadata(); 232 } 233 234 /** 235 * Calls a transmutator chain. 236 * 237 * @param list Chain to call 238 * @param text Text that should be passed to the mutate() method of each of the mutators in the chain. 239 * @return The result of the mutation. 240 */ 241 protected String callMutatorChain( Collection< StringTransmutator > list, String text ) 242 { 243 if( list == null || list.size() == 0 ) 244 { 245 return text; 246 } 247 248 for( Iterator< StringTransmutator > i = list.iterator(); i.hasNext(); ) 249 { 250 StringTransmutator m = i.next(); 251 252 text = m.mutate( m_context, text ); 253 } 254 255 return text; 256 } 257 258 /** 259 * Calls the heading listeners. 260 * 261 * @param param A Heading object. 
262 */ 263 protected void callHeadingListenerChain( Heading param ) 264 { 265 List< HeadingListener > list = m_headingListenerChain; 266 267 for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); ) 268 { 269 HeadingListener h = i.next(); 270 271 h.headingAdded( m_context, param ); 272 } 273 } 274 275 /** 276 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 277 * if you really know what you are doing. 278 * 279 * @param type One of the types above 280 * @param link URL to which to link to 281 * @param text Link text 282 * @param section If a particular section identifier is required. 283 * @return An A element. 284 * @since 2.4.78 285 */ 286 protected Element createAnchor(int type, String link, String text, String section) 287 { 288 text = escapeHTMLEntities( text ); 289 section = escapeHTMLEntities( section ); 290 Element el = new Element("a"); 291 el.setAttribute("class",CLASS_TYPES[type]); 292 el.setAttribute("href",link+section); 293 el.addContent(text); 294 return el; 295 } 296 297 private Element makeLink( int type, String link, String text, String section, Iterator< Attribute > attributes ) 298 { 299 Element el = null; 300 301 if( text == null ) text = link; 302 303 text = callMutatorChain( m_linkMutators, text ); 304 305 section = (section != null) ? ("#"+section) : ""; 306 307 // Make sure we make a link name that can be accepted 308 // as a valid URL. 
309 310 if( link.length() == 0 ) 311 { 312 type = EMPTY; 313 } 314 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 315 316 switch(type) 317 { 318 case READ: 319 el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section ); 320 break; 321 322 case EDIT: 323 el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" ); 324 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 325 326 break; 327 328 case EMPTY: 329 el = new Element("u").addContent(text); 330 break; 331 332 // 333 // These two are for local references - footnotes and 334 // references to footnotes. 335 // We embed the page name (or whatever WikiContext gives us) 336 // to make sure the links are unique across Wiki. 337 // 338 case LOCALREF: 339 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 340 break; 341 342 case LOCAL: 343 el = new Element("a").setAttribute("class",CLASS_FOOTNOTE); 344 el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1)); 345 el.addContent("["+text+"]"); 346 break; 347 348 // 349 // With the image, external and interwiki types we need to 350 // make sure nobody can put in Javascript or something else 351 // annoying into the links themselves. We do this by preventing 352 // a haxor from stopping the link name short with quotes in 353 // fillBuffer(). 
354 // 355 case IMAGE: 356 el = new Element("img").setAttribute("class","inline"); 357 el.setAttribute("src",link); 358 el.setAttribute("alt",text); 359 break; 360 361 case IMAGELINK: 362 el = new Element("img").setAttribute("class","inline"); 363 el.setAttribute("src",link); 364 el.setAttribute("alt",text); 365 el = createAnchor(IMAGELINK,text,"","").addContent(el); 366 break; 367 368 case IMAGEWIKILINK: 369 String pagelink = m_context.getURL(WikiContext.VIEW,text); 370 el = new Element("img").setAttribute("class","inline"); 371 el.setAttribute("src",link); 372 el.setAttribute("alt",text); 373 el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el); 374 break; 375 376 case EXTERNAL: 377 el = createAnchor( EXTERNAL, link, text, section ); 378 if( m_useRelNofollow ) el.setAttribute("rel","nofollow"); 379 break; 380 381 case INTERWIKI: 382 el = createAnchor( INTERWIKI, link, text, section ); 383 break; 384 385 case ATTACHMENT: 386 String attlink = m_context.getURL( WikiContext.ATTACH, 387 link ); 388 389 String infolink = m_context.getURL( WikiContext.INFO, 390 link ); 391 392 String imglink = m_context.getURL( WikiContext.NONE, 393 "images/attachment_small.png" ); 394 395 el = createAnchor( ATTACHMENT, attlink, text, "" ); 396 397 pushElement(el); 398 popElement(el.getName()); 399 400 if( m_useAttachmentImage ) 401 { 402 el = new Element("img").setAttribute("src",imglink); 403 el.setAttribute("border","0"); 404 el.setAttribute("alt","(info)"); 405 406 el = new Element("a").setAttribute("href",infolink).addContent(el); 407 el.setAttribute("class","infolink"); 408 } 409 else 410 { 411 el = null; 412 } 413 break; 414 415 default: 416 break; 417 } 418 419 if( el != null && attributes != null ) 420 { 421 while( attributes.hasNext() ) 422 { 423 Attribute attr = attributes.next(); 424 if( attr != null ) 425 { 426 el.setAttribute(attr); 427 } 428 } 429 } 430 431 if( el != null ) 432 { 433 flushPlainText(); 434 m_currentElement.addContent( el ); 435 } 436 return el; 
437 } 438 439 /** 440 * Figures out if a link is an off-site link. This recognizes 441 * the most common protocols by checking how it starts. 442 * 443 * @param link The link to check. 444 * @return true, if this is a link outside of this wiki. 445 * @since 2.4 446 * @deprecated - use {@link LinkParsingOperations#isExternalLink(String)} instead. 447 */ 448 @Deprecated 449 public static boolean isExternalLink( String link ) 450 { 451 return new LinkParsingOperations(null).isExternalLink( link ); 452 } 453 454 /** 455 * Returns true if the link is really command to insert 456 * a plugin. 457 * <P> 458 * Currently we just check if the link starts with "{INSERT", 459 * or just plain "{" but not "{$". 460 * 461 * @param link Link text, i.e. the contents of text between []. 462 * @return True, if this link seems to be a command to insert a plugin here. 463 * @deprecated Use {@link LinkParsingOperations#isPluginLink(String)} instead, 464 */ 465 @Deprecated 466 public static boolean isPluginLink( String link ) 467 { 468 return new LinkParsingOperations( null ).isPluginLink( link ); 469 } 470 471 /** 472 * These are all of the HTML 4.01 block-level elements. 473 */ 474 private static final String[] BLOCK_ELEMENTS = { 475 "address", "blockquote", "div", "dl", "fieldset", "form", 476 "h1", "h2", "h3", "h4", "h5", "h6", 477 "hr", "noscript", "ol", "p", "pre", "table", "ul" 478 }; 479 480 private static boolean isBlockLevel( String name ) 481 { 482 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 483 } 484 485 /** 486 * This method peeks ahead in the stream until EOL and returns the result. 487 * It will keep the buffers untouched. 488 * 489 * @return The string from the current position to the end of line. 490 */ 491 492 // FIXME: Always returns an empty line, even if the stream is full. 
493 private String peekAheadLine() 494 throws IOException 495 { 496 String s = readUntilEOL().toString(); 497 498 if( s.length() > PUSHBACK_BUFFER_SIZE ) 499 { 500 log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover..."); 501 pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) ); 502 } 503 else 504 { 505 try 506 { 507 pushBack( s ); 508 } 509 catch( IOException e ) 510 { 511 log.warn("Pushback failed: the line is probably too long. Attempting to recover."); 512 } 513 } 514 return s; 515 } 516 517 private int flushPlainText() 518 { 519 int numChars = m_plainTextBuf.length(); 520 521 if( numChars > 0 ) 522 { 523 String buf; 524 525 if( !m_allowHTML ) 526 { 527 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 528 } 529 else 530 { 531 buf = m_plainTextBuf.toString(); 532 } 533 // 534 // We must first empty the buffer because the side effect of 535 // calling makeCamelCaseLink() is to call this routine. 536 // 537 538 m_plainTextBuf = new StringBuilder(20); 539 540 try 541 { 542 // 543 // This is the heaviest part of parsing, and therefore we can 544 // do some optimization here. 
545 // 546 // 1) Only when the length of the buffer is big enough, we try to do the match 547 // 548 549 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) 550 { 551 // System.out.println("Buffer="+buf); 552 553 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) 554 { 555 MatchResult result = m_camelCaseMatcher.getMatch(); 556 557 String firstPart = buf.substring(0,result.beginOffset(0)); 558 String prefix = result.group(1); 559 560 if( prefix == null ) prefix = ""; 561 562 String camelCase = result.group(2); 563 String protocol = result.group(3); 564 String uri = protocol+result.group(4); 565 buf = buf.substring(result.endOffset(0)); 566 567 m_currentElement.addContent( firstPart ); 568 569 // 570 // Check if the user does not wish to do URL or WikiWord expansion 571 // 572 if( prefix.endsWith("~") || prefix.indexOf('[') != -1 ) 573 { 574 if( prefix.endsWith("~") ) 575 { 576 if( m_wysiwygEditorMode ) 577 { 578 m_currentElement.addContent( "~" ); 579 } 580 prefix = prefix.substring(0,prefix.length()-1); 581 } 582 if( camelCase != null ) 583 { 584 m_currentElement.addContent( prefix+camelCase ); 585 } 586 else if( protocol != null ) 587 { 588 m_currentElement.addContent( prefix+uri ); 589 } 590 continue; 591 } 592 593 // 594 // Fine, then let's check what kind of a link this was 595 // and emit the proper elements 596 // 597 if( protocol != null ) 598 { 599 char c = uri.charAt(uri.length()-1); 600 if( c == '.' 
|| c == ',' ) 601 { 602 uri = uri.substring(0,uri.length()-1); 603 buf = c + buf; 604 } 605 // System.out.println("URI match "+uri); 606 m_currentElement.addContent( prefix ); 607 makeDirectURILink( uri ); 608 } 609 else 610 { 611 // System.out.println("Matched: '"+camelCase+"'"); 612 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 613 // System.out.println("prefix="+prefix); 614 m_currentElement.addContent( prefix ); 615 616 makeCamelCaseLink( camelCase ); 617 } 618 } 619 620 m_currentElement.addContent( buf ); 621 } 622 else 623 { 624 // 625 // No camelcase asked for, just add the elements 626 // 627 m_currentElement.addContent( buf ); 628 } 629 } 630 catch( IllegalDataException e ) 631 { 632 // 633 // Sometimes it's possible that illegal XML chars is added to the data. 634 // Here we make sure it does not stop parsing. 635 // 636 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 637 } 638 } 639 640 return numChars; 641 } 642 643 /** 644 * Escapes XML entities in a HTML-compatible way (i.e. does not escape 645 * entities that are already escaped). 646 * 647 * @param buf 648 * @return An escaped string. 
*/
private String escapeHTMLEntities(String buf)
{
    StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );

    for( int i = 0; i < buf.length(); i++ )
    {
        char ch = buf.charAt(i);

        switch( ch )
        {
            case '<':
                tmpBuf.append("&lt;");
                break;

            case '>':
                tmpBuf.append("&gt;");
                break;

            case '\"':
                tmpBuf.append("&quot;");
                break;

            case '&':
                //
                //  If the following is an XML entity reference (&#.*;) we'll
                //  leave it as it is; otherwise we'll replace it with an &amp;
                //
                int entityEnd = findEntityEnd( buf, i );
                if( entityEnd != -1 )
                {
                    // Copy the whole reference, including '&' and ';', and skip past it.
                    tmpBuf.append( buf, i, entityEnd + 1 );
                    i = entityEnd;
                }
                else
                {
                    tmpBuf.append("&amp;");
                }
                break;

            default:
                tmpBuf.append( ch );
                break;
        }
    }

    return tmpBuf.toString();
}

/**
 *  Looks for an entity reference starting at the given ampersand: the
 *  ampersand, an optional '#' directly after it, letters or digits, and
 *  a terminating ';'.  At least one character is required between the
 *  ampersand and the semicolon, and a second '&' ends the scan, so that
 *  a stray ampersand in front of a real entity still gets escaped.
 *
 *  @param buf The text being escaped.
 *  @param ampPos Index of the '&' character in buf.
 *  @return The index of the terminating ';', or -1 if this is not an entity reference.
 */
private static int findEntityEnd( String buf, int ampPos )
{
    for( int j = ampPos + 1; j < buf.length(); j++ )
    {
        char c = buf.charAt( j );

        if( c == ';' )
        {
            return ( j > ampPos + 1 ) ? j : -1;
        }

        boolean hashRightAfterAmp = ( c == '#' && j == ampPos + 1 );
        if( !Character.isLetterOrDigit( c ) && !hashRightAfterAmp )
        {
            return -1;
        }
    }

    return -1;
}

/**
 *  Flushes the pending plain text, appends the element to the current
 *  element and makes it the new current element.
 *
 *  @param e The element to open.
 *  @return The same element.
 */
private Element pushElement( Element e )
{
    flushPlainText();
    m_currentElement.addContent( e );
    m_currentElement = e;

    return e;
}

/**
 *  Flushes the pending plain text and appends the content to the current
 *  element without changing which element is current.  A null content is
 *  ignored (and nothing is flushed).
 *
 *  @param e The content to add, may be null.
 *  @return The current element.
 */
private Element addElement( Content e )
{
    if( e != null )
    {
        flushPlainText();
        m_currentElement.addContent( e );
    }
    return m_currentElement;
}

/**
 *  All elements that can be empty by the HTML DTD.
 */
//  Keep sorted.
746 private static final String[] EMPTY_ELEMENTS = { 747 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 748 }; 749 750 /** 751 * Goes through the current element stack and pops all elements until this 752 * element is found - this essentially "closes" and element. 753 * 754 * @param s 755 * @return The new current element, or null, if there was no such element in the entire stack. 756 */ 757 private Element popElement( String s ) 758 { 759 int flushedBytes = flushPlainText(); 760 761 Element currEl = m_currentElement; 762 763 while( currEl.getParentElement() != null ) 764 { 765 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 766 { 767 m_currentElement = currEl.getParentElement(); 768 769 // 770 // Check if it's okay for this element to be empty. Then we will 771 // trick the JDOM generator into not generating an empty element, 772 // by putting an empty string between the tags. Yes, it's a kludge 773 // but what'cha gonna do about it. :-) 774 // 775 776 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 777 { 778 currEl.addContent(""); 779 } 780 781 return m_currentElement; 782 } 783 784 currEl = currEl.getParentElement(); 785 } 786 787 return null; 788 } 789 790 791 /** 792 * Reads the stream until it meets one of the specified 793 * ending characters, or stream end. The ending character will be left 794 * in the stream. 
795 */ 796 private String readUntil( String endChars ) 797 throws IOException 798 { 799 StringBuilder sb = new StringBuilder( 80 ); 800 int ch = nextToken(); 801 802 while( ch != -1 ) 803 { 804 if( ch == '\\' ) 805 { 806 ch = nextToken(); 807 if( ch == -1 ) 808 { 809 break; 810 } 811 } 812 else 813 { 814 if( endChars.indexOf((char)ch) != -1 ) 815 { 816 pushBack( ch ); 817 break; 818 } 819 } 820 sb.append( (char) ch ); 821 ch = nextToken(); 822 } 823 824 return sb.toString(); 825 } 826 827 /** 828 * Reads the stream while the characters that have been specified are 829 * in the stream, returning then the result as a String. 830 */ 831 private String readWhile( String endChars ) 832 throws IOException 833 { 834 StringBuilder sb = new StringBuilder( 80 ); 835 int ch = nextToken(); 836 837 while( ch != -1 ) 838 { 839 if( endChars.indexOf((char)ch) == -1 ) 840 { 841 pushBack( ch ); 842 break; 843 } 844 845 sb.append( (char) ch ); 846 ch = nextToken(); 847 } 848 849 return sb.toString(); 850 } 851 852 private JSPWikiMarkupParser m_cleanTranslator; 853 854 /** 855 * Does a lazy init. Otherwise, we would get into a situation 856 * where HTMLRenderer would try and boot a TranslatorReader before 857 * the TranslatorReader it is contained by is up. 858 */ 859 private JSPWikiMarkupParser getCleanTranslator() 860 { 861 if( m_cleanTranslator == null ) 862 { 863 WikiContext dummyContext = new WikiContext( m_engine, 864 m_context.getHttpRequest(), 865 m_context.getPage() ); 866 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 867 868 m_cleanTranslator.m_allowHTML = true; 869 } 870 871 return m_cleanTranslator; 872 } 873 /** 874 * Modifies the "hd" parameter to contain proper values. Because 875 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 876 * % after url encoding with '_'. 877 * <p> 878 * Counts also duplicate headings (= headings with similar name), and 879 * attaches a counter. 
880 */ 881 private String makeHeadingAnchor( String baseName, String title, Heading hd ) 882 { 883 hd.m_titleText = title; 884 title = MarkupParser.wikifyLink( title ); 885 886 hd.m_titleSection = m_engine.encodeName(title); 887 888 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) 889 { 890 Integer count = m_titleSectionCounter.get( hd.m_titleSection ); 891 count = count + 1; 892 m_titleSectionCounter.put( hd.m_titleSection, count ); 893 hd.m_titleSection += "-" + count; 894 } 895 else 896 { 897 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 898 } 899 900 hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+ 901 "-"+hd.m_titleSection; 902 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 903 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 904 905 return hd.m_titleAnchor; 906 } 907 908 private String makeSectionTitle( String title ) 909 { 910 title = title.trim(); 911 String outTitle; 912 913 try 914 { 915 JSPWikiMarkupParser dtr = getCleanTranslator(); 916 dtr.setInputReader( new StringReader(title) ); 917 918 CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse()); 919 920 outTitle = ctt.getString(); 921 } 922 catch( IOException e ) 923 { 924 log.fatal("CleanTranslator not working", e); 925 throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e); 926 } 927 928 return outTitle; 929 } 930 931 /** 932 * Returns XHTML for the heading. 933 * 934 * @param level The level of the heading. 
@see Heading 935 * @param title the title for the heading 936 * @param hd a List to which heading should be added 937 * @return An Element containing the heading 938 */ 939 public Element makeHeading( int level, String title, Heading hd ) 940 { 941 Element el = null; 942 943 String pageName = m_context.getPage().getName(); 944 945 String outTitle = makeSectionTitle( title ); 946 947 hd.m_level = level; 948 949 switch( level ) 950 { 951 case Heading.HEADING_SMALL: 952 el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 953 break; 954 955 case Heading.HEADING_MEDIUM: 956 el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 957 break; 958 959 case Heading.HEADING_LARGE: 960 el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 961 break; 962 963 default: 964 throw new InternalWikiException("Illegal heading type "+level); 965 } 966 967 968 return el; 969 } 970 971 /** 972 * When given a link to a WikiName, we just return 973 * a proper HTML link for it. The local link mutator 974 * chain is also called. 975 */ 976 private Element makeCamelCaseLink( String wikiname ) 977 { 978 String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 979 980 callMutatorChain( m_localLinkMutatorChain, wikiname ); 981 982 if( matchedLink != null ) { 983 makeLink( READ, matchedLink, wikiname, null, null ); 984 } else { 985 makeLink( EDIT, wikiname, wikiname, null, null ); 986 } 987 988 return m_currentElement; 989 } 990 991 /** Holds the image URL for the duration of this parser */ 992 private String m_outlinkImageURL = null; 993 994 /** 995 * Returns an element for the external link image (out.png). However, 996 * this method caches the URL for the lifetime of this MarkupParser, 997 * because it's commonly used, and we'll end up with possibly hundreds 998 * our thousands of references to it... It's a lot faster, too. 
999 * 1000 * @return An element containing the HTML for the outlink image. 1001 */ 1002 private Element outlinkImage() 1003 { 1004 Element el = null; 1005 1006 if( m_useOutlinkImage ) 1007 { 1008 if( m_outlinkImageURL == null ) 1009 { 1010 m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE ); 1011 } 1012 1013 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 1014 el.setAttribute( "src", m_outlinkImageURL ); 1015 el.setAttribute( "alt","" ); 1016 } 1017 1018 return el; 1019 } 1020 1021 /** 1022 * Takes an URL and turns it into a regular wiki link. Unfortunately, 1023 * because of the way that flushPlainText() works, it already encodes 1024 * all of the XML entities. But so does WikiContext.getURL(), so we 1025 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 1026 * <p> 1027 * What a crappy problem. 1028 * 1029 * @param url 1030 * @return An anchor Element containing the link. 1031 */ 1032 private Element makeDirectURILink( String url ) 1033 { 1034 Element result; 1035 String last = null; 1036 1037 if( url.endsWith(",") || url.endsWith(".") ) 1038 { 1039 last = url.substring( url.length()-1 ); 1040 url = url.substring( 0, url.length()-1 ); 1041 } 1042 1043 callMutatorChain( m_externalLinkMutatorChain, url ); 1044 1045 if( m_linkParsingOperations.isImageLink( url ) ) 1046 { 1047 result = handleImageLink( StringUtils.replace(url,"&","&"), url, false ); 1048 } 1049 else 1050 { 1051 result = makeLink( EXTERNAL, StringUtils.replace(url,"&","&"), url, null, null ); 1052 addElement( outlinkImage() ); 1053 } 1054 1055 if( last != null ) 1056 { 1057 m_plainTextBuf.append(last); 1058 } 1059 1060 return result; 1061 } 1062 1063 /** 1064 * Image links are handled differently: 1065 * 1. If the text is a WikiName of an existing page, 1066 * it gets linked. 1067 * 2. If the text is an external link, then it is inlined. 1068 * 3. Otherwise it becomes an ALT text. 1069 * 1070 * @param reallink The link to the image. 
1071 * @param link Link text portion, may be a link to somewhere else. 1072 * @param hasLinkText If true, then the defined link had a link text available. 1073 * This means that the link text may be a link to a wiki page, 1074 * or an external resource. 1075 */ 1076 1077 // FIXME: isExternalLink() is called twice. 1078 private Element handleImageLink( String reallink, String link, boolean hasLinkText ) 1079 { 1080 String possiblePage = MarkupParser.cleanLink( link ); 1081 1082 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) 1083 { 1084 return makeLink( IMAGELINK, reallink, link, null, null ); 1085 } 1086 else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) 1087 { 1088 // System.out.println("Orig="+link+", Matched: "+matchedLink); 1089 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 1090 1091 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 1092 } 1093 else 1094 { 1095 return makeLink( IMAGE, reallink, link, null, null ); 1096 } 1097 } 1098 1099 private Element handleAccessRule( String ruleLine ) 1100 { 1101 if( m_wysiwygEditorMode ) 1102 { 1103 m_currentElement.addContent( "[" + ruleLine + "]" ); 1104 } 1105 1106 if( !m_parseAccessRules ) return m_currentElement; 1107 Acl acl; 1108 WikiPage page = m_context.getRealPage(); 1109 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1110 1111 if( ruleLine.startsWith( "{" ) ) 1112 ruleLine = ruleLine.substring( 1 ); 1113 if( ruleLine.endsWith( "}" ) ) 1114 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1115 1116 if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine); 1117 1118 try 1119 { 1120 acl = m_engine.getAclManager().parseAcl( page, ruleLine ); 1121 1122 page.setAcl( acl ); 1123 1124 if( log.isDebugEnabled() ) log.debug( acl.toString() ); 1125 } 1126 catch( WikiSecurityException wse ) 1127 { 1128 return makeError( wse.getMessage() ); 1129 } 1130 1131 return m_currentElement; 1132 } 1133 1134 /** 1135 
* Handles metadata setting [{SET foo=bar}] 1136 */ 1137 private Element handleMetadata( String link ) 1138 { 1139 if( m_wysiwygEditorMode ) 1140 { 1141 m_currentElement.addContent( "[" + link + "]" ); 1142 } 1143 1144 try 1145 { 1146 String args = link.substring( link.indexOf(' '), link.length()-1 ); 1147 1148 String name = args.substring( 0, args.indexOf('=') ); 1149 String val = args.substring( args.indexOf('=')+1, args.length() ); 1150 1151 name = name.trim(); 1152 val = val.trim(); 1153 1154 if( val.startsWith("'") ) val = val.substring( 1 ); 1155 if( val.endsWith("'") ) val = val.substring( 0, val.length()-1 ); 1156 1157 // log.debug("SET name='"+name+"', value='"+val+"'."); 1158 1159 if( name.length() > 0 && val.length() > 0 ) 1160 { 1161 val = m_engine.getVariableManager().expandVariables( m_context, 1162 val ); 1163 1164 m_context.getPage().setAttribute( name, val ); 1165 } 1166 } 1167 catch( Exception e ) 1168 { 1169 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1170 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1171 } 1172 1173 return m_currentElement; 1174 } 1175 1176 /** 1177 * Emits a processing instruction that will disable markup escaping. This is 1178 * very useful if you want to emit HTML directly into the stream. 1179 * 1180 */ 1181 private void disableOutputEscaping() 1182 { 1183 addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") ); 1184 } 1185 1186 /** 1187 * Gobbles up all hyperlinks that are encased in square brackets. 
1188 */ 1189 private Element handleHyperlinks( String linktext, int pos ) 1190 { 1191 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1192 1193 StringBuilder sb = new StringBuilder(linktext.length()+80); 1194 1195 if( m_linkParsingOperations.isAccessRule( linktext ) ) 1196 { 1197 return handleAccessRule( linktext ); 1198 } 1199 1200 if( m_linkParsingOperations.isMetadata( linktext ) ) 1201 { 1202 return handleMetadata( linktext ); 1203 } 1204 1205 if( m_linkParsingOperations.isPluginLink( linktext ) ) 1206 { 1207 try 1208 { 1209 PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1210 // 1211 // This might sometimes fail, especially if there is something which looks 1212 // like a plugin invocation but is really not. 1213 // 1214 if( pluginContent != null ) 1215 { 1216 addElement( pluginContent ); 1217 1218 pluginContent.executeParse( m_context ); 1219 } 1220 } 1221 catch( PluginException e ) 1222 { 1223 log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 1224 //log.info( "Root cause:",e.getRootThrowable() ); 1225 if( !m_wysiwygEditorMode ) 1226 { 1227 ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1228 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 1229 m_context.getRealPage().getWiki(), 1230 m_context.getRealPage().getName(), 1231 e.getMessage() ) ) ); 1232 } 1233 } 1234 1235 return m_currentElement; 1236 } 1237 1238 try 1239 { 1240 LinkParser.Link link = m_linkParser.parse(linktext); 1241 linktext = link.getText(); 1242 String linkref = link.getReference(); 1243 1244 // 1245 // Yes, we now have the components separated. 1246 // linktext = the text the link should have 1247 // linkref = the url or page name. 1248 // 1249 // In many cases these are the same. [linktext|linkref]. 
1250 // 1251 if( m_linkParsingOperations.isVariableLink( linktext ) ) 1252 { 1253 Content el = new VariableContent(linktext); 1254 1255 addElement( el ); 1256 } 1257 else if( m_linkParsingOperations.isExternalLink( linkref ) ) 1258 { 1259 // It's an external link, out of this Wiki 1260 1261 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1262 1263 if( m_linkParsingOperations.isImageLink( linkref ) ) 1264 { 1265 handleImageLink( linkref, linktext, link.hasReference() ); 1266 } 1267 else 1268 { 1269 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1270 addElement( outlinkImage() ); 1271 } 1272 } 1273 else if( link.isInterwikiLink() ) 1274 { 1275 // It's an interwiki link 1276 // InterWiki links also get added to external link chain 1277 // after the links have been resolved. 1278 1279 // FIXME: There is an interesting issue here: We probably should 1280 // URLEncode the wikiPage, but we can't since some of the 1281 // Wikis use slashes (/), which won't survive URLEncoding. 1282 // Besides, we don't know which character set the other Wiki 1283 // is using, so you'll have to write the entire name as it appears 1284 // in the URL. Bugger. 
1285 1286 String extWiki = link.getExternalWiki(); 1287 String wikiPage = link.getExternalWikiPage(); 1288 1289 if( m_wysiwygEditorMode ) 1290 { 1291 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1292 } 1293 else 1294 { 1295 String urlReference = m_engine.getInterWikiURL( extWiki ); 1296 1297 if( urlReference != null ) 1298 { 1299 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1300 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1301 1302 if( m_linkParsingOperations.isImageLink(urlReference) ) 1303 { 1304 handleImageLink( urlReference, linktext, link.hasReference() ); 1305 } 1306 else 1307 { 1308 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1309 } 1310 1311 if( m_linkParsingOperations.isExternalLink(urlReference) ) 1312 { 1313 addElement( outlinkImage() ); 1314 } 1315 } 1316 else 1317 { 1318 Object[] args = { extWiki }; 1319 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1320 } 1321 } 1322 } 1323 else if( linkref.startsWith("#") ) 1324 { 1325 // It defines a local footnote 1326 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1327 } 1328 else if( TextUtil.isNumber( linkref ) ) 1329 { 1330 // It defines a reference to a local footnote 1331 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1332 } 1333 else 1334 { 1335 int hashMark = -1; 1336 1337 // 1338 // Internal wiki link, but is it an attachment link? 
1339 // 1340 String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref ); 1341 if( attachment != null ) 1342 { 1343 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1344 1345 if( m_linkParsingOperations.isImageLink( linkref ) ) 1346 { 1347 attachment = m_context.getURL( WikiContext.ATTACH, attachment ); 1348 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1349 } 1350 else 1351 { 1352 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1353 } 1354 } 1355 else if( (hashMark = linkref.indexOf('#')) != -1 ) 1356 { 1357 // It's an internal Wiki link, but to a named section 1358 1359 String namedSection = linkref.substring( hashMark+1 ); 1360 linkref = linkref.substring( 0, hashMark ); 1361 1362 linkref = MarkupParser.cleanLink( linkref ); 1363 1364 callMutatorChain( m_localLinkMutatorChain, linkref ); 1365 1366 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1367 if( matchedLink != null ) { 1368 String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection)); 1369 sectref = sectref.replace('%', '_'); 1370 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1371 } else { 1372 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1373 } 1374 } 1375 else 1376 { 1377 // It's an internal Wiki link 1378 linkref = MarkupParser.cleanLink( linkref ); 1379 1380 callMutatorChain( m_localLinkMutatorChain, linkref ); 1381 1382 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1383 if( matchedLink != null ) { 1384 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1385 } else { 1386 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1387 } 1388 } 1389 } 1390 } 1391 catch( ParseException e ) 1392 { 1393 log.info("Parser failure: ",e); 1394 Object[] args = { e.getMessage() }; 1395 addElement( makeError( MessageFormat.format( rb.getString( 
"markupparser.error.parserfailure" ), args ) ) ); 1396 } 1397 1398 return m_currentElement; 1399 } 1400 1401 /** 1402 * Pushes back any string that has been read. It will obviously 1403 * be pushed back in a reverse order. 1404 * 1405 * @since 2.1.77 1406 */ 1407 private void pushBack( String s ) 1408 throws IOException 1409 { 1410 for( int i = s.length()-1; i >= 0; i-- ) 1411 { 1412 pushBack( s.charAt(i) ); 1413 } 1414 } 1415 1416 private Element handleBackslash() 1417 throws IOException 1418 { 1419 int ch = nextToken(); 1420 1421 if( ch == '\\' ) 1422 { 1423 int ch2 = nextToken(); 1424 1425 if( ch2 == '\\' ) 1426 { 1427 pushElement( new Element("br").setAttribute("clear","all")); 1428 return popElement("br"); 1429 } 1430 1431 pushBack( ch2 ); 1432 1433 pushElement( new Element("br") ); 1434 return popElement("br"); 1435 } 1436 1437 pushBack( ch ); 1438 1439 return null; 1440 } 1441 1442 private Element handleUnderscore() 1443 throws IOException 1444 { 1445 int ch = nextToken(); 1446 Element el = null; 1447 1448 if( ch == '_' ) 1449 { 1450 if( m_isbold ) 1451 { 1452 el = popElement("b"); 1453 } 1454 else 1455 { 1456 el = pushElement( new Element("b") ); 1457 } 1458 m_isbold = !m_isbold; 1459 } 1460 else 1461 { 1462 pushBack( ch ); 1463 } 1464 1465 return el; 1466 } 1467 1468 1469 /** 1470 * For example: italics. 
1471 */ 1472 private Element handleApostrophe() 1473 throws IOException 1474 { 1475 int ch = nextToken(); 1476 Element el = null; 1477 1478 if( ch == '\'' ) 1479 { 1480 if( m_isitalic ) 1481 { 1482 el = popElement("i"); 1483 } 1484 else 1485 { 1486 el = pushElement( new Element("i") ); 1487 } 1488 m_isitalic = !m_isitalic; 1489 } 1490 else 1491 { 1492 pushBack( ch ); 1493 } 1494 1495 return el; 1496 } 1497 1498 private Element handleOpenbrace( boolean isBlock ) 1499 throws IOException 1500 { 1501 int ch = nextToken(); 1502 1503 if( ch == '{' ) 1504 { 1505 int ch2 = nextToken(); 1506 1507 if( ch2 == '{' ) 1508 { 1509 m_isPre = true; 1510 m_isEscaping = true; 1511 m_isPreBlock = isBlock; 1512 1513 if( isBlock ) 1514 { 1515 startBlockLevel(); 1516 return pushElement( new Element("pre") ); 1517 } 1518 1519 return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") ); 1520 } 1521 1522 pushBack( ch2 ); 1523 1524 return pushElement( new Element("tt") ); 1525 } 1526 1527 pushBack( ch ); 1528 1529 return null; 1530 } 1531 1532 /** 1533 * Handles both }} and }}} 1534 */ 1535 private Element handleClosebrace() 1536 throws IOException 1537 { 1538 int ch2 = nextToken(); 1539 1540 if( ch2 == '}' ) 1541 { 1542 int ch3 = nextToken(); 1543 1544 if( ch3 == '}' ) 1545 { 1546 if( m_isPre ) 1547 { 1548 if( m_isPreBlock ) 1549 { 1550 popElement( "pre" ); 1551 } 1552 else 1553 { 1554 popElement( "span" ); 1555 } 1556 1557 m_isPre = false; 1558 m_isEscaping = false; 1559 return m_currentElement; 1560 } 1561 1562 m_plainTextBuf.append("}}}"); 1563 return m_currentElement; 1564 } 1565 1566 pushBack( ch3 ); 1567 1568 if( !m_isEscaping ) 1569 { 1570 return popElement("tt"); 1571 } 1572 } 1573 1574 pushBack( ch2 ); 1575 1576 return null; 1577 } 1578 1579 private Element handleDash() 1580 throws IOException 1581 { 1582 int ch = nextToken(); 1583 1584 if( ch == '-' ) 1585 { 1586 int ch2 = nextToken(); 1587 1588 if( ch2 == '-' ) 1589 { 1590 int ch3 = 
nextToken(); 1591 1592 if( ch3 == '-' ) 1593 { 1594 // Empty away all the rest of the dashes. 1595 // Do not forget to return the first non-match back. 1596 do 1597 { 1598 ch = nextToken(); 1599 } 1600 while ( ch == '-' ); 1601 1602 pushBack(ch); 1603 startBlockLevel(); 1604 pushElement( new Element("hr") ); 1605 return popElement( "hr" ); 1606 } 1607 1608 pushBack( ch3 ); 1609 } 1610 pushBack( ch2 ); 1611 } 1612 1613 pushBack( ch ); 1614 1615 return null; 1616 } 1617 1618 private Element handleHeading() 1619 throws IOException 1620 { 1621 Element el = null; 1622 1623 int ch = nextToken(); 1624 1625 Heading hd = new Heading(); 1626 1627 if( ch == '!' ) 1628 { 1629 int ch2 = nextToken(); 1630 1631 if( ch2 == '!' ) 1632 { 1633 String title = peekAheadLine(); 1634 1635 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1636 } 1637 else 1638 { 1639 pushBack( ch2 ); 1640 String title = peekAheadLine(); 1641 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1642 } 1643 } 1644 else 1645 { 1646 pushBack( ch ); 1647 String title = peekAheadLine(); 1648 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1649 } 1650 1651 callHeadingListenerChain( hd ); 1652 1653 m_lastHeading = hd; 1654 1655 if( el != null ) pushElement(el); 1656 1657 return el; 1658 } 1659 1660 /** 1661 * Reads the stream until the next EOL or EOF. Note that it will also read the 1662 * EOL from the stream. 
1663 */ 1664 private StringBuilder readUntilEOL() 1665 throws IOException 1666 { 1667 int ch; 1668 StringBuilder buf = new StringBuilder( 256 ); 1669 1670 while( true ) 1671 { 1672 ch = nextToken(); 1673 1674 if( ch == -1 ) 1675 break; 1676 1677 buf.append( (char) ch ); 1678 1679 if( ch == '\n' ) 1680 break; 1681 } 1682 return buf; 1683 } 1684 1685 /** Controls whether italic is restarted after a paragraph shift */ 1686 1687 private boolean m_restartitalic = false; 1688 private boolean m_restartbold = false; 1689 1690 private boolean m_newLine; 1691 1692 /** 1693 * Starts a block level element, therefore closing 1694 * a potential open paragraph tag. 1695 */ 1696 private void startBlockLevel() 1697 { 1698 // These may not continue over block level limits in XHTML 1699 1700 popElement("i"); 1701 popElement("b"); 1702 popElement("tt"); 1703 1704 if( m_isOpenParagraph ) 1705 { 1706 m_isOpenParagraph = false; 1707 popElement("p"); 1708 m_plainTextBuf.append("\n"); // Just small beautification 1709 } 1710 1711 m_restartitalic = m_isitalic; 1712 m_restartbold = m_isbold; 1713 1714 m_isitalic = false; 1715 m_isbold = false; 1716 } 1717 1718 private static String getListType( char c ) 1719 { 1720 if( c == '*' ) 1721 { 1722 return "ul"; 1723 } 1724 else if( c == '#' ) 1725 { 1726 return "ol"; 1727 } 1728 throw new InternalWikiException("Parser got faulty list type: "+c); 1729 } 1730 /** 1731 * Like original handleOrderedList() and handleUnorderedList() 1732 * however handles both ordered ('#') and unordered ('*') mixed together. 1733 */ 1734 1735 // FIXME: Refactor this; it's a bit messy. 
// Reads the run of '*' / '#' bullets at the current position, opens or closes
// ul/ol/li elements so that the open list stack matches that run, pushes a new
// list item and remembers the run in m_genlistBulletBuffer for the next line.
// Returns the current element (the freshly pushed <li>).
//
// NOTE(review): the code appears to assume that m_genlistlevel always equals the
// length of m_genlistBulletBuffer - confirm before changing either independently.
private Element handleGeneralList()
    throws IOException
{
    startBlockLevel();

    String strBullets = readWhile( "*#" );
    // String strBulletsRaw = strBullets;      // to know what was original before phpwiki style substitution
    int numBullets = strBullets.length();

    // override the beginning portion of bullet pattern to be like the previous
    // to simulate PHPWiki style lists

    if(m_allowPHPWikiStyleLists)
    {
        // only substitute if different
        // (compares the part the new run and the previous run have in common by position)
        if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
              (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) )
        {
            if(numBullets <= m_genlistlevel)
            {
                // Substitute all but the last character (keep the expressed bullet preference)
                strBullets  = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "")
                              + strBullets.substring(numBullets-1, numBullets);
            }
            else
            {
                // Deeper than before: keep the whole previous pattern, append only the new levels.
                strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets);
            }
        }
    }

    //
    //  Check if this is still of the same type
    //
    if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals
       (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) )
    {
        if( numBullets > m_genlistlevel )
        {
            // The first new level is opened inside the <li> that is already open (if any) ...
            pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) );

            // ... every further level needs its own wrapping <li>.
            for( ; m_genlistlevel < numBullets; m_genlistlevel++ )
            {
                // bullets are growing, get from new bullet list
                pushElement( new Element("li") );
                pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) ));
            }
        }
        else if( numBullets < m_genlistlevel )
        {
            //  Close the previous list item.
            // buf.append( m_renderer.closeListItem() );
            popElement( "li" );

            for( ; m_genlistlevel > numBullets; m_genlistlevel-- )
            {
                // bullets are shrinking, get from old bullet list

                popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) );
                // Always true inside this loop (m_genlistlevel > numBullets >= 0).
                if( m_genlistlevel > 0 )
                {
                    popElement( "li" );
                }

            }
        }
        else
        {
            // Same depth, same pattern: just close the previous item.
            if( m_genlistlevel > 0 )
            {
                popElement( "li" );
            }
        }
    }
    else
    {
        //
        //  The pattern has changed, unwind and restart
        //
        int  numEqualBullets;
        int  numCheckBullets;

        // find out how much is the same
        numEqualBullets = 0;
        numCheckBullets = Math.min(numBullets,m_genlistlevel);

        while( numEqualBullets < numCheckBullets )
        {
            // if the bullets are equal so far, keep going
            if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets))
                numEqualBullets++;
            // otherwise giveup, we have found how many are equal
            else
                break;
        }

        //unwind
        // (close the old lists down to the last level both patterns share)
        for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- )
        {
            popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) );
            if( m_genlistlevel > numBullets )
            {
                popElement("li");
            }
        }

        //rewind
        // (open the lists of the new pattern from the first differing level on)

        pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) );
        for(int i = numEqualBullets; i < numBullets; i++)
        {
            pushElement( new Element("li") );
            pushElement( new Element( getListType( strBullets.charAt(i) ) ) );
        }
        m_genlistlevel = numBullets;
    }

    //
    //  Push a new list item, and eat away any extra whitespace
    //
    pushElement( new Element("li") );
    readWhile(" ");

    // work done, remember the new bullet list (in place of old one)
    m_genlistBulletBuffer.setLength(0);
    m_genlistBulletBuffer.append(strBullets);

    return m_currentElement;
}
1866 1867 private Element unwindGeneralList() 1868 { 1869 //unwind 1870 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 1871 { 1872 popElement( "li" ); 1873 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1874 } 1875 1876 m_genlistBulletBuffer.setLength(0); 1877 1878 return null; 1879 } 1880 1881 1882 private Element handleDefinitionList() 1883 throws IOException 1884 { 1885 if( !m_isdefinition ) 1886 { 1887 m_isdefinition = true; 1888 1889 startBlockLevel(); 1890 1891 pushElement( new Element("dl") ); 1892 return pushElement( new Element("dt") ); 1893 } 1894 1895 return null; 1896 } 1897 1898 private Element handleOpenbracket() 1899 throws IOException 1900 { 1901 StringBuilder sb = new StringBuilder(40); 1902 int pos = getPosition(); 1903 int ch = nextToken(); 1904 boolean isPlugin = false; 1905 1906 if( ch == '[' ) 1907 { 1908 if( m_wysiwygEditorMode ) 1909 { 1910 sb.append( '[' ); 1911 } 1912 1913 sb.append( (char)ch ); 1914 1915 while( (ch = nextToken()) == '[' ) 1916 { 1917 sb.append( (char)ch ); 1918 } 1919 } 1920 1921 1922 if( ch == '{' ) 1923 { 1924 isPlugin = true; 1925 } 1926 1927 pushBack( ch ); 1928 1929 if( sb.length() > 0 ) 1930 { 1931 m_plainTextBuf.append( sb ); 1932 return m_currentElement; 1933 } 1934 1935 // 1936 // Find end of hyperlink 1937 // 1938 1939 ch = nextToken(); 1940 int nesting = 1; // Check for nested plugins 1941 1942 while( ch != -1 ) 1943 { 1944 int ch2 = nextToken(); pushBack(ch2); 1945 1946 if( isPlugin ) 1947 { 1948 if( ch == '[' && ch2 == '{' ) 1949 { 1950 nesting++; 1951 } 1952 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 1953 { 1954 break; 1955 } 1956 else if( ch == '}' && ch2 == ']' ) 1957 { 1958 // NB: This will be decremented once at the end 1959 nesting--; 1960 } 1961 } 1962 else 1963 { 1964 if( ch == ']' ) 1965 { 1966 break; 1967 } 1968 } 1969 1970 sb.append( (char) ch ); 1971 1972 ch = nextToken(); 1973 } 1974 1975 // 1976 // If the link is never finished, do some 
tricks to display the rest of the line 1977 // unchanged. 1978 // 1979 if( ch == -1 ) 1980 { 1981 log.debug("Warning: unterminated link detected!"); 1982 m_isEscaping = true; 1983 m_plainTextBuf.append( sb ); 1984 flushPlainText(); 1985 m_isEscaping = false; 1986 return m_currentElement; 1987 } 1988 1989 return handleHyperlinks( sb.toString(), pos ); 1990 } 1991 1992 /** 1993 * Reads the stream until the current brace is closed or stream end. 1994 */ 1995 private String readBraceContent( char opening, char closing ) 1996 throws IOException 1997 { 1998 StringBuilder sb = new StringBuilder(40); 1999 int braceLevel = 1; 2000 int ch; 2001 while(( ch = nextToken() ) != -1 ) 2002 { 2003 if( ch == '\\' ) 2004 { 2005 continue; 2006 } 2007 else if ( ch == opening ) 2008 { 2009 braceLevel++; 2010 } 2011 else if ( ch == closing ) 2012 { 2013 braceLevel--; 2014 if (braceLevel==0) 2015 { 2016 break; 2017 } 2018 } 2019 sb.append( (char)ch ); 2020 } 2021 return sb.toString(); 2022 } 2023 2024 2025 /** 2026 * Handles constructs of type %%(style) and %%class 2027 * @param newLine 2028 * @return An Element containing the div or span, depending on the situation. 2029 * @throws IOException 2030 */ 2031 private Element handleDiv( boolean newLine ) 2032 throws IOException 2033 { 2034 int ch = nextToken(); 2035 Element el = null; 2036 2037 if( ch == '%' ) 2038 { 2039 String style = null; 2040 String clazz = null; 2041 2042 ch = nextToken(); 2043 2044 // 2045 // Style or class? 2046 // 2047 if( ch == '(' ) 2048 { 2049 style = readBraceContent('(',')'); 2050 } 2051 else if( Character.isLetter( (char) ch ) ) 2052 { 2053 pushBack( ch ); 2054 clazz = readUntil( " \t\n\r" ); 2055 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 2056 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 2057 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 2058 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 2059 2060 //(1) replace '.' 
by spaces, allowing multiple classnames on a div or span 2061 //(2) remove any invalid character 2062 if( clazz != null){ 2063 2064 clazz = clazz.replace('.', ' ') 2065 .replaceAll("[^\\s-_\\w\\x200-\\x377]+",""); 2066 2067 } 2068 ch = nextToken(); 2069 2070 // 2071 // Pop out only spaces, so that the upcoming EOL check does not check the 2072 // next line. 2073 // 2074 if( ch == '\n' || ch == '\r' ) 2075 { 2076 pushBack(ch); 2077 } 2078 } 2079 else 2080 { 2081 // 2082 // Anything else stops. 2083 // 2084 2085 pushBack(ch); 2086 2087 try 2088 { 2089 Boolean isSpan = m_styleStack.pop(); 2090 2091 if( isSpan == null ) 2092 { 2093 // Fail quietly 2094 } 2095 else if( isSpan.booleanValue() ) 2096 { 2097 el = popElement( "span" ); 2098 } 2099 else 2100 { 2101 el = popElement( "div" ); 2102 } 2103 } 2104 catch( EmptyStackException e ) 2105 { 2106 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 2107 return m_currentElement; 2108 } 2109 2110 return el; 2111 } 2112 2113 // 2114 // Check if there is an attempt to do something nasty 2115 // 2116 2117 try 2118 { 2119 style = StringEscapeUtils.unescapeHtml(style); 2120 if( style != null && style.indexOf("javascript:") != -1 ) 2121 { 2122 log.debug("Attempt to output javascript within CSS:"+style); 2123 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2124 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 2125 } 2126 } 2127 catch( NumberFormatException e ) 2128 { 2129 // 2130 // If there are unknown entities, we don't want the parser to stop. 2131 // 2132 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2133 String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 2134 return addElement( makeError( msg ) ); 2135 } 2136 2137 // 2138 // Decide if we should open a div or a span? 
2139 // 2140 String eol = peekAheadLine(); 2141 2142 if( eol.trim().length() > 0 ) 2143 { 2144 // There is stuff after the class 2145 2146 el = new Element("span"); 2147 2148 m_styleStack.push( Boolean.TRUE ); 2149 } 2150 else 2151 { 2152 startBlockLevel(); 2153 el = new Element("div"); 2154 m_styleStack.push( Boolean.FALSE ); 2155 } 2156 2157 if( style != null ) el.setAttribute("style", style); 2158 if( clazz != null ) el.setAttribute("class", clazz); 2159 el = pushElement( el ); 2160 2161 return el; 2162 } 2163 2164 pushBack(ch); 2165 2166 return el; 2167 } 2168 2169 private Element handleSlash( boolean newLine ) 2170 throws IOException 2171 { 2172 int ch = nextToken(); 2173 2174 pushBack(ch); 2175 if( ch == '%' && !m_styleStack.isEmpty() ) 2176 { 2177 return handleDiv( newLine ); 2178 } 2179 2180 return null; 2181 } 2182 2183 private Element handleBar( boolean newLine ) 2184 throws IOException 2185 { 2186 Element el = null; 2187 2188 if( !m_istable && !newLine ) 2189 { 2190 return null; 2191 } 2192 2193 // 2194 // If the bar is in the first column, we will either start 2195 // a new table or continue the old one. 2196 // 2197 2198 if( newLine ) 2199 { 2200 if( !m_istable ) 2201 { 2202 startBlockLevel(); 2203 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2204 m_istable = true; 2205 m_rowNum = 0; 2206 } 2207 2208 m_rowNum++; 2209 Element tr = ( m_rowNum % 2 != 0 ) 2210 ? new Element("tr").setAttribute("class", "odd") 2211 : new Element("tr"); 2212 el = pushElement( tr ); 2213 } 2214 2215 // 2216 // Check out which table cell element to start; 2217 // a header element (th) or a regular element (td). 
2218 // 2219 int ch = nextToken(); 2220 2221 if( ch == '|' ) 2222 { 2223 if( !newLine ) 2224 { 2225 el = popElement("th"); 2226 if( el == null ) popElement("td"); 2227 } 2228 el = pushElement( new Element("th") ); 2229 } 2230 else 2231 { 2232 if( !newLine ) 2233 { 2234 el = popElement("td"); 2235 if( el == null ) popElement("th"); 2236 } 2237 2238 el = pushElement( new Element("td") ); 2239 2240 pushBack( ch ); 2241 } 2242 2243 return el; 2244 } 2245 2246 /** 2247 * Generic escape of next character or entity. 2248 */ 2249 private Element handleTilde() 2250 throws IOException 2251 { 2252 int ch = nextToken(); 2253 2254 if( ch == ' ' ) 2255 { 2256 if( m_wysiwygEditorMode ) 2257 { 2258 m_plainTextBuf.append( "~ " ); 2259 } 2260 return m_currentElement; 2261 } 2262 2263 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2264 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2265 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2266 { 2267 if( m_wysiwygEditorMode ) 2268 { 2269 m_plainTextBuf.append( '~' ); 2270 } 2271 2272 m_plainTextBuf.append( (char)ch ); 2273 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2274 return m_currentElement; 2275 } 2276 2277 // No escape. 2278 pushBack( ch ); 2279 2280 return null; 2281 } 2282 2283 private void fillBuffer( Element startElement ) 2284 throws IOException 2285 { 2286 m_currentElement = startElement; 2287 2288 boolean quitReading = false; 2289 m_newLine = true; 2290 disableOutputEscaping(); 2291 2292 while(!quitReading) 2293 { 2294 int ch = nextToken(); 2295 2296 if( ch == -1 ) break; 2297 2298 // 2299 // Check if we're actually ending the preformatted mode. 2300 // We still must do an entity transformation here. 2301 // 2302 if( m_isEscaping ) 2303 { 2304 if( ch == '}' ) 2305 { 2306 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2307 } 2308 else if( ch == -1 ) 2309 { 2310 quitReading = true; 2311 } 2312 else if( ch == '\r' ) 2313 { 2314 // DOS line feeds we ignore. 
2315 } 2316 else if( ch == '<' ) 2317 { 2318 m_plainTextBuf.append( "<" ); 2319 } 2320 else if( ch == '>' ) 2321 { 2322 m_plainTextBuf.append( ">" ); 2323 } 2324 else if( ch == '&' ) 2325 { 2326 m_plainTextBuf.append( "&" ); 2327 } 2328 else if( ch == '~' ) 2329 { 2330 String braces = readWhile("}"); 2331 if( braces.length() >= 3 ) 2332 { 2333 m_plainTextBuf.append("}}}"); 2334 2335 braces = braces.substring(3); 2336 } 2337 else 2338 { 2339 m_plainTextBuf.append( (char) ch ); 2340 } 2341 2342 for( int i = braces.length()-1; i >= 0; i-- ) 2343 { 2344 pushBack(braces.charAt(i)); 2345 } 2346 } 2347 else 2348 { 2349 m_plainTextBuf.append( (char) ch ); 2350 } 2351 2352 continue; 2353 } 2354 2355 // 2356 // An empty line stops a list 2357 // 2358 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2359 { 2360 m_plainTextBuf.append(unwindGeneralList()); 2361 } 2362 2363 if( m_newLine && ch != '|' && m_istable ) 2364 { 2365 popElement("table"); 2366 m_istable = false; 2367 } 2368 2369 int skip = IGNORE; 2370 2371 // 2372 // Do the actual parsing and catch any errors. 2373 // 2374 try 2375 { 2376 skip = parseToken( ch ); 2377 } 2378 catch( IllegalDataException e ) 2379 { 2380 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2381 2382 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2383 } 2384 2385 // 2386 // The idea is as follows: If the handler method returns 2387 // an element (el != null), it is assumed that it has been 2388 // added in the stack. Otherwise the character is added 2389 // as is to the plaintext buffer. 2390 // 2391 // For the transition phase, if s != null, it also gets 2392 // added in the plaintext buffer. 
2393 // 2394 2395 switch( skip ) 2396 { 2397 case ELEMENT: 2398 m_newLine = false; 2399 break; 2400 2401 case CHARACTER: 2402 m_plainTextBuf.append( (char) ch ); 2403 m_newLine = false; 2404 break; 2405 2406 case IGNORE: 2407 default: 2408 break; 2409 } 2410 } 2411 2412 closeHeadings(); 2413 popElement("domroot"); 2414 } 2415 2416 private String cleanupSuspectData( String s ) 2417 { 2418 StringBuilder sb = new StringBuilder( s.length() ); 2419 2420 for( int i = 0; i < s.length(); i++ ) 2421 { 2422 char c = s.charAt(i); 2423 2424 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2425 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); 2426 } 2427 2428 return sb.toString(); 2429 } 2430 2431 /** The token is a plain character. */ 2432 protected static final int CHARACTER = 0; 2433 2434 /** The token is a wikimarkup element. */ 2435 protected static final int ELEMENT = 1; 2436 2437 /** The token is to be ignored. */ 2438 protected static final int IGNORE = 2; 2439 2440 /** 2441 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2442 * you think this was a wiki markup element, and IGNORE, if you think 2443 * we should ignore this altogether. 2444 * <p> 2445 * To add your own MarkupParser, you can override this method, but it 2446 * is recommended that you call super.parseToken() as well to gain advantage 2447 * of JSPWiki's own markup. You can call it at the start of your own 2448 * parseToken() or end - it does not matter. 2449 * 2450 * @param ch The character under investigation 2451 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2452 * @throws IOException If parsing fails. 2453 */ 2454 protected int parseToken( int ch ) 2455 throws IOException 2456 { 2457 Element el = null; 2458 2459 // 2460 // Now, check the incoming token. 2461 // 2462 switch( ch ) 2463 { 2464 case '\r': 2465 // DOS linefeeds we forget 2466 return IGNORE; 2467 2468 case '\n': 2469 // 2470 // Close things like headings, etc. 
2471 // 2472 2473 // FIXME: This is not really very fast 2474 2475 closeHeadings(); 2476 2477 popElement("dl"); // Close definition lists. 2478 if( m_istable ) 2479 { 2480 popElement("tr"); 2481 } 2482 2483 m_isdefinition = false; 2484 2485 if( m_newLine ) 2486 { 2487 // Paragraph change. 2488 startBlockLevel(); 2489 2490 // 2491 // Figure out which elements cannot be enclosed inside 2492 // a <p></p> pair according to XHTML rules. 2493 // 2494 String nextLine = peekAheadLine(); 2495 if( nextLine.length() == 0 || 2496 (nextLine.length() > 0 && 2497 !nextLine.startsWith("{{{") && 2498 !nextLine.startsWith("----") && 2499 !nextLine.startsWith("%%") && 2500 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2501 { 2502 pushElement( new Element("p") ); 2503 m_isOpenParagraph = true; 2504 2505 if( m_restartitalic ) 2506 { 2507 pushElement( new Element("i") ); 2508 m_isitalic = true; 2509 m_restartitalic = false; 2510 } 2511 if( m_restartbold ) 2512 { 2513 pushElement( new Element("b") ); 2514 m_isbold = true; 2515 m_restartbold = false; 2516 } 2517 } 2518 } 2519 else 2520 { 2521 m_plainTextBuf.append("\n"); 2522 m_newLine = true; 2523 } 2524 return IGNORE; 2525 2526 2527 case '\\': 2528 el = handleBackslash(); 2529 break; 2530 2531 case '_': 2532 el = handleUnderscore(); 2533 break; 2534 2535 case '\'': 2536 el = handleApostrophe(); 2537 break; 2538 2539 case '{': 2540 el = handleOpenbrace( m_newLine ); 2541 break; 2542 2543 case '}': 2544 el = handleClosebrace(); 2545 break; 2546 2547 case '-': 2548 if( m_newLine ) 2549 el = handleDash(); 2550 2551 break; 2552 2553 case '!': 2554 if( m_newLine ) 2555 { 2556 el = handleHeading(); 2557 } 2558 break; 2559 2560 case ';': 2561 if( m_newLine ) 2562 { 2563 el = handleDefinitionList(); 2564 } 2565 break; 2566 2567 case ':': 2568 if( m_isdefinition ) 2569 { 2570 popElement("dt"); 2571 el = pushElement( new Element("dd") ); 2572 m_isdefinition = false; 2573 } 2574 break; 2575 2576 case '[': 2577 el = handleOpenbracket(); 2578 break; 
2579 2580 case '*': 2581 if( m_newLine ) 2582 { 2583 pushBack('*'); 2584 el = handleGeneralList(); 2585 } 2586 break; 2587 2588 case '#': 2589 if( m_newLine ) 2590 { 2591 pushBack('#'); 2592 el = handleGeneralList(); 2593 } 2594 break; 2595 2596 case '|': 2597 el = handleBar( m_newLine ); 2598 break; 2599 2600 case '~': 2601 el = handleTilde(); 2602 break; 2603 2604 case '%': 2605 el = handleDiv( m_newLine ); 2606 break; 2607 2608 case '/': 2609 el = handleSlash( m_newLine ); 2610 break; 2611 2612 default: 2613 break; 2614 } 2615 2616 return el != null ? ELEMENT : CHARACTER; 2617 } 2618 2619 private void closeHeadings() 2620 { 2621 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2622 { 2623 // Add the hash anchor element at the end of the heading 2624 addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2625 m_lastHeading = null; 2626 } 2627 popElement("h2"); 2628 popElement("h3"); 2629 popElement("h4"); 2630 } 2631 2632 /** 2633 * Parses the entire document from the Reader given in the constructor or 2634 * set by {@link #setInputReader(Reader)}. 2635 * 2636 * @return A WikiDocument, ready to be passed to the renderer. 2637 * @throws IOException If parsing cannot be accomplished. 2638 */ 2639 @Override 2640 public WikiDocument parse() 2641 throws IOException 2642 { 2643 WikiDocument d = new WikiDocument( m_context.getPage() ); 2644 d.setContext( m_context ); 2645 2646 Element rootElement = new Element("domroot"); 2647 2648 d.setRootElement( rootElement ); 2649 2650 fillBuffer( rootElement ); 2651 2652 paragraphify(rootElement); 2653 2654 return d; 2655 } 2656 2657 /** 2658 * Checks out that the first paragraph is correctly installed. 
2659 * 2660 * @param rootElement 2661 */ 2662 private void paragraphify(Element rootElement) 2663 { 2664 // 2665 // Add the paragraph tag to the first paragraph 2666 // 2667 List< Content > kids = rootElement.getContent(); 2668 2669 if( rootElement.getChild("p") != null ) 2670 { 2671 ArrayList<Content> ls = new ArrayList<>(); 2672 int idxOfFirstContent = 0; 2673 int count = 0; 2674 2675 for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) 2676 { 2677 Content c = i.next(); 2678 if( c instanceof Element ) 2679 { 2680 String name = ( ( Element )c ).getName(); 2681 if( isBlockLevel( name ) ) break; 2682 } 2683 2684 if( !(c instanceof ProcessingInstruction) ) 2685 { 2686 ls.add( c ); 2687 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2688 } 2689 } 2690 2691 // 2692 // If there were any elements, then add a new <p> (unless it would 2693 // be an empty one) 2694 // 2695 if( ls.size() > 0 ) 2696 { 2697 Element newel = new Element("p"); 2698 2699 for( Iterator< Content > i = ls.iterator(); i.hasNext(); ) 2700 { 2701 Content c = i.next(); 2702 2703 c.detach(); 2704 newel.addContent(c); 2705 } 2706 2707 // 2708 // Make sure there are no empty <p/> tags added. 2709 // 2710 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) 2711 rootElement.addContent(idxOfFirstContent, newel); 2712 } 2713 } 2714 } 2715 2716}