001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import java.io.IOException; 022import java.io.Reader; 023import java.io.StringReader; 024import java.text.MessageFormat; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.EmptyStackException; 029import java.util.HashMap; 030import java.util.Iterator; 031import java.util.List; 032import java.util.Map; 033import java.util.Properties; 034import java.util.ResourceBundle; 035import java.util.Stack; 036 037import javax.xml.transform.Result; 038 039import org.apache.commons.lang.StringEscapeUtils; 040import org.apache.commons.lang.StringUtils; 041import org.apache.log4j.Logger; 042import org.apache.oro.text.regex.MalformedPatternException; 043import org.apache.oro.text.regex.MatchResult; 044import org.apache.oro.text.regex.Pattern; 045import org.apache.oro.text.regex.PatternCompiler; 046import org.apache.oro.text.regex.PatternMatcher; 047import org.apache.oro.text.regex.Perl5Compiler; 048import org.apache.oro.text.regex.Perl5Matcher; 049import org.apache.wiki.InternalWikiException; 050import org.apache.wiki.StringTransmutator; 051import org.apache.wiki.WikiContext; 052import org.apache.wiki.WikiPage; 053import org.apache.wiki.api.exceptions.PluginException; 054import org.apache.wiki.api.plugin.WikiPlugin; 055import org.apache.wiki.auth.WikiSecurityException; 056import org.apache.wiki.auth.acl.Acl; 057import org.apache.wiki.i18n.InternationalizationManager; 058import org.apache.wiki.preferences.Preferences; 059import org.apache.wiki.render.CleanTextRenderer; 060import org.apache.wiki.render.RenderingManager; 061import org.apache.wiki.util.TextUtil; 062import org.jdom2.Attribute; 063import org.jdom2.Content; 064import org.jdom2.Element; 065import org.jdom2.IllegalDataException; 066import org.jdom2.ProcessingInstruction; 067import org.jdom2.Verifier; 068 069/** 070 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 071 * heart and soul of JSPWiki : make sure you test properly anything that is added, 072 * or else it breaks down horribly. 073 * 074 * @since 2.4 075 */ 076public class JSPWikiMarkupParser extends MarkupParser { 077 078 protected static final int READ = 0; 079 protected static final int EDIT = 1; 080 protected static final int EMPTY = 2; // Empty message 081 protected static final int LOCAL = 3; 082 protected static final int LOCALREF = 4; 083 protected static final int IMAGE = 5; 084 protected static final int EXTERNAL = 6; 085 protected static final int INTERWIKI = 7; 086 protected static final int IMAGELINK = 8; 087 protected static final int IMAGEWIKILINK = 9; 088 protected static final int ATTACHMENT = 10; 089 090 private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class ); 091 092 private boolean m_isbold = false; 093 private boolean m_isitalic = false; 094 private boolean m_istable = false; 095 private boolean m_isPre = false; 096 private boolean m_isEscaping = false; 097 private boolean m_isdefinition = false; 098 private boolean m_isPreBlock = false; 099 100 /** Contains style information, in multiple forms. */ 101 private Stack<Boolean> m_styleStack = new Stack<Boolean>(); 102 103 // general list handling 104 private int m_genlistlevel = 0; 105 private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern 106 private boolean m_allowPHPWikiStyleLists = true; 107 108 private boolean m_isOpenParagraph = false; 109 110 /** Parser for extended link functionality. */ 111 private LinkParser m_linkParser = new LinkParser(); 112 113 /** Keeps track of any plain text that gets put in the Text nodes */ 114 private StringBuilder m_plainTextBuf = new StringBuilder(20); 115 116 private Element m_currentElement; 117 118 /** Keep track of duplicate header names. */ 119 private Map<String, Integer> m_titleSectionCounter = new HashMap<String, Integer>(); 120 121 /** If true, consider CamelCase hyperlinks as well. */ 122 public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks"; 123 124 /** If true, all hyperlinks are translated as well, regardless whether they 125 are surrounded by brackets. */ 126 public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris"; 127 128 /** If true, all outward attachment info links have a small link image appended. */ 129 public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage"; 130 131 /** If true, then considers CamelCase links as well. */ 132 private boolean m_camelCaseLinks = false; 133 134 /** If true, then generate special output for wysiwyg editing in certain cases */ 135 private boolean m_wysiwygEditorMode = false; 136 137 /** If true, consider URIs that have no brackets as well. */ 138 // FIXME: Currently reserved, but not used. 139 private boolean m_plainUris = false; 140 141 /** If true, all outward links use a small link image. */ 142 private boolean m_useOutlinkImage = true; 143 144 private boolean m_useAttachmentImage = true; 145 146 /** If true, allows raw HTML. */ 147 private boolean m_allowHTML = false; 148 149 private boolean m_useRelNofollow = false; 150 151 private PatternCompiler m_compiler = new Perl5Compiler(); 152 153 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 154 155 private PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 156 private Pattern m_camelCasePattern; 157 158 private int m_rowNum = 1; 159 160 private Heading m_lastHeading = null; 161 162 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 163 164 /** 165 * Creates a markup parser. 166 * 167 * @param context The WikiContext which controls the parsing 168 * @param in Where the data is read from. 169 */ 170 public JSPWikiMarkupParser( WikiContext context, Reader in ) 171 { 172 super( context, in ); 173 initialize(); 174 } 175 176 // FIXME: parsers should be pooled for better performance. 177 private void initialize() 178 { 179 initInlineImagePatterns(); 180 181 m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN ); 182 if( m_camelCasePattern == null ) 183 { 184 try 185 { 186 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX, 187 Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 188 } 189 catch( MalformedPatternException e ) 190 { 191 log.fatal("Internal error: Someone put in a faulty pattern.",e); 192 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 193 } 194 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 195 } 196 // 197 // Set the properties. 198 // 199 Properties props = m_engine.getWikiProperties(); 200 201 String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 202 203 if( cclinks != null ) 204 { 205 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 206 } 207 else 208 { 209 m_camelCaseLinks = TextUtil.getBooleanProperty( props, 210 PROP_CAMELCASELINKS, 211 m_camelCaseLinks ); 212 } 213 214 Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE ); 215 if( wysiwygVariable != null ) 216 { 217 m_wysiwygEditorMode = wysiwygVariable.booleanValue(); 218 } 219 220 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 221 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 222 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 223 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 224 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 225 226 if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null ) 227 { 228 disableAccessRules(); 229 } 230 231 m_context.getPage().setHasMetadata(); 232 } 233 234 /** 235 * Calls a transmutator chain. 236 * 237 * @param list Chain to call 238 * @param text Text that should be passed to the mutate() method 239 * of each of the mutators in the chain. 240 * @return The result of the mutation. 241 */ 242 243 protected String callMutatorChain( Collection list, String text ) 244 { 245 if( list == null || list.size() == 0 ) 246 { 247 return text; 248 } 249 250 for( Iterator i = list.iterator(); i.hasNext(); ) 251 { 252 StringTransmutator m = (StringTransmutator) i.next(); 253 254 text = m.mutate( m_context, text ); 255 } 256 257 return text; 258 } 259 260 /** 261 * Calls the heading listeners. 262 * 263 * @param param A Heading object. 264 */ 265 protected void callHeadingListenerChain( Heading param ) 266 { 267 List< HeadingListener > list = m_headingListenerChain; 268 269 for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); ) 270 { 271 HeadingListener h = i.next(); 272 273 h.headingAdded( m_context, param ); 274 } 275 } 276 277 /** 278 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 279 * if you really know what you are doing. 280 * 281 * @param type One of the types above 282 * @param link URL to which to link to 283 * @param text Link text 284 * @param section If a particular section identifier is required. 285 * @return An A element. 286 * @since 2.4.78 287 */ 288 protected Element createAnchor(int type, String link, String text, String section) 289 { 290 text = escapeHTMLEntities( text ); 291 section = escapeHTMLEntities( section ); 292 Element el = new Element("a"); 293 el.setAttribute("class",CLASS_TYPES[type]); 294 el.setAttribute("href",link+section); 295 el.addContent(text); 296 return el; 297 } 298 299 private Element makeLink( int type, String link, String text, String section, Iterator attributes ) 300 { 301 Element el = null; 302 303 if( text == null ) text = link; 304 305 text = callMutatorChain( m_linkMutators, text ); 306 307 section = (section != null) ? ("#"+section) : ""; 308 309 // Make sure we make a link name that can be accepted 310 // as a valid URL. 311 312 if( link.length() == 0 ) 313 { 314 type = EMPTY; 315 } 316 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 317 318 switch(type) 319 { 320 case READ: 321 el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section ); 322 break; 323 324 case EDIT: 325 el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" ); 326 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 327 328 break; 329 330 case EMPTY: 331 el = new Element("u").addContent(text); 332 break; 333 334 // 335 // These two are for local references - footnotes and 336 // references to footnotes. 337 // We embed the page name (or whatever WikiContext gives us) 338 // to make sure the links are unique across Wiki. 339 // 340 case LOCALREF: 341 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 342 break; 343 344 case LOCAL: 345 el = new Element("a").setAttribute("class",CLASS_FOOTNOTE); 346 el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1)); 347 el.addContent("["+text+"]"); 348 break; 349 350 // 351 // With the image, external and interwiki types we need to 352 // make sure nobody can put in Javascript or something else 353 // annoying into the links themselves. We do this by preventing 354 // a haxor from stopping the link name short with quotes in 355 // fillBuffer(). 356 // 357 case IMAGE: 358 el = new Element("img").setAttribute("class","inline"); 359 el.setAttribute("src",link); 360 el.setAttribute("alt",text); 361 break; 362 363 case IMAGELINK: 364 el = new Element("img").setAttribute("class","inline"); 365 el.setAttribute("src",link); 366 el.setAttribute("alt",text); 367 el = createAnchor(IMAGELINK,text,"","").addContent(el); 368 break; 369 370 case IMAGEWIKILINK: 371 String pagelink = m_context.getURL(WikiContext.VIEW,text); 372 el = new Element("img").setAttribute("class","inline"); 373 el.setAttribute("src",link); 374 el.setAttribute("alt",text); 375 el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el); 376 break; 377 378 case EXTERNAL: 379 el = createAnchor( EXTERNAL, link, text, section ); 380 if( m_useRelNofollow ) el.setAttribute("rel","nofollow"); 381 break; 382 383 case INTERWIKI: 384 el = createAnchor( INTERWIKI, link, text, section ); 385 break; 386 387 case ATTACHMENT: 388 String attlink = m_context.getURL( WikiContext.ATTACH, 389 link ); 390 391 String infolink = m_context.getURL( WikiContext.INFO, 392 link ); 393 394 String imglink = m_context.getURL( WikiContext.NONE, 395 "images/attachment_small.png" ); 396 397 el = createAnchor( ATTACHMENT, attlink, text, "" ); 398 399 pushElement(el); 400 popElement(el.getName()); 401 402 if( m_useAttachmentImage ) 403 { 404 el = new Element("img").setAttribute("src",imglink); 405 el.setAttribute("border","0"); 406 el.setAttribute("alt","(info)"); 407 408 el = new Element("a").setAttribute("href",infolink).addContent(el); 409 el.setAttribute("class","infolink"); 410 } 411 else 412 { 413 el = null; 414 } 415 break; 416 417 default: 418 break; 419 } 420 421 if( el != null && attributes != null ) 422 { 423 while( attributes.hasNext() ) 424 { 425 Attribute attr = (Attribute)attributes.next(); 426 if( attr != null ) 427 { 428 el.setAttribute(attr); 429 } 430 } 431 } 432 433 if( el != null ) 434 { 435 flushPlainText(); 436 m_currentElement.addContent( el ); 437 } 438 return el; 439 } 440 441 /** 442 * Figures out if a link is an off-site link. This recognizes 443 * the most common protocols by checking how it starts. 444 * 445 * @param link The link to check. 446 * @return true, if this is a link outside of this wiki. 447 * @since 2.4 448 * @deprecated - use {@link LinkParsingOperations#isExternalLink(String)} instead. 449 */ 450 @Deprecated 451 public static boolean isExternalLink( String link ) 452 { 453 return new LinkParsingOperations(null).isExternalLink( link ); 454 } 455 456 /** 457 * Returns true if the link is really command to insert 458 * a plugin. 459 * <P> 460 * Currently we just check if the link starts with "{INSERT", 461 * or just plain "{" but not "{$". 462 * 463 * @param link Link text, i.e. the contents of text between []. 464 * @return True, if this link seems to be a command to insert a plugin here. 465 * @deprecated Use {@link LinkParsingOperations#isPluginLink(String)} instead, 466 */ 467 @Deprecated 468 public static boolean isPluginLink( String link ) 469 { 470 return new LinkParsingOperations( null ).isPluginLink( link ); 471 } 472 473 /** 474 * These are all of the HTML 4.01 block-level elements. 475 */ 476 private static final String[] BLOCK_ELEMENTS = { 477 "address", "blockquote", "div", "dl", "fieldset", "form", 478 "h1", "h2", "h3", "h4", "h5", "h6", 479 "hr", "noscript", "ol", "p", "pre", "table", "ul" 480 }; 481 482 private static boolean isBlockLevel( String name ) 483 { 484 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 485 } 486 487 /** 488 * This method peeks ahead in the stream until EOL and returns the result. 489 * It will keep the buffers untouched. 490 * 491 * @return The string from the current position to the end of line. 492 */ 493 494 // FIXME: Always returns an empty line, even if the stream is full. 495 private String peekAheadLine() 496 throws IOException 497 { 498 String s = readUntilEOL().toString(); 499 500 if( s.length() > PUSHBACK_BUFFER_SIZE ) 501 { 502 log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover..."); 503 pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) ); 504 } 505 else 506 { 507 try 508 { 509 pushBack( s ); 510 } 511 catch( IOException e ) 512 { 513 log.warn("Pushback failed: the line is probably too long. Attempting to recover."); 514 } 515 } 516 return s; 517 } 518 519 private int flushPlainText() 520 { 521 int numChars = m_plainTextBuf.length(); 522 523 if( numChars > 0 ) 524 { 525 String buf; 526 527 if( !m_allowHTML ) 528 { 529 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 530 } 531 else 532 { 533 buf = m_plainTextBuf.toString(); 534 } 535 // 536 // We must first empty the buffer because the side effect of 537 // calling makeCamelCaseLink() is to call this routine. 538 // 539 540 m_plainTextBuf = new StringBuilder(20); 541 542 try 543 { 544 // 545 // This is the heaviest part of parsing, and therefore we can 546 // do some optimization here. 547 // 548 // 1) Only when the length of the buffer is big enough, we try to do the match 549 // 550 551 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) 552 { 553 // System.out.println("Buffer="+buf); 554 555 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) 556 { 557 MatchResult result = m_camelCaseMatcher.getMatch(); 558 559 String firstPart = buf.substring(0,result.beginOffset(0)); 560 String prefix = result.group(1); 561 562 if( prefix == null ) prefix = ""; 563 564 String camelCase = result.group(2); 565 String protocol = result.group(3); 566 String uri = protocol+result.group(4); 567 buf = buf.substring(result.endOffset(0)); 568 569 m_currentElement.addContent( firstPart ); 570 571 // 572 // Check if the user does not wish to do URL or WikiWord expansion 573 // 574 if( prefix.endsWith("~") || prefix.indexOf('[') != -1 ) 575 { 576 if( prefix.endsWith("~") ) 577 { 578 if( m_wysiwygEditorMode ) 579 { 580 m_currentElement.addContent( "~" ); 581 } 582 prefix = prefix.substring(0,prefix.length()-1); 583 } 584 if( camelCase != null ) 585 { 586 m_currentElement.addContent( prefix+camelCase ); 587 } 588 else if( protocol != null ) 589 { 590 m_currentElement.addContent( prefix+uri ); 591 } 592 continue; 593 } 594 595 // 596 // Fine, then let's check what kind of a link this was 597 // and emit the proper elements 598 // 599 if( protocol != null ) 600 { 601 char c = uri.charAt(uri.length()-1); 602 if( c == '.' || c == ',' ) 603 { 604 uri = uri.substring(0,uri.length()-1); 605 buf = c + buf; 606 } 607 // System.out.println("URI match "+uri); 608 m_currentElement.addContent( prefix ); 609 makeDirectURILink( uri ); 610 } 611 else 612 { 613 // System.out.println("Matched: '"+camelCase+"'"); 614 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 615 // System.out.println("prefix="+prefix); 616 m_currentElement.addContent( prefix ); 617 618 makeCamelCaseLink( camelCase ); 619 } 620 } 621 622 m_currentElement.addContent( buf ); 623 } 624 else 625 { 626 // 627 // No camelcase asked for, just add the elements 628 // 629 m_currentElement.addContent( buf ); 630 } 631 } 632 catch( IllegalDataException e ) 633 { 634 // 635 // Sometimes it's possible that illegal XML chars is added to the data. 636 // Here we make sure it does not stop parsing. 637 // 638 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 639 } 640 } 641 642 return numChars; 643 } 644 645 /** 646 * Escapes XML entities in a HTML-compatible way (i.e. does not escape 647 * entities that are already escaped). 648 * 649 * @param buf 650 * @return An escaped string. 651 */ 652 private String escapeHTMLEntities(String buf) 653 { 654 StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); 655 656 for( int i = 0; i < buf.length(); i++ ) 657 { 658 char ch = buf.charAt(i); 659 660 if( ch == '<' ) 661 { 662 tmpBuf.append("<"); 663 } 664 else if( ch == '>' ) 665 { 666 tmpBuf.append(">"); 667 } 668 else if( ch == '\"' ) 669 { 670 tmpBuf.append("""); 671 } 672 else if( ch == '&' ) 673 { 674 // 675 // If the following is an XML entity reference (&#.*;) we'll 676 // leave it as it is; otherwise we'll replace it with an & 677 // 678 679 boolean isEntity = false; 680 StringBuilder entityBuf = new StringBuilder(); 681 682 if( i < buf.length() -1 ) 683 { 684 for( int j = i; j < buf.length(); j++ ) 685 { 686 char ch2 = buf.charAt(j); 687 688 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) 689 { 690 entityBuf.append(ch2); 691 692 if( ch2 == ';' ) 693 { 694 isEntity = true; 695 break; 696 } 697 } 698 else 699 { 700 break; 701 } 702 } 703 } 704 705 if( isEntity ) 706 { 707 tmpBuf.append( entityBuf ); 708 i = i + entityBuf.length() - 1; 709 } 710 else 711 { 712 tmpBuf.append("&"); 713 } 714 715 } 716 else 717 { 718 tmpBuf.append( ch ); 719 } 720 } 721 722 return tmpBuf.toString(); 723 } 724 725 private Element pushElement( Element e ) 726 { 727 flushPlainText(); 728 m_currentElement.addContent( e ); 729 m_currentElement = e; 730 731 return e; 732 } 733 734 private Element addElement( Content e ) 735 { 736 if( e != null ) 737 { 738 flushPlainText(); 739 m_currentElement.addContent( e ); 740 } 741 return m_currentElement; 742 } 743 744 /** 745 * All elements that can be empty by the HTML DTD. 746 */ 747 // Keep sorted. 748 private static final String[] EMPTY_ELEMENTS = { 749 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 750 }; 751 752 /** 753 * Goes through the current element stack and pops all elements until this 754 * element is found - this essentially "closes" and element. 755 * 756 * @param s 757 * @return The new current element, or null, if there was no such element in the entire stack. 758 */ 759 private Element popElement( String s ) 760 { 761 int flushedBytes = flushPlainText(); 762 763 Element currEl = m_currentElement; 764 765 while( currEl.getParentElement() != null ) 766 { 767 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 768 { 769 m_currentElement = currEl.getParentElement(); 770 771 // 772 // Check if it's okay for this element to be empty. Then we will 773 // trick the JDOM generator into not generating an empty element, 774 // by putting an empty string between the tags. Yes, it's a kludge 775 // but what'cha gonna do about it. :-) 776 // 777 778 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 779 { 780 currEl.addContent(""); 781 } 782 783 return m_currentElement; 784 } 785 786 currEl = currEl.getParentElement(); 787 } 788 789 return null; 790 } 791 792 793 /** 794 * Reads the stream until it meets one of the specified 795 * ending characters, or stream end. The ending character will be left 796 * in the stream. 797 */ 798 private String readUntil( String endChars ) 799 throws IOException 800 { 801 StringBuilder sb = new StringBuilder( 80 ); 802 int ch = nextToken(); 803 804 while( ch != -1 ) 805 { 806 if( ch == '\\' ) 807 { 808 ch = nextToken(); 809 if( ch == -1 ) 810 { 811 break; 812 } 813 } 814 else 815 { 816 if( endChars.indexOf((char)ch) != -1 ) 817 { 818 pushBack( ch ); 819 break; 820 } 821 } 822 sb.append( (char) ch ); 823 ch = nextToken(); 824 } 825 826 return sb.toString(); 827 } 828 829 /** 830 * Reads the stream while the characters that have been specified are 831 * in the stream, returning then the result as a String. 832 */ 833 private String readWhile( String endChars ) 834 throws IOException 835 { 836 StringBuilder sb = new StringBuilder( 80 ); 837 int ch = nextToken(); 838 839 while( ch != -1 ) 840 { 841 if( endChars.indexOf((char)ch) == -1 ) 842 { 843 pushBack( ch ); 844 break; 845 } 846 847 sb.append( (char) ch ); 848 ch = nextToken(); 849 } 850 851 return sb.toString(); 852 } 853 854 private JSPWikiMarkupParser m_cleanTranslator; 855 856 /** 857 * Does a lazy init. Otherwise, we would get into a situation 858 * where HTMLRenderer would try and boot a TranslatorReader before 859 * the TranslatorReader it is contained by is up. 860 */ 861 private JSPWikiMarkupParser getCleanTranslator() 862 { 863 if( m_cleanTranslator == null ) 864 { 865 WikiContext dummyContext = new WikiContext( m_engine, 866 m_context.getHttpRequest(), 867 m_context.getPage() ); 868 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 869 870 m_cleanTranslator.m_allowHTML = true; 871 } 872 873 return m_cleanTranslator; 874 } 875 /** 876 * Modifies the "hd" parameter to contain proper values. Because 877 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 878 * % after url encoding with '_'. 879 * <p> 880 * Counts also duplicate headings (= headings with similar name), and 881 * attaches a counter. 882 */ 883 private String makeHeadingAnchor( String baseName, String title, Heading hd ) 884 { 885 hd.m_titleText = title; 886 title = MarkupParser.wikifyLink( title ); 887 888 hd.m_titleSection = m_engine.encodeName(title); 889 890 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) 891 { 892 Integer count = m_titleSectionCounter.get( hd.m_titleSection ); 893 count = count + 1; 894 m_titleSectionCounter.put( hd.m_titleSection, count ); 895 hd.m_titleSection += "-" + count; 896 } 897 else 898 { 899 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 900 } 901 902 hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+ 903 "-"+hd.m_titleSection; 904 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 905 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 906 907 return hd.m_titleAnchor; 908 } 909 910 private String makeSectionTitle( String title ) 911 { 912 title = title.trim(); 913 String outTitle; 914 915 try 916 { 917 JSPWikiMarkupParser dtr = getCleanTranslator(); 918 dtr.setInputReader( new StringReader(title) ); 919 920 CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse()); 921 922 outTitle = ctt.getString(); 923 } 924 catch( IOException e ) 925 { 926 log.fatal("CleanTranslator not working", e); 927 throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e); 928 } 929 930 return outTitle; 931 } 932 933 /** 934 * Returns XHTML for the heading. 935 * 936 * @param level The level of the heading. @see Heading 937 * @param title the title for the heading 938 * @param hd a List to which heading should be added 939 * @return An Element containing the heading 940 */ 941 public Element makeHeading( int level, String title, Heading hd ) 942 { 943 Element el = null; 944 945 String pageName = m_context.getPage().getName(); 946 947 String outTitle = makeSectionTitle( title ); 948 949 hd.m_level = level; 950 951 switch( level ) 952 { 953 case Heading.HEADING_SMALL: 954 el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 955 break; 956 957 case Heading.HEADING_MEDIUM: 958 el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 959 break; 960 961 case Heading.HEADING_LARGE: 962 el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 963 break; 964 965 default: 966 throw new InternalWikiException("Illegal heading type "+level); 967 } 968 969 970 return el; 971 } 972 973 /** 974 * When given a link to a WikiName, we just return 975 * a proper HTML link for it. The local link mutator 976 * chain is also called. 977 */ 978 private Element makeCamelCaseLink( String wikiname ) 979 { 980 String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 981 982 callMutatorChain( m_localLinkMutatorChain, wikiname ); 983 984 if( matchedLink != null ) { 985 makeLink( READ, matchedLink, wikiname, null, null ); 986 } else { 987 makeLink( EDIT, wikiname, wikiname, null, null ); 988 } 989 990 return m_currentElement; 991 } 992 993 /** Holds the image URL for the duration of this parser */ 994 private String m_outlinkImageURL = null; 995 996 /** 997 * Returns an element for the external link image (out.png). However, 998 * this method caches the URL for the lifetime of this MarkupParser, 999 * because it's commonly used, and we'll end up with possibly hundreds 1000 * our thousands of references to it... It's a lot faster, too. 1001 * 1002 * @return An element containing the HTML for the outlink image. 1003 */ 1004 private Element outlinkImage() 1005 { 1006 Element el = null; 1007 1008 if( m_useOutlinkImage ) 1009 { 1010 if( m_outlinkImageURL == null ) 1011 { 1012 m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE ); 1013 } 1014 1015 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 1016 el.setAttribute( "src", m_outlinkImageURL ); 1017 el.setAttribute( "alt","" ); 1018 } 1019 1020 return el; 1021 } 1022 1023 /** 1024 * Takes an URL and turns it into a regular wiki link. Unfortunately, 1025 * because of the way that flushPlainText() works, it already encodes 1026 * all of the XML entities. But so does WikiContext.getURL(), so we 1027 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 1028 * <p> 1029 * What a crappy problem. 1030 * 1031 * @param url 1032 * @return An anchor Element containing the link. 1033 */ 1034 private Element makeDirectURILink( String url ) 1035 { 1036 Element result; 1037 String last = null; 1038 1039 if( url.endsWith(",") || url.endsWith(".") ) 1040 { 1041 last = url.substring( url.length()-1 ); 1042 url = url.substring( 0, url.length()-1 ); 1043 } 1044 1045 callMutatorChain( m_externalLinkMutatorChain, url ); 1046 1047 if( m_linkParsingOperations.isImageLink( url ) ) 1048 { 1049 result = handleImageLink( StringUtils.replace(url,"&","&"), url, false ); 1050 } 1051 else 1052 { 1053 result = makeLink( EXTERNAL, StringUtils.replace(url,"&","&"), url, null, null ); 1054 addElement( outlinkImage() ); 1055 } 1056 1057 if( last != null ) 1058 { 1059 m_plainTextBuf.append(last); 1060 } 1061 1062 return result; 1063 } 1064 1065 /** 1066 * Image links are handled differently: 1067 * 1. If the text is a WikiName of an existing page, 1068 * it gets linked. 1069 * 2. If the text is an external link, then it is inlined. 1070 * 3. Otherwise it becomes an ALT text. 1071 * 1072 * @param reallink The link to the image. 1073 * @param link Link text portion, may be a link to somewhere else. 1074 * @param hasLinkText If true, then the defined link had a link text available. 1075 * This means that the link text may be a link to a wiki page, 1076 * or an external resource. 1077 */ 1078 1079 // FIXME: isExternalLink() is called twice. 1080 private Element handleImageLink( String reallink, String link, boolean hasLinkText ) 1081 { 1082 String possiblePage = MarkupParser.cleanLink( link ); 1083 1084 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) 1085 { 1086 return makeLink( IMAGELINK, reallink, link, null, null ); 1087 } 1088 else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) 1089 { 1090 // System.out.println("Orig="+link+", Matched: "+matchedLink); 1091 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 1092 1093 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 1094 } 1095 else 1096 { 1097 return makeLink( IMAGE, reallink, link, null, null ); 1098 } 1099 } 1100 1101 private Element handleAccessRule( String ruleLine ) 1102 { 1103 if( m_wysiwygEditorMode ) 1104 { 1105 m_currentElement.addContent( "[" + ruleLine + "]" ); 1106 } 1107 1108 if( !m_parseAccessRules ) return m_currentElement; 1109 Acl acl; 1110 WikiPage page = m_context.getRealPage(); 1111 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1112 1113 if( ruleLine.startsWith( "{" ) ) 1114 ruleLine = ruleLine.substring( 1 ); 1115 if( ruleLine.endsWith( "}" ) ) 1116 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1117 1118 if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine); 1119 1120 try 1121 { 1122 acl = m_engine.getAclManager().parseAcl( page, ruleLine ); 1123 1124 page.setAcl( acl ); 1125 1126 if( log.isDebugEnabled() ) log.debug( acl.toString() ); 1127 } 1128 catch( WikiSecurityException wse ) 1129 { 1130 return makeError( wse.getMessage() ); 1131 } 1132 1133 return m_currentElement; 1134 } 1135 1136 /** 1137 * Handles metadata setting [{SET foo=bar}] 1138 */ 1139 private Element handleMetadata( String link ) 1140 { 1141 if( m_wysiwygEditorMode ) 1142 { 1143 m_currentElement.addContent( "[" + link + "]" ); 1144 } 1145 1146 try 1147 { 1148 String args = link.substring( link.indexOf(' '), link.length()-1 ); 1149 1150 String name = args.substring( 0, args.indexOf('=') ); 1151 String val = args.substring( args.indexOf('=')+1, args.length() ); 1152 1153 name = name.trim(); 1154 val = val.trim(); 1155 1156 if( val.startsWith("'") ) val = val.substring( 1 ); 1157 if( val.endsWith("'") ) val = val.substring( 0, val.length()-1 ); 1158 1159 // log.debug("SET name='"+name+"', value='"+val+"'."); 1160 1161 if( name.length() > 0 && val.length() > 0 ) 1162 { 1163 val = m_engine.getVariableManager().expandVariables( m_context, 1164 val ); 1165 1166 m_context.getPage().setAttribute( name, val ); 1167 } 1168 } 1169 catch( Exception e ) 1170 { 1171 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1172 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1173 } 1174 1175 return m_currentElement; 1176 } 1177 1178 /** 1179 * Emits a processing instruction that will disable markup escaping. This is 1180 * very useful if you want to emit HTML directly into the stream. 1181 * 1182 */ 1183 private void disableOutputEscaping() 1184 { 1185 addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") ); 1186 } 1187 1188 /** 1189 * Gobbles up all hyperlinks that are encased in square brackets. 1190 */ 1191 private Element handleHyperlinks( String linktext, int pos ) 1192 { 1193 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1194 1195 StringBuilder sb = new StringBuilder(linktext.length()+80); 1196 1197 if( m_linkParsingOperations.isAccessRule( linktext ) ) 1198 { 1199 return handleAccessRule( linktext ); 1200 } 1201 1202 if( m_linkParsingOperations.isMetadata( linktext ) ) 1203 { 1204 return handleMetadata( linktext ); 1205 } 1206 1207 if( m_linkParsingOperations.isPluginLink( linktext ) ) 1208 { 1209 try 1210 { 1211 PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1212 // 1213 // This might sometimes fail, especially if there is something which looks 1214 // like a plugin invocation but is really not. 1215 // 1216 if( pluginContent != null ) 1217 { 1218 addElement( pluginContent ); 1219 1220 pluginContent.executeParse( m_context ); 1221 } 1222 } 1223 catch( PluginException e ) 1224 { 1225 log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 1226 //log.info( "Root cause:",e.getRootThrowable() ); 1227 if( !m_wysiwygEditorMode ) 1228 { 1229 ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1230 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 1231 m_context.getRealPage().getWiki(), 1232 m_context.getRealPage().getName(), 1233 e.getMessage() ) ) ); 1234 } 1235 } 1236 1237 return m_currentElement; 1238 } 1239 1240 try 1241 { 1242 LinkParser.Link link = m_linkParser.parse(linktext); 1243 linktext = link.getText(); 1244 String linkref = link.getReference(); 1245 1246 // 1247 // Yes, we now have the components separated. 1248 // linktext = the text the link should have 1249 // linkref = the url or page name. 1250 // 1251 // In many cases these are the same. [linktext|linkref]. 1252 // 1253 if( m_linkParsingOperations.isVariableLink( linktext ) ) 1254 { 1255 Content el = new VariableContent(linktext); 1256 1257 addElement( el ); 1258 } 1259 else if( m_linkParsingOperations.isExternalLink( linkref ) ) 1260 { 1261 // It's an external link, out of this Wiki 1262 1263 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1264 1265 if( m_linkParsingOperations.isImageLink( linkref ) ) 1266 { 1267 handleImageLink( linkref, linktext, link.hasReference() ); 1268 } 1269 else 1270 { 1271 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1272 addElement( outlinkImage() ); 1273 } 1274 } 1275 else if( link.isInterwikiLink() ) 1276 { 1277 // It's an interwiki link 1278 // InterWiki links also get added to external link chain 1279 // after the links have been resolved. 1280 1281 // FIXME: There is an interesting issue here: We probably should 1282 // URLEncode the wikiPage, but we can't since some of the 1283 // Wikis use slashes (/), which won't survive URLEncoding. 1284 // Besides, we don't know which character set the other Wiki 1285 // is using, so you'll have to write the entire name as it appears 1286 // in the URL. Bugger. 1287 1288 String extWiki = link.getExternalWiki(); 1289 String wikiPage = link.getExternalWikiPage(); 1290 1291 if( m_wysiwygEditorMode ) 1292 { 1293 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1294 } 1295 else 1296 { 1297 String urlReference = m_engine.getInterWikiURL( extWiki ); 1298 1299 if( urlReference != null ) 1300 { 1301 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1302 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1303 1304 if( m_linkParsingOperations.isImageLink(urlReference) ) 1305 { 1306 handleImageLink( urlReference, linktext, link.hasReference() ); 1307 } 1308 else 1309 { 1310 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1311 } 1312 1313 if( m_linkParsingOperations.isExternalLink(urlReference) ) 1314 { 1315 addElement( outlinkImage() ); 1316 } 1317 } 1318 else 1319 { 1320 Object[] args = { extWiki }; 1321 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1322 } 1323 } 1324 } 1325 else if( linkref.startsWith("#") ) 1326 { 1327 // It defines a local footnote 1328 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1329 } 1330 else if( TextUtil.isNumber( linkref ) ) 1331 { 1332 // It defines a reference to a local footnote 1333 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1334 } 1335 else 1336 { 1337 int hashMark = -1; 1338 1339 // 1340 // Internal wiki link, but is it an attachment link? 1341 // 1342 String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref ); 1343 if( attachment != null ) 1344 { 1345 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1346 1347 if( m_linkParsingOperations.isImageLink( linkref ) ) 1348 { 1349 attachment = m_context.getURL( WikiContext.ATTACH, attachment ); 1350 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1351 } 1352 else 1353 { 1354 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1355 } 1356 } 1357 else if( (hashMark = linkref.indexOf('#')) != -1 ) 1358 { 1359 // It's an internal Wiki link, but to a named section 1360 1361 String namedSection = linkref.substring( hashMark+1 ); 1362 linkref = linkref.substring( 0, hashMark ); 1363 1364 linkref = MarkupParser.cleanLink( linkref ); 1365 1366 callMutatorChain( m_localLinkMutatorChain, linkref ); 1367 1368 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1369 if( matchedLink != null ) { 1370 String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection)); 1371 sectref = sectref.replace('%', '_'); 1372 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1373 } else { 1374 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1375 } 1376 } 1377 else 1378 { 1379 // It's an internal Wiki link 1380 linkref = MarkupParser.cleanLink( linkref ); 1381 1382 callMutatorChain( m_localLinkMutatorChain, linkref ); 1383 1384 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1385 if( matchedLink != null ) { 1386 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1387 } else { 1388 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1389 } 1390 } 1391 } 1392 } 1393 catch( ParseException e ) 1394 { 1395 log.info("Parser failure: ",e); 1396 Object[] args = { e.getMessage() }; 1397 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1398 } 1399 1400 return m_currentElement; 1401 } 1402 1403 /** 1404 * Pushes back any string that has been read. It will obviously 1405 * be pushed back in a reverse order. 1406 * 1407 * @since 2.1.77 1408 */ 1409 private void pushBack( String s ) 1410 throws IOException 1411 { 1412 for( int i = s.length()-1; i >= 0; i-- ) 1413 { 1414 pushBack( s.charAt(i) ); 1415 } 1416 } 1417 1418 private Element handleBackslash() 1419 throws IOException 1420 { 1421 int ch = nextToken(); 1422 1423 if( ch == '\\' ) 1424 { 1425 int ch2 = nextToken(); 1426 1427 if( ch2 == '\\' ) 1428 { 1429 pushElement( new Element("br").setAttribute("clear","all")); 1430 return popElement("br"); 1431 } 1432 1433 pushBack( ch2 ); 1434 1435 pushElement( new Element("br") ); 1436 return popElement("br"); 1437 } 1438 1439 pushBack( ch ); 1440 1441 return null; 1442 } 1443 1444 private Element handleUnderscore() 1445 throws IOException 1446 { 1447 int ch = nextToken(); 1448 Element el = null; 1449 1450 if( ch == '_' ) 1451 { 1452 if( m_isbold ) 1453 { 1454 el = popElement("b"); 1455 } 1456 else 1457 { 1458 el = pushElement( new Element("b") ); 1459 } 1460 m_isbold = !m_isbold; 1461 } 1462 else 1463 { 1464 pushBack( ch ); 1465 } 1466 1467 return el; 1468 } 1469 1470 1471 /** 1472 * For example: italics. 1473 */ 1474 private Element handleApostrophe() 1475 throws IOException 1476 { 1477 int ch = nextToken(); 1478 Element el = null; 1479 1480 if( ch == '\'' ) 1481 { 1482 if( m_isitalic ) 1483 { 1484 el = popElement("i"); 1485 } 1486 else 1487 { 1488 el = pushElement( new Element("i") ); 1489 } 1490 m_isitalic = !m_isitalic; 1491 } 1492 else 1493 { 1494 pushBack( ch ); 1495 } 1496 1497 return el; 1498 } 1499 1500 private Element handleOpenbrace( boolean isBlock ) 1501 throws IOException 1502 { 1503 int ch = nextToken(); 1504 1505 if( ch == '{' ) 1506 { 1507 int ch2 = nextToken(); 1508 1509 if( ch2 == '{' ) 1510 { 1511 m_isPre = true; 1512 m_isEscaping = true; 1513 m_isPreBlock = isBlock; 1514 1515 if( isBlock ) 1516 { 1517 startBlockLevel(); 1518 return pushElement( new Element("pre") ); 1519 } 1520 1521 return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") ); 1522 } 1523 1524 pushBack( ch2 ); 1525 1526 return pushElement( new Element("tt") ); 1527 } 1528 1529 pushBack( ch ); 1530 1531 return null; 1532 } 1533 1534 /** 1535 * Handles both }} and }}} 1536 */ 1537 private Element handleClosebrace() 1538 throws IOException 1539 { 1540 int ch2 = nextToken(); 1541 1542 if( ch2 == '}' ) 1543 { 1544 int ch3 = nextToken(); 1545 1546 if( ch3 == '}' ) 1547 { 1548 if( m_isPre ) 1549 { 1550 if( m_isPreBlock ) 1551 { 1552 popElement( "pre" ); 1553 } 1554 else 1555 { 1556 popElement( "span" ); 1557 } 1558 1559 m_isPre = false; 1560 m_isEscaping = false; 1561 return m_currentElement; 1562 } 1563 1564 m_plainTextBuf.append("}}}"); 1565 return m_currentElement; 1566 } 1567 1568 pushBack( ch3 ); 1569 1570 if( !m_isEscaping ) 1571 { 1572 return popElement("tt"); 1573 } 1574 } 1575 1576 pushBack( ch2 ); 1577 1578 return null; 1579 } 1580 1581 private Element handleDash() 1582 throws IOException 1583 { 1584 int ch = nextToken(); 1585 1586 if( ch == '-' ) 1587 { 1588 int ch2 = nextToken(); 1589 1590 if( ch2 == '-' ) 1591 { 1592 int ch3 = nextToken(); 1593 1594 if( ch3 == '-' ) 1595 { 1596 // Empty away all the rest of the dashes. 1597 // Do not forget to return the first non-match back. 1598 do 1599 { 1600 ch = nextToken(); 1601 } 1602 while ( ch == '-' ); 1603 1604 pushBack(ch); 1605 startBlockLevel(); 1606 pushElement( new Element("hr") ); 1607 return popElement( "hr" ); 1608 } 1609 1610 pushBack( ch3 ); 1611 } 1612 pushBack( ch2 ); 1613 } 1614 1615 pushBack( ch ); 1616 1617 return null; 1618 } 1619 1620 private Element handleHeading() 1621 throws IOException 1622 { 1623 Element el = null; 1624 1625 int ch = nextToken(); 1626 1627 Heading hd = new Heading(); 1628 1629 if( ch == '!' ) 1630 { 1631 int ch2 = nextToken(); 1632 1633 if( ch2 == '!' ) 1634 { 1635 String title = peekAheadLine(); 1636 1637 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1638 } 1639 else 1640 { 1641 pushBack( ch2 ); 1642 String title = peekAheadLine(); 1643 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1644 } 1645 } 1646 else 1647 { 1648 pushBack( ch ); 1649 String title = peekAheadLine(); 1650 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1651 } 1652 1653 callHeadingListenerChain( hd ); 1654 1655 m_lastHeading = hd; 1656 1657 if( el != null ) pushElement(el); 1658 1659 return el; 1660 } 1661 1662 /** 1663 * Reads the stream until the next EOL or EOF. Note that it will also read the 1664 * EOL from the stream. 1665 */ 1666 private StringBuilder readUntilEOL() 1667 throws IOException 1668 { 1669 int ch; 1670 StringBuilder buf = new StringBuilder( 256 ); 1671 1672 while( true ) 1673 { 1674 ch = nextToken(); 1675 1676 if( ch == -1 ) 1677 break; 1678 1679 buf.append( (char) ch ); 1680 1681 if( ch == '\n' ) 1682 break; 1683 } 1684 return buf; 1685 } 1686 1687 /** Controls whether italic is restarted after a paragraph shift */ 1688 1689 private boolean m_restartitalic = false; 1690 private boolean m_restartbold = false; 1691 1692 private boolean m_newLine; 1693 1694 /** 1695 * Starts a block level element, therefore closing 1696 * a potential open paragraph tag. 1697 */ 1698 private void startBlockLevel() 1699 { 1700 // These may not continue over block level limits in XHTML 1701 1702 popElement("i"); 1703 popElement("b"); 1704 popElement("tt"); 1705 1706 if( m_isOpenParagraph ) 1707 { 1708 m_isOpenParagraph = false; 1709 popElement("p"); 1710 m_plainTextBuf.append("\n"); // Just small beautification 1711 } 1712 1713 m_restartitalic = m_isitalic; 1714 m_restartbold = m_isbold; 1715 1716 m_isitalic = false; 1717 m_isbold = false; 1718 } 1719 1720 private static String getListType( char c ) 1721 { 1722 if( c == '*' ) 1723 { 1724 return "ul"; 1725 } 1726 else if( c == '#' ) 1727 { 1728 return "ol"; 1729 } 1730 throw new InternalWikiException("Parser got faulty list type: "+c); 1731 } 1732 /** 1733 * Like original handleOrderedList() and handleUnorderedList() 1734 * however handles both ordered ('#') and unordered ('*') mixed together. 1735 */ 1736 1737 // FIXME: Refactor this; it's a bit messy. 1738 1739 private Element handleGeneralList() 1740 throws IOException 1741 { 1742 startBlockLevel(); 1743 1744 String strBullets = readWhile( "*#" ); 1745 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution 1746 int numBullets = strBullets.length(); 1747 1748 // override the beginning portion of bullet pattern to be like the previous 1749 // to simulate PHPWiki style lists 1750 1751 if(m_allowPHPWikiStyleLists) 1752 { 1753 // only substitute if different 1754 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1755 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) 1756 { 1757 if(numBullets <= m_genlistlevel) 1758 { 1759 // Substitute all but the last character (keep the expressed bullet preference) 1760 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") 1761 + strBullets.substring(numBullets-1, numBullets); 1762 } 1763 else 1764 { 1765 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); 1766 } 1767 } 1768 } 1769 1770 // 1771 // Check if this is still of the same type 1772 // 1773 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1774 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) 1775 { 1776 if( numBullets > m_genlistlevel ) 1777 { 1778 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); 1779 1780 for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) 1781 { 1782 // bullets are growing, get from new bullet list 1783 pushElement( new Element("li") ); 1784 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); 1785 } 1786 } 1787 else if( numBullets < m_genlistlevel ) 1788 { 1789 // Close the previous list item. 1790 // buf.append( m_renderer.closeListItem() ); 1791 popElement( "li" ); 1792 1793 for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) 1794 { 1795 // bullets are shrinking, get from old bullet list 1796 1797 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1798 if( m_genlistlevel > 0 ) 1799 { 1800 popElement( "li" ); 1801 } 1802 1803 } 1804 } 1805 else 1806 { 1807 if( m_genlistlevel > 0 ) 1808 { 1809 popElement( "li" ); 1810 } 1811 } 1812 } 1813 else 1814 { 1815 // 1816 // The pattern has changed, unwind and restart 1817 // 1818 int numEqualBullets; 1819 int numCheckBullets; 1820 1821 // find out how much is the same 1822 numEqualBullets = 0; 1823 numCheckBullets = Math.min(numBullets,m_genlistlevel); 1824 1825 while( numEqualBullets < numCheckBullets ) 1826 { 1827 // if the bullets are equal so far, keep going 1828 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) 1829 numEqualBullets++; 1830 // otherwise giveup, we have found how many are equal 1831 else 1832 break; 1833 } 1834 1835 //unwind 1836 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) 1837 { 1838 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); 1839 if( m_genlistlevel > numBullets ) 1840 { 1841 popElement("li"); 1842 } 1843 } 1844 1845 //rewind 1846 1847 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); 1848 for(int i = numEqualBullets; i < numBullets; i++) 1849 { 1850 pushElement( new Element("li") ); 1851 pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); 1852 } 1853 m_genlistlevel = numBullets; 1854 } 1855 1856 // 1857 // Push a new list item, and eat away any extra whitespace 1858 // 1859 pushElement( new Element("li") ); 1860 readWhile(" "); 1861 1862 // work done, remember the new bullet list (in place of old one) 1863 m_genlistBulletBuffer.setLength(0); 1864 m_genlistBulletBuffer.append(strBullets); 1865 1866 return m_currentElement; 1867 } 1868 1869 private Element unwindGeneralList() 1870 { 1871 //unwind 1872 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 1873 { 1874 popElement( "li" ); 1875 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1876 } 1877 1878 m_genlistBulletBuffer.setLength(0); 1879 1880 return null; 1881 } 1882 1883 1884 private Element handleDefinitionList() 1885 throws IOException 1886 { 1887 if( !m_isdefinition ) 1888 { 1889 m_isdefinition = true; 1890 1891 startBlockLevel(); 1892 1893 pushElement( new Element("dl") ); 1894 return pushElement( new Element("dt") ); 1895 } 1896 1897 return null; 1898 } 1899 1900 private Element handleOpenbracket() 1901 throws IOException 1902 { 1903 StringBuilder sb = new StringBuilder(40); 1904 int pos = getPosition(); 1905 int ch = nextToken(); 1906 boolean isPlugin = false; 1907 1908 if( ch == '[' ) 1909 { 1910 if( m_wysiwygEditorMode ) 1911 { 1912 sb.append( '[' ); 1913 } 1914 1915 sb.append( (char)ch ); 1916 1917 while( (ch = nextToken()) == '[' ) 1918 { 1919 sb.append( (char)ch ); 1920 } 1921 } 1922 1923 1924 if( ch == '{' ) 1925 { 1926 isPlugin = true; 1927 } 1928 1929 pushBack( ch ); 1930 1931 if( sb.length() > 0 ) 1932 { 1933 m_plainTextBuf.append( sb ); 1934 return m_currentElement; 1935 } 1936 1937 // 1938 // Find end of hyperlink 1939 // 1940 1941 ch = nextToken(); 1942 int nesting = 1; // Check for nested plugins 1943 1944 while( ch != -1 ) 1945 { 1946 int ch2 = nextToken(); pushBack(ch2); 1947 1948 if( isPlugin ) 1949 { 1950 if( ch == '[' && ch2 == '{' ) 1951 { 1952 nesting++; 1953 } 1954 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 1955 { 1956 break; 1957 } 1958 else if( ch == '}' && ch2 == ']' ) 1959 { 1960 // NB: This will be decremented once at the end 1961 nesting--; 1962 } 1963 } 1964 else 1965 { 1966 if( ch == ']' ) 1967 { 1968 break; 1969 } 1970 } 1971 1972 sb.append( (char) ch ); 1973 1974 ch = nextToken(); 1975 } 1976 1977 // 1978 // If the link is never finished, do some tricks to display the rest of the line 1979 // unchanged. 1980 // 1981 if( ch == -1 ) 1982 { 1983 log.debug("Warning: unterminated link detected!"); 1984 m_isEscaping = true; 1985 m_plainTextBuf.append( sb ); 1986 flushPlainText(); 1987 m_isEscaping = false; 1988 return m_currentElement; 1989 } 1990 1991 return handleHyperlinks( sb.toString(), pos ); 1992 } 1993 1994 /** 1995 * Reads the stream until the current brace is closed or stream end. 1996 */ 1997 private String readBraceContent( char opening, char closing ) 1998 throws IOException 1999 { 2000 StringBuilder sb = new StringBuilder(40); 2001 int braceLevel = 1; 2002 int ch; 2003 while(( ch = nextToken() ) != -1 ) 2004 { 2005 if( ch == '\\' ) 2006 { 2007 continue; 2008 } 2009 else if ( ch == opening ) 2010 { 2011 braceLevel++; 2012 } 2013 else if ( ch == closing ) 2014 { 2015 braceLevel--; 2016 if (braceLevel==0) 2017 { 2018 break; 2019 } 2020 } 2021 sb.append( (char)ch ); 2022 } 2023 return sb.toString(); 2024 } 2025 2026 2027 /** 2028 * Handles constructs of type %%(style) and %%class 2029 * @param newLine 2030 * @return An Element containing the div or span, depending on the situation. 2031 * @throws IOException 2032 */ 2033 private Element handleDiv( boolean newLine ) 2034 throws IOException 2035 { 2036 int ch = nextToken(); 2037 Element el = null; 2038 2039 if( ch == '%' ) 2040 { 2041 String style = null; 2042 String clazz = null; 2043 2044 ch = nextToken(); 2045 2046 // 2047 // Style or class? 2048 // 2049 if( ch == '(' ) 2050 { 2051 style = readBraceContent('(',')'); 2052 } 2053 else if( Character.isLetter( (char) ch ) ) 2054 { 2055 pushBack( ch ); 2056 clazz = readUntil( " \t\n\r" ); 2057 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 2058 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 2059 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 2060 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 2061 2062 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 2063 //(2) remove any invalid character 2064 if( clazz != null){ 2065 2066 clazz = clazz.replace('.', ' ') 2067 .replaceAll("[^\\s-_\\w\\x200-\\x377]+",""); 2068 2069 } 2070 ch = nextToken(); 2071 2072 // 2073 // Pop out only spaces, so that the upcoming EOL check does not check the 2074 // next line. 2075 // 2076 if( ch == '\n' || ch == '\r' ) 2077 { 2078 pushBack(ch); 2079 } 2080 } 2081 else 2082 { 2083 // 2084 // Anything else stops. 2085 // 2086 2087 pushBack(ch); 2088 2089 try 2090 { 2091 Boolean isSpan = m_styleStack.pop(); 2092 2093 if( isSpan == null ) 2094 { 2095 // Fail quietly 2096 } 2097 else if( isSpan.booleanValue() ) 2098 { 2099 el = popElement( "span" ); 2100 } 2101 else 2102 { 2103 el = popElement( "div" ); 2104 } 2105 } 2106 catch( EmptyStackException e ) 2107 { 2108 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 2109 return m_currentElement; 2110 } 2111 2112 return el; 2113 } 2114 2115 // 2116 // Check if there is an attempt to do something nasty 2117 // 2118 2119 try 2120 { 2121 style = StringEscapeUtils.unescapeHtml(style); 2122 if( style != null && style.indexOf("javascript:") != -1 ) 2123 { 2124 log.debug("Attempt to output javascript within CSS:"+style); 2125 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2126 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 2127 } 2128 } 2129 catch( NumberFormatException e ) 2130 { 2131 // 2132 // If there are unknown entities, we don't want the parser to stop. 2133 // 2134 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2135 String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 2136 return addElement( makeError( msg ) ); 2137 } 2138 2139 // 2140 // Decide if we should open a div or a span? 2141 // 2142 String eol = peekAheadLine(); 2143 2144 if( eol.trim().length() > 0 ) 2145 { 2146 // There is stuff after the class 2147 2148 el = new Element("span"); 2149 2150 m_styleStack.push( Boolean.TRUE ); 2151 } 2152 else 2153 { 2154 startBlockLevel(); 2155 el = new Element("div"); 2156 m_styleStack.push( Boolean.FALSE ); 2157 } 2158 2159 if( style != null ) el.setAttribute("style", style); 2160 if( clazz != null ) el.setAttribute("class", clazz); 2161 el = pushElement( el ); 2162 2163 return el; 2164 } 2165 2166 pushBack(ch); 2167 2168 return el; 2169 } 2170 2171 private Element handleSlash( boolean newLine ) 2172 throws IOException 2173 { 2174 int ch = nextToken(); 2175 2176 pushBack(ch); 2177 if( ch == '%' && !m_styleStack.isEmpty() ) 2178 { 2179 return handleDiv( newLine ); 2180 } 2181 2182 return null; 2183 } 2184 2185 private Element handleBar( boolean newLine ) 2186 throws IOException 2187 { 2188 Element el = null; 2189 2190 if( !m_istable && !newLine ) 2191 { 2192 return null; 2193 } 2194 2195 // 2196 // If the bar is in the first column, we will either start 2197 // a new table or continue the old one. 2198 // 2199 2200 if( newLine ) 2201 { 2202 if( !m_istable ) 2203 { 2204 startBlockLevel(); 2205 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2206 m_istable = true; 2207 m_rowNum = 0; 2208 } 2209 2210 m_rowNum++; 2211 Element tr = ( m_rowNum % 2 != 0 ) 2212 ? new Element("tr").setAttribute("class", "odd") 2213 : new Element("tr"); 2214 el = pushElement( tr ); 2215 } 2216 2217 // 2218 // Check out which table cell element to start; 2219 // a header element (th) or a regular element (td). 2220 // 2221 int ch = nextToken(); 2222 2223 if( ch == '|' ) 2224 { 2225 if( !newLine ) 2226 { 2227 el = popElement("th"); 2228 if( el == null ) popElement("td"); 2229 } 2230 el = pushElement( new Element("th") ); 2231 } 2232 else 2233 { 2234 if( !newLine ) 2235 { 2236 el = popElement("td"); 2237 if( el == null ) popElement("th"); 2238 } 2239 2240 el = pushElement( new Element("td") ); 2241 2242 pushBack( ch ); 2243 } 2244 2245 return el; 2246 } 2247 2248 /** 2249 * Generic escape of next character or entity. 2250 */ 2251 private Element handleTilde() 2252 throws IOException 2253 { 2254 int ch = nextToken(); 2255 2256 if( ch == ' ' ) 2257 { 2258 if( m_wysiwygEditorMode ) 2259 { 2260 m_plainTextBuf.append( "~ " ); 2261 } 2262 return m_currentElement; 2263 } 2264 2265 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2266 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2267 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2268 { 2269 if( m_wysiwygEditorMode ) 2270 { 2271 m_plainTextBuf.append( '~' ); 2272 } 2273 2274 m_plainTextBuf.append( (char)ch ); 2275 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2276 return m_currentElement; 2277 } 2278 2279 // No escape. 2280 pushBack( ch ); 2281 2282 return null; 2283 } 2284 2285 private void fillBuffer( Element startElement ) 2286 throws IOException 2287 { 2288 m_currentElement = startElement; 2289 2290 boolean quitReading = false; 2291 m_newLine = true; 2292 disableOutputEscaping(); 2293 2294 while(!quitReading) 2295 { 2296 int ch = nextToken(); 2297 2298 if( ch == -1 ) break; 2299 2300 // 2301 // Check if we're actually ending the preformatted mode. 2302 // We still must do an entity transformation here. 2303 // 2304 if( m_isEscaping ) 2305 { 2306 if( ch == '}' ) 2307 { 2308 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2309 } 2310 else if( ch == -1 ) 2311 { 2312 quitReading = true; 2313 } 2314 else if( ch == '\r' ) 2315 { 2316 // DOS line feeds we ignore. 2317 } 2318 else if( ch == '<' ) 2319 { 2320 m_plainTextBuf.append( "<" ); 2321 } 2322 else if( ch == '>' ) 2323 { 2324 m_plainTextBuf.append( ">" ); 2325 } 2326 else if( ch == '&' ) 2327 { 2328 m_plainTextBuf.append( "&" ); 2329 } 2330 else if( ch == '~' ) 2331 { 2332 String braces = readWhile("}"); 2333 if( braces.length() >= 3 ) 2334 { 2335 m_plainTextBuf.append("}}}"); 2336 2337 braces = braces.substring(3); 2338 } 2339 else 2340 { 2341 m_plainTextBuf.append( (char) ch ); 2342 } 2343 2344 for( int i = braces.length()-1; i >= 0; i-- ) 2345 { 2346 pushBack(braces.charAt(i)); 2347 } 2348 } 2349 else 2350 { 2351 m_plainTextBuf.append( (char) ch ); 2352 } 2353 2354 continue; 2355 } 2356 2357 // 2358 // An empty line stops a list 2359 // 2360 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2361 { 2362 m_plainTextBuf.append(unwindGeneralList()); 2363 } 2364 2365 if( m_newLine && ch != '|' && m_istable ) 2366 { 2367 popElement("table"); 2368 m_istable = false; 2369 } 2370 2371 int skip = IGNORE; 2372 2373 // 2374 // Do the actual parsing and catch any errors. 2375 // 2376 try 2377 { 2378 skip = parseToken( ch ); 2379 } 2380 catch( IllegalDataException e ) 2381 { 2382 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2383 2384 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2385 } 2386 2387 // 2388 // The idea is as follows: If the handler method returns 2389 // an element (el != null), it is assumed that it has been 2390 // added in the stack. Otherwise the character is added 2391 // as is to the plaintext buffer. 2392 // 2393 // For the transition phase, if s != null, it also gets 2394 // added in the plaintext buffer. 2395 // 2396 2397 switch( skip ) 2398 { 2399 case ELEMENT: 2400 m_newLine = false; 2401 break; 2402 2403 case CHARACTER: 2404 m_plainTextBuf.append( (char) ch ); 2405 m_newLine = false; 2406 break; 2407 2408 case IGNORE: 2409 default: 2410 break; 2411 } 2412 } 2413 2414 closeHeadings(); 2415 popElement("domroot"); 2416 } 2417 2418 private String cleanupSuspectData( String s ) 2419 { 2420 StringBuilder sb = new StringBuilder( s.length() ); 2421 2422 for( int i = 0; i < s.length(); i++ ) 2423 { 2424 char c = s.charAt(i); 2425 2426 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2427 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); 2428 } 2429 2430 return sb.toString(); 2431 } 2432 2433 /** The token is a plain character. */ 2434 protected static final int CHARACTER = 0; 2435 2436 /** The token is a wikimarkup element. */ 2437 protected static final int ELEMENT = 1; 2438 2439 /** The token is to be ignored. */ 2440 protected static final int IGNORE = 2; 2441 2442 /** 2443 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2444 * you think this was a wiki markup element, and IGNORE, if you think 2445 * we should ignore this altogether. 2446 * <p> 2447 * To add your own MarkupParser, you can override this method, but it 2448 * is recommended that you call super.parseToken() as well to gain advantage 2449 * of JSPWiki's own markup. You can call it at the start of your own 2450 * parseToken() or end - it does not matter. 2451 * 2452 * @param ch The character under investigation 2453 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2454 * @throws IOException If parsing fails. 2455 */ 2456 protected int parseToken( int ch ) 2457 throws IOException 2458 { 2459 Element el = null; 2460 2461 // 2462 // Now, check the incoming token. 2463 // 2464 switch( ch ) 2465 { 2466 case '\r': 2467 // DOS linefeeds we forget 2468 return IGNORE; 2469 2470 case '\n': 2471 // 2472 // Close things like headings, etc. 2473 // 2474 2475 // FIXME: This is not really very fast 2476 2477 closeHeadings(); 2478 2479 popElement("dl"); // Close definition lists. 2480 if( m_istable ) 2481 { 2482 popElement("tr"); 2483 } 2484 2485 m_isdefinition = false; 2486 2487 if( m_newLine ) 2488 { 2489 // Paragraph change. 2490 startBlockLevel(); 2491 2492 // 2493 // Figure out which elements cannot be enclosed inside 2494 // a <p></p> pair according to XHTML rules. 2495 // 2496 String nextLine = peekAheadLine(); 2497 if( nextLine.length() == 0 || 2498 (nextLine.length() > 0 && 2499 !nextLine.startsWith("{{{") && 2500 !nextLine.startsWith("----") && 2501 !nextLine.startsWith("%%") && 2502 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2503 { 2504 pushElement( new Element("p") ); 2505 m_isOpenParagraph = true; 2506 2507 if( m_restartitalic ) 2508 { 2509 pushElement( new Element("i") ); 2510 m_isitalic = true; 2511 m_restartitalic = false; 2512 } 2513 if( m_restartbold ) 2514 { 2515 pushElement( new Element("b") ); 2516 m_isbold = true; 2517 m_restartbold = false; 2518 } 2519 } 2520 } 2521 else 2522 { 2523 m_plainTextBuf.append("\n"); 2524 m_newLine = true; 2525 } 2526 return IGNORE; 2527 2528 2529 case '\\': 2530 el = handleBackslash(); 2531 break; 2532 2533 case '_': 2534 el = handleUnderscore(); 2535 break; 2536 2537 case '\'': 2538 el = handleApostrophe(); 2539 break; 2540 2541 case '{': 2542 el = handleOpenbrace( m_newLine ); 2543 break; 2544 2545 case '}': 2546 el = handleClosebrace(); 2547 break; 2548 2549 case '-': 2550 if( m_newLine ) 2551 el = handleDash(); 2552 2553 break; 2554 2555 case '!': 2556 if( m_newLine ) 2557 { 2558 el = handleHeading(); 2559 } 2560 break; 2561 2562 case ';': 2563 if( m_newLine ) 2564 { 2565 el = handleDefinitionList(); 2566 } 2567 break; 2568 2569 case ':': 2570 if( m_isdefinition ) 2571 { 2572 popElement("dt"); 2573 el = pushElement( new Element("dd") ); 2574 m_isdefinition = false; 2575 } 2576 break; 2577 2578 case '[': 2579 el = handleOpenbracket(); 2580 break; 2581 2582 case '*': 2583 if( m_newLine ) 2584 { 2585 pushBack('*'); 2586 el = handleGeneralList(); 2587 } 2588 break; 2589 2590 case '#': 2591 if( m_newLine ) 2592 { 2593 pushBack('#'); 2594 el = handleGeneralList(); 2595 } 2596 break; 2597 2598 case '|': 2599 el = handleBar( m_newLine ); 2600 break; 2601 2602 case '~': 2603 el = handleTilde(); 2604 break; 2605 2606 case '%': 2607 el = handleDiv( m_newLine ); 2608 break; 2609 2610 case '/': 2611 el = handleSlash( m_newLine ); 2612 break; 2613 2614 default: 2615 break; 2616 } 2617 2618 return el != null ? ELEMENT : CHARACTER; 2619 } 2620 2621 private void closeHeadings() 2622 { 2623 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2624 { 2625 // Add the hash anchor element at the end of the heading 2626 addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2627 m_lastHeading = null; 2628 } 2629 popElement("h2"); 2630 popElement("h3"); 2631 popElement("h4"); 2632 } 2633 2634 /** 2635 * Parses the entire document from the Reader given in the constructor or 2636 * set by {@link #setInputReader(Reader)}. 2637 * 2638 * @return A WikiDocument, ready to be passed to the renderer. 2639 * @throws IOException If parsing cannot be accomplished. 2640 */ 2641 public WikiDocument parse() 2642 throws IOException 2643 { 2644 WikiDocument d = new WikiDocument( m_context.getPage() ); 2645 d.setContext( m_context ); 2646 2647 Element rootElement = new Element("domroot"); 2648 2649 d.setRootElement( rootElement ); 2650 2651 fillBuffer( rootElement ); 2652 2653 paragraphify(rootElement); 2654 2655 return d; 2656 } 2657 2658 /** 2659 * Checks out that the first paragraph is correctly installed. 2660 * 2661 * @param rootElement 2662 */ 2663 private void paragraphify(Element rootElement) 2664 { 2665 // 2666 // Add the paragraph tag to the first paragraph 2667 // 2668 List< Content > kids = rootElement.getContent(); 2669 2670 if( rootElement.getChild("p") != null ) 2671 { 2672 ArrayList<Content> ls = new ArrayList<Content>(); 2673 int idxOfFirstContent = 0; 2674 int count = 0; 2675 2676 for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) 2677 { 2678 Content c = i.next(); 2679 if( c instanceof Element ) 2680 { 2681 String name = ( ( Element )c ).getName(); 2682 if( isBlockLevel( name ) ) break; 2683 } 2684 2685 if( !(c instanceof ProcessingInstruction) ) 2686 { 2687 ls.add( c ); 2688 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2689 } 2690 } 2691 2692 // 2693 // If there were any elements, then add a new <p> (unless it would 2694 // be an empty one) 2695 // 2696 if( ls.size() > 0 ) 2697 { 2698 Element newel = new Element("p"); 2699 2700 for( Iterator< Content > i = ls.iterator(); i.hasNext(); ) 2701 { 2702 Content c = i.next(); 2703 2704 c.detach(); 2705 newel.addContent(c); 2706 } 2707 2708 // 2709 // Make sure there are no empty <p/> tags added. 2710 // 2711 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) 2712 rootElement.addContent(idxOfFirstContent, newel); 2713 } 2714 } 2715 } 2716 2717}