001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import java.io.IOException; 022import java.io.Reader; 023import java.io.StringReader; 024import java.text.MessageFormat; 025import java.util.ArrayList; 026import java.util.Arrays; 027import java.util.Collection; 028import java.util.Collections; 029import java.util.Comparator; 030import java.util.EmptyStackException; 031import java.util.HashMap; 032import java.util.Iterator; 033import java.util.List; 034import java.util.Map; 035import java.util.Properties; 036import java.util.ResourceBundle; 037import java.util.Stack; 038 039import javax.xml.transform.Result; 040 041import org.apache.commons.lang.StringEscapeUtils; 042import org.apache.commons.lang.StringUtils; 043import org.apache.log4j.Logger; 044import org.apache.oro.text.GlobCompiler; 045import org.apache.oro.text.regex.MalformedPatternException; 046import org.apache.oro.text.regex.MatchResult; 047import org.apache.oro.text.regex.Pattern; 048import org.apache.oro.text.regex.PatternCompiler; 049import org.apache.oro.text.regex.PatternMatcher; 050import org.apache.oro.text.regex.Perl5Compiler; 051import org.apache.oro.text.regex.Perl5Matcher; 052import org.apache.wiki.InternalWikiException; 053import org.apache.wiki.StringTransmutator; 054import org.apache.wiki.VariableManager; 055import org.apache.wiki.WikiContext; 056import org.apache.wiki.WikiPage; 057import org.apache.wiki.api.exceptions.PluginException; 058import org.apache.wiki.api.exceptions.ProviderException; 059import org.apache.wiki.api.plugin.WikiPlugin; 060import org.apache.wiki.attachment.Attachment; 061import org.apache.wiki.attachment.AttachmentManager; 062import org.apache.wiki.auth.WikiSecurityException; 063import org.apache.wiki.auth.acl.Acl; 064import org.apache.wiki.i18n.InternationalizationManager; 065import org.apache.wiki.preferences.Preferences; 066import org.apache.wiki.render.CleanTextRenderer; 067import org.apache.wiki.render.RenderingManager; 068import org.apache.wiki.util.TextUtil; 069import org.jdom2.Attribute; 070import org.jdom2.Content; 071import org.jdom2.Element; 072import org.jdom2.IllegalDataException; 073import org.jdom2.ProcessingInstruction; 074import org.jdom2.Verifier; 075 076/** 077 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 078 * heart and soul of JSPWiki : make sure you test properly anything that is added, 079 * or else it breaks down horribly. 080 * 081 * @since 2.4 082 */ 083public class JSPWikiMarkupParser extends MarkupParser { 084 085 /** Name of the outlink image; relative path to the JSPWiki directory. */ 086 private static final String OUTLINK_IMAGE = "images/out.png"; 087 088 /** The value for anchor element <tt>class</tt> attributes when used 089 * for wiki page (normal) links. The value is "wikipage". */ 090 public static final String CLASS_WIKIPAGE = "wikipage"; 091 092 /** The value for anchor element <tt>class</tt> attributes when used 093 * for edit page links. The value is "createpage". */ 094 public static final String CLASS_EDITPAGE = "createpage"; 095 096 /** The value for anchor element <tt>class</tt> attributes when used 097 * for interwiki page links. The value is "interwiki". */ 098 public static final String CLASS_INTERWIKI = "interwiki"; 099 100 protected static final int READ = 0; 101 protected static final int EDIT = 1; 102 protected static final int EMPTY = 2; // Empty message 103 protected static final int LOCAL = 3; 104 protected static final int LOCALREF = 4; 105 protected static final int IMAGE = 5; 106 protected static final int EXTERNAL = 6; 107 protected static final int INTERWIKI = 7; 108 protected static final int IMAGELINK = 8; 109 protected static final int IMAGEWIKILINK = 9; 110 protected static final int ATTACHMENT = 10; 111 112 private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class ); 113 114 private boolean m_isbold = false; 115 private boolean m_isitalic = false; 116 private boolean m_istable = false; 117 private boolean m_isPre = false; 118 private boolean m_isEscaping = false; 119 private boolean m_isdefinition = false; 120 private boolean m_isPreBlock = false; 121 122 /** Contains style information, in multiple forms. */ 123 private Stack<Boolean> m_styleStack = new Stack<Boolean>(); 124 125 // general list handling 126 private int m_genlistlevel = 0; 127 private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern 128 private boolean m_allowPHPWikiStyleLists = true; 129 130 131 private boolean m_isOpenParagraph = false; 132 133 /** Keeps image regexp Patterns */ 134 private List<Pattern> m_inlineImagePatterns; 135 136 /** Parser for extended link functionality. */ 137 private LinkParser m_linkParser = new LinkParser(); 138 139 private PatternMatcher m_inlineMatcher = new Perl5Matcher(); 140 141 /** Keeps track of any plain text that gets put in the Text nodes */ 142 private StringBuilder m_plainTextBuf = new StringBuilder(20); 143 144 private Element m_currentElement; 145 146 /** Keep track of duplicate header names. */ 147 private Map<String, Integer> m_titleSectionCounter = new HashMap<String, Integer>(); 148 149 /** If true, consider CamelCase hyperlinks as well. */ 150 public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks"; 151 152 /** If true, all hyperlinks are translated as well, regardless whether they 153 are surrounded by brackets. */ 154 public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris"; 155 156 /** If true, all outward links (external links) have a small link image appended. */ 157 public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage"; 158 159 /** If true, all outward attachment info links have a small link image appended. */ 160 public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage"; 161 162 /** If set to "true", all external links are tagged with 'rel="nofollow"' */ 163 public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow"; 164 165 /** If true, then considers CamelCase links as well. */ 166 private boolean m_camelCaseLinks = false; 167 168 /** If true, then generate special output for wysiwyg editing in certain cases */ 169 private boolean m_wysiwygEditorMode = false; 170 171 /** If true, consider URIs that have no brackets as well. */ 172 // FIXME: Currently reserved, but not used. 173 private boolean m_plainUris = false; 174 175 /** If true, all outward links use a small link image. */ 176 private boolean m_useOutlinkImage = true; 177 178 private boolean m_useAttachmentImage = true; 179 180 /** If true, allows raw HTML. */ 181 private boolean m_allowHTML = false; 182 183 private boolean m_useRelNofollow = false; 184 185 private PatternCompiler m_compiler = new Perl5Compiler(); 186 187 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 188 189 private PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 190 private Pattern m_camelCasePattern; 191 192 private int m_rowNum = 1; 193 194 private Heading m_lastHeading = null; 195 196 /** 197 * This list contains all IANA registered URI protocol 198 * types as of September 2004 + a few well-known extra types. 199 * 200 * JSPWiki recognises all of them as external links. 201 * 202 * This array is sorted during class load, so you can just dump 203 * here whatever you want in whatever order you want. 204 */ 205 static final String[] EXTERNAL_LINKS = { 206 "http:", "ftp:", "https:", "mailto:", 207 "news:", "file:", "rtsp:", "mms:", "ldap:", 208 "gopher:", "nntp:", "telnet:", "wais:", 209 "prospero:", "z39.50s", "z39.50r", "vemmi:", 210 "imap:", "nfs:", "acap:", "tip:", "pop:", 211 "dav:", "opaquelocktoken:", "sip:", "sips:", 212 "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps", 213 "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:", 214 "h323:", "ipp:", "tftp:", "mupdate:", "pres:", 215 "im:", "mtqp", "smb:" }; 216 217 private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns"; 218 219 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 220 221 private static final String[] CLASS_TYPES = 222 { 223 CLASS_WIKIPAGE, 224 CLASS_EDITPAGE, 225 "", 226 "footnote", 227 "footnoteref", 228 "", 229 "external", 230 CLASS_INTERWIKI, 231 "external", 232 CLASS_WIKIPAGE, 233 "attachment" 234 }; 235 236 237 /** 238 * This Comparator is used to find an external link from c_externalLinks. It 239 * checks if the link starts with the other arraythingie. 240 */ 241 private static Comparator<String> c_startingComparator = new StartingComparator(); 242 243 static 244 { 245 Arrays.sort( EXTERNAL_LINKS ); 246 } 247 248 /** 249 * Creates a markup parser. 250 * 251 * @param context The WikiContext which controls the parsing 252 * @param in Where the data is read from. 253 */ 254 public JSPWikiMarkupParser( WikiContext context, Reader in ) 255 { 256 super( context, in ); 257 initialize(); 258 } 259 260 // FIXME: parsers should be pooled for better performance. 261 @SuppressWarnings("unchecked") 262 private void initialize() 263 { 264 PatternCompiler compiler = new GlobCompiler(); 265 List<Pattern> compiledpatterns; 266 267 // 268 // We cache compiled patterns in the engine, since their creation is 269 // really expensive 270 // 271 compiledpatterns = (List<Pattern>)m_engine.getAttribute( INLINE_IMAGE_PATTERNS ); 272 273 if( compiledpatterns == null ) 274 { 275 compiledpatterns = new ArrayList<Pattern>(20); 276 Collection< String > ptrns = m_engine.getAllInlinedImagePatterns(); 277 278 // 279 // Make them into Regexp Patterns. Unknown patterns 280 // are ignored. 281 // 282 for( Iterator< String > i = ptrns.iterator(); i.hasNext(); ) 283 { 284 try 285 { 286 compiledpatterns.add( compiler.compile( i.next(), 287 GlobCompiler.DEFAULT_MASK|GlobCompiler.READ_ONLY_MASK ) ); 288 } 289 catch( MalformedPatternException e ) 290 { 291 log.error("Malformed pattern in properties: ", e ); 292 } 293 } 294 295 m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns ); 296 } 297 298 m_inlineImagePatterns = Collections.unmodifiableList(compiledpatterns); 299 300 m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN ); 301 if( m_camelCasePattern == null ) 302 { 303 try 304 { 305 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX, 306 Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 307 } 308 catch( MalformedPatternException e ) 309 { 310 log.fatal("Internal error: Someone put in a faulty pattern.",e); 311 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader"); 312 } 313 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 314 } 315 // 316 // Set the properties. 317 // 318 Properties props = m_engine.getWikiProperties(); 319 320 String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 321 322 if( cclinks != null ) 323 { 324 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 325 } 326 else 327 { 328 m_camelCaseLinks = TextUtil.getBooleanProperty( props, 329 PROP_CAMELCASELINKS, 330 m_camelCaseLinks ); 331 } 332 333 334 335 Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE ); 336 if( wysiwygVariable != null ) 337 { 338 m_wysiwygEditorMode = wysiwygVariable.booleanValue(); 339 } 340 341 m_plainUris = getLocalBooleanProperty( m_context, 342 props, 343 PROP_PLAINURIS, 344 m_plainUris ); 345 m_useOutlinkImage = getLocalBooleanProperty( m_context, 346 props, 347 PROP_USEOUTLINKIMAGE, 348 m_useOutlinkImage ); 349 m_useAttachmentImage = getLocalBooleanProperty( m_context, 350 props, 351 PROP_USEATTACHMENTIMAGE, 352 m_useAttachmentImage ); 353 m_allowHTML = getLocalBooleanProperty( m_context, 354 props, 355 MarkupParser.PROP_ALLOWHTML, 356 m_allowHTML ); 357 358 m_useRelNofollow = getLocalBooleanProperty( m_context, 359 props, 360 PROP_USERELNOFOLLOW, 361 m_useRelNofollow ); 362 363 if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null ) 364 { 365 disableAccessRules(); 366 } 367 368 m_context.getPage().setHasMetadata(); 369 } 370 371 /** 372 * This is just a simple helper method which will first check the context 373 * if there is already an override in place, and if there is not, 374 * it will then check the given properties. 375 * 376 * @param context WikiContext to check first 377 * @param props Properties to check next 378 * @param key What key are we searching for? 379 * @param defValue Default value for the boolean 380 * @return True or false 381 */ 382 private static boolean getLocalBooleanProperty( WikiContext context, 383 Properties props, 384 String key, 385 boolean defValue ) 386 { 387 Object bool = context.getVariable(key); 388 389 if( bool != null ) 390 { 391 return TextUtil.isPositive( (String) bool ); 392 } 393 394 return TextUtil.getBooleanProperty( props, key, defValue ); 395 } 396 397 /** 398 * Returns link name, if it exists; otherwise it returns null. 399 */ 400 private String linkExists( String page ) 401 { 402 try 403 { 404 if( page == null || page.length() == 0 ) return null; 405 406 return m_engine.getFinalPageName( page ); 407 } 408 catch( ProviderException e ) 409 { 410 log.warn("TranslatorReader got a faulty page name!",e); 411 412 return page; // FIXME: What would be the correct way to go back? 413 } 414 } 415 416 /** 417 * Calls a transmutator chain. 418 * 419 * @param list Chain to call 420 * @param text Text that should be passed to the mutate() method 421 * of each of the mutators in the chain. 422 * @return The result of the mutation. 423 */ 424 425 protected String callMutatorChain( Collection list, String text ) 426 { 427 if( list == null || list.size() == 0 ) 428 { 429 return text; 430 } 431 432 for( Iterator i = list.iterator(); i.hasNext(); ) 433 { 434 StringTransmutator m = (StringTransmutator) i.next(); 435 436 text = m.mutate( m_context, text ); 437 } 438 439 return text; 440 } 441 442 /** 443 * Calls the heading listeners. 444 * 445 * @param param A Heading object. 446 */ 447 protected void callHeadingListenerChain( Heading param ) 448 { 449 List< HeadingListener > list = m_headingListenerChain; 450 451 for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); ) 452 { 453 HeadingListener h = i.next(); 454 455 h.headingAdded( m_context, param ); 456 } 457 } 458 459 /** 460 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 461 * if you really know what you are doing. 462 * 463 * @param type One of the types above 464 * @param link URL to which to link to 465 * @param text Link text 466 * @param section If a particular section identifier is required. 467 * @return An A element. 468 * @since 2.4.78 469 */ 470 protected Element createAnchor(int type, String link, String text, String section) 471 { 472 text = escapeHTMLEntities( text ); 473 section = escapeHTMLEntities( section ); 474 Element el = new Element("a"); 475 el.setAttribute("class",CLASS_TYPES[type]); 476 el.setAttribute("href",link+section); 477 el.addContent(text); 478 return el; 479 } 480 481 private Element makeLink( int type, String link, String text, String section, Iterator attributes ) 482 { 483 Element el = null; 484 485 if( text == null ) text = link; 486 487 text = callMutatorChain( m_linkMutators, text ); 488 489 section = (section != null) ? ("#"+section) : ""; 490 491 // Make sure we make a link name that can be accepted 492 // as a valid URL. 493 494 if( link.length() == 0 ) 495 { 496 type = EMPTY; 497 } 498 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 499 500 switch(type) 501 { 502 case READ: 503 el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section ); 504 break; 505 506 case EDIT: 507 el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" ); 508 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 509 510 break; 511 512 case EMPTY: 513 el = new Element("u").addContent(text); 514 break; 515 516 // 517 // These two are for local references - footnotes and 518 // references to footnotes. 519 // We embed the page name (or whatever WikiContext gives us) 520 // to make sure the links are unique across Wiki. 521 // 522 case LOCALREF: 523 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 524 break; 525 526 case LOCAL: 527 el = new Element("a").setAttribute("class","footnote"); 528 el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1)); 529 el.addContent("["+text+"]"); 530 break; 531 532 // 533 // With the image, external and interwiki types we need to 534 // make sure nobody can put in Javascript or something else 535 // annoying into the links themselves. We do this by preventing 536 // a haxor from stopping the link name short with quotes in 537 // fillBuffer(). 538 // 539 case IMAGE: 540 el = new Element("img").setAttribute("class","inline"); 541 el.setAttribute("src",link); 542 el.setAttribute("alt",text); 543 break; 544 545 case IMAGELINK: 546 el = new Element("img").setAttribute("class","inline"); 547 el.setAttribute("src",link); 548 el.setAttribute("alt",text); 549 el = createAnchor(IMAGELINK,text,"","").addContent(el); 550 break; 551 552 case IMAGEWIKILINK: 553 String pagelink = m_context.getURL(WikiContext.VIEW,text); 554 el = new Element("img").setAttribute("class","inline"); 555 el.setAttribute("src",link); 556 el.setAttribute("alt",text); 557 el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el); 558 break; 559 560 case EXTERNAL: 561 el = createAnchor( EXTERNAL, link, text, section ); 562 if( m_useRelNofollow ) el.setAttribute("rel","nofollow"); 563 break; 564 565 case INTERWIKI: 566 el = createAnchor( INTERWIKI, link, text, section ); 567 break; 568 569 case ATTACHMENT: 570 String attlink = m_context.getURL( WikiContext.ATTACH, 571 link ); 572 573 String infolink = m_context.getURL( WikiContext.INFO, 574 link ); 575 576 String imglink = m_context.getURL( WikiContext.NONE, 577 "images/attachment_small.png" ); 578 579 el = createAnchor( ATTACHMENT, attlink, text, "" ); 580 581 pushElement(el); 582 popElement(el.getName()); 583 584 if( m_useAttachmentImage ) 585 { 586 el = new Element("img").setAttribute("src",imglink); 587 el.setAttribute("border","0"); 588 el.setAttribute("alt","(info)"); 589 590 el = new Element("a").setAttribute("href",infolink).addContent(el); 591 el.setAttribute("class","infolink"); 592 } 593 else 594 { 595 el = null; 596 } 597 break; 598 599 default: 600 break; 601 } 602 603 if( el != null && attributes != null ) 604 { 605 while( attributes.hasNext() ) 606 { 607 Attribute attr = (Attribute)attributes.next(); 608 if( attr != null ) 609 { 610 el.setAttribute(attr); 611 } 612 } 613 } 614 615 if( el != null ) 616 { 617 flushPlainText(); 618 m_currentElement.addContent( el ); 619 } 620 return el; 621 } 622 623 624 /** 625 * Figures out if a link is an off-site link. This recognizes 626 * the most common protocols by checking how it starts. 627 * 628 * @param link The link to check. 629 * @return true, if this is a link outside of this wiki. 630 * @since 2.4 631 */ 632 633 public static boolean isExternalLink( String link ) 634 { 635 int idx = Arrays.binarySearch( EXTERNAL_LINKS, link, 636 c_startingComparator ); 637 638 // 639 // We need to check here once again; otherwise we might 640 // get a match for something like "h". 641 // 642 if( idx >= 0 && link.startsWith(EXTERNAL_LINKS[idx]) ) return true; 643 644 return false; 645 } 646 647 /** 648 * Returns true, if the link in question is an access 649 * rule. 650 */ 651 private static boolean isAccessRule( String link ) 652 { 653 return link.startsWith("{ALLOW") || link.startsWith("{DENY"); 654 } 655 656 /** 657 * Returns true if the link is really command to insert 658 * a plugin. 659 * <P> 660 * Currently we just check if the link starts with "{INSERT", 661 * or just plain "{" but not "{$". 662 * 663 * @param link Link text, i.e. the contents of text between []. 664 * @return True, if this link seems to be a command to insert a plugin here. 665 */ 666 public static boolean isPluginLink( String link ) 667 { 668 return link.startsWith( "{INSERT" ) || 669 ( link.startsWith( "{" ) && !link.startsWith( "{$" ) ); 670 } 671 672 /** 673 * Matches the given link to the list of image name patterns 674 * to determine whether it should be treated as an inline image 675 * or not. 676 */ 677 private boolean isImageLink( String link ) 678 { 679 if( m_inlineImages ) 680 { 681 link = link.toLowerCase(); 682 683 for( Iterator< Pattern > i = m_inlineImagePatterns.iterator(); i.hasNext(); ) 684 { 685 if( m_inlineMatcher.matches( link, i.next() ) ) 686 return true; 687 } 688 } 689 690 return false; 691 } 692 693 private static boolean isMetadata( String link ) 694 { 695 return link.startsWith("{SET"); 696 } 697 698 /** 699 * These are all of the HTML 4.01 block-level elements. 700 */ 701 private static final String[] BLOCK_ELEMENTS = { 702 "address", "blockquote", "div", "dl", "fieldset", "form", 703 "h1", "h2", "h3", "h4", "h5", "h6", 704 "hr", "noscript", "ol", "p", "pre", "table", "ul" 705 }; 706 707 private static boolean isBlockLevel( String name ) 708 { 709 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 710 } 711 712 /** 713 * This method peeks ahead in the stream until EOL and returns the result. 714 * It will keep the buffers untouched. 715 * 716 * @return The string from the current position to the end of line. 717 */ 718 719 // FIXME: Always returns an empty line, even if the stream is full. 720 private String peekAheadLine() 721 throws IOException 722 { 723 String s = readUntilEOL().toString(); 724 725 if( s.length() > PUSHBACK_BUFFER_SIZE ) 726 { 727 log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover..."); 728 pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) ); 729 } 730 else 731 { 732 try 733 { 734 pushBack( s ); 735 } 736 catch( IOException e ) 737 { 738 log.warn("Pushback failed: the line is probably too long. Attempting to recover."); 739 } 740 } 741 return s; 742 } 743 744 private int flushPlainText() 745 { 746 int numChars = m_plainTextBuf.length(); 747 748 if( numChars > 0 ) 749 { 750 String buf; 751 752 if( !m_allowHTML ) 753 { 754 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 755 } 756 else 757 { 758 buf = m_plainTextBuf.toString(); 759 } 760 // 761 // We must first empty the buffer because the side effect of 762 // calling makeCamelCaseLink() is to call this routine. 763 // 764 765 m_plainTextBuf = new StringBuilder(20); 766 767 try 768 { 769 // 770 // This is the heaviest part of parsing, and therefore we can 771 // do some optimization here. 772 // 773 // 1) Only when the length of the buffer is big enough, we try to do the match 774 // 775 776 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) 777 { 778 // System.out.println("Buffer="+buf); 779 780 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) 781 { 782 MatchResult result = m_camelCaseMatcher.getMatch(); 783 784 String firstPart = buf.substring(0,result.beginOffset(0)); 785 String prefix = result.group(1); 786 787 if( prefix == null ) prefix = ""; 788 789 String camelCase = result.group(2); 790 String protocol = result.group(3); 791 String uri = protocol+result.group(4); 792 buf = buf.substring(result.endOffset(0)); 793 794 m_currentElement.addContent( firstPart ); 795 796 // 797 // Check if the user does not wish to do URL or WikiWord expansion 798 // 799 if( prefix.endsWith("~") || prefix.indexOf('[') != -1 ) 800 { 801 if( prefix.endsWith("~") ) 802 { 803 if( m_wysiwygEditorMode ) 804 { 805 m_currentElement.addContent( "~" ); 806 } 807 prefix = prefix.substring(0,prefix.length()-1); 808 } 809 if( camelCase != null ) 810 { 811 m_currentElement.addContent( prefix+camelCase ); 812 } 813 else if( protocol != null ) 814 { 815 m_currentElement.addContent( prefix+uri ); 816 } 817 continue; 818 } 819 820 // 821 // Fine, then let's check what kind of a link this was 822 // and emit the proper elements 823 // 824 if( protocol != null ) 825 { 826 char c = uri.charAt(uri.length()-1); 827 if( c == '.' || c == ',' ) 828 { 829 uri = uri.substring(0,uri.length()-1); 830 buf = c + buf; 831 } 832 // System.out.println("URI match "+uri); 833 m_currentElement.addContent( prefix ); 834 makeDirectURILink( uri ); 835 } 836 else 837 { 838 // System.out.println("Matched: '"+camelCase+"'"); 839 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 840 // System.out.println("prefix="+prefix); 841 m_currentElement.addContent( prefix ); 842 843 makeCamelCaseLink( camelCase ); 844 } 845 } 846 847 m_currentElement.addContent( buf ); 848 } 849 else 850 { 851 // 852 // No camelcase asked for, just add the elements 853 // 854 m_currentElement.addContent( buf ); 855 } 856 } 857 catch( IllegalDataException e ) 858 { 859 // 860 // Sometimes it's possible that illegal XML chars is added to the data. 861 // Here we make sure it does not stop parsing. 862 // 863 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 864 } 865 } 866 867 return numChars; 868 } 869 870 /** 871 * Escapes XML entities in a HTML-compatible way (i.e. does not escape 872 * entities that are already escaped). 873 * 874 * @param buf 875 * @return An escaped string. 876 */ 877 private String escapeHTMLEntities(String buf) 878 { 879 StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); 880 881 for( int i = 0; i < buf.length(); i++ ) 882 { 883 char ch = buf.charAt(i); 884 885 if( ch == '<' ) 886 { 887 tmpBuf.append("<"); 888 } 889 else if( ch == '>' ) 890 { 891 tmpBuf.append(">"); 892 } 893 else if( ch == '\"' ) 894 { 895 tmpBuf.append("""); 896 } 897 else if( ch == '&' ) 898 { 899 // 900 // If the following is an XML entity reference (&#.*;) we'll 901 // leave it as it is; otherwise we'll replace it with an & 902 // 903 904 boolean isEntity = false; 905 StringBuilder entityBuf = new StringBuilder(); 906 907 if( i < buf.length() -1 ) 908 { 909 for( int j = i; j < buf.length(); j++ ) 910 { 911 char ch2 = buf.charAt(j); 912 913 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) 914 { 915 entityBuf.append(ch2); 916 917 if( ch2 == ';' ) 918 { 919 isEntity = true; 920 break; 921 } 922 } 923 else 924 { 925 break; 926 } 927 } 928 } 929 930 if( isEntity ) 931 { 932 tmpBuf.append( entityBuf ); 933 i = i + entityBuf.length() - 1; 934 } 935 else 936 { 937 tmpBuf.append("&"); 938 } 939 940 } 941 else 942 { 943 tmpBuf.append( ch ); 944 } 945 } 946 947 return tmpBuf.toString(); 948 } 949 950 private Element pushElement( Element e ) 951 { 952 flushPlainText(); 953 m_currentElement.addContent( e ); 954 m_currentElement = e; 955 956 return e; 957 } 958 959 private Element addElement( Content e ) 960 { 961 if( e != null ) 962 { 963 flushPlainText(); 964 m_currentElement.addContent( e ); 965 } 966 return m_currentElement; 967 } 968 969 /** 970 * All elements that can be empty by the HTML DTD. 971 */ 972 // Keep sorted. 973 private static final String[] EMPTY_ELEMENTS = { 974 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 975 }; 976 977 /** 978 * Goes through the current element stack and pops all elements until this 979 * element is found - this essentially "closes" and element. 980 * 981 * @param s 982 * @return The new current element, or null, if there was no such element in the entire stack. 983 */ 984 private Element popElement( String s ) 985 { 986 int flushedBytes = flushPlainText(); 987 988 Element currEl = m_currentElement; 989 990 while( currEl.getParentElement() != null ) 991 { 992 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 993 { 994 m_currentElement = currEl.getParentElement(); 995 996 // 997 // Check if it's okay for this element to be empty. Then we will 998 // trick the JDOM generator into not generating an empty element, 999 // by putting an empty string between the tags. Yes, it's a kludge 1000 // but what'cha gonna do about it. :-) 1001 // 1002 1003 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 1004 { 1005 currEl.addContent(""); 1006 } 1007 1008 return m_currentElement; 1009 } 1010 1011 currEl = currEl.getParentElement(); 1012 } 1013 1014 return null; 1015 } 1016 1017 1018 /** 1019 * Reads the stream until it meets one of the specified 1020 * ending characters, or stream end. The ending character will be left 1021 * in the stream. 1022 */ 1023 private String readUntil( String endChars ) 1024 throws IOException 1025 { 1026 StringBuilder sb = new StringBuilder( 80 ); 1027 int ch = nextToken(); 1028 1029 while( ch != -1 ) 1030 { 1031 if( ch == '\\' ) 1032 { 1033 ch = nextToken(); 1034 if( ch == -1 ) 1035 { 1036 break; 1037 } 1038 } 1039 else 1040 { 1041 if( endChars.indexOf((char)ch) != -1 ) 1042 { 1043 pushBack( ch ); 1044 break; 1045 } 1046 } 1047 sb.append( (char) ch ); 1048 ch = nextToken(); 1049 } 1050 1051 return sb.toString(); 1052 } 1053 1054 /** 1055 * Reads the stream while the characters that have been specified are 1056 * in the stream, returning then the result as a String. 1057 */ 1058 private String readWhile( String endChars ) 1059 throws IOException 1060 { 1061 StringBuilder sb = new StringBuilder( 80 ); 1062 int ch = nextToken(); 1063 1064 while( ch != -1 ) 1065 { 1066 if( endChars.indexOf((char)ch) == -1 ) 1067 { 1068 pushBack( ch ); 1069 break; 1070 } 1071 1072 sb.append( (char) ch ); 1073 ch = nextToken(); 1074 } 1075 1076 return sb.toString(); 1077 } 1078 1079 private JSPWikiMarkupParser m_cleanTranslator; 1080 1081 /** 1082 * Does a lazy init. Otherwise, we would get into a situation 1083 * where HTMLRenderer would try and boot a TranslatorReader before 1084 * the TranslatorReader it is contained by is up. 1085 */ 1086 private JSPWikiMarkupParser getCleanTranslator() 1087 { 1088 if( m_cleanTranslator == null ) 1089 { 1090 WikiContext dummyContext = new WikiContext( m_engine, 1091 m_context.getHttpRequest(), 1092 m_context.getPage() ); 1093 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 1094 1095 m_cleanTranslator.m_allowHTML = true; 1096 } 1097 1098 return m_cleanTranslator; 1099 } 1100 /** 1101 * Modifies the "hd" parameter to contain proper values. Because 1102 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 1103 * % after url encoding with '_'. 1104 * <p> 1105 * Counts also duplicate headings (= headings with similar name), and 1106 * attaches a counter. 1107 */ 1108 private String makeHeadingAnchor( String baseName, String title, Heading hd ) 1109 { 1110 hd.m_titleText = title; 1111 title = MarkupParser.wikifyLink( title ); 1112 1113 hd.m_titleSection = m_engine.encodeName(title); 1114 1115 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) 1116 { 1117 Integer count = m_titleSectionCounter.get( hd.m_titleSection ); 1118 count = count + 1; 1119 m_titleSectionCounter.put( hd.m_titleSection, count ); 1120 hd.m_titleSection += "-" + count; 1121 } 1122 else 1123 { 1124 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 1125 } 1126 1127 hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+ 1128 "-"+hd.m_titleSection; 1129 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 1130 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 1131 1132 return hd.m_titleAnchor; 1133 } 1134 1135 private String makeSectionTitle( String title ) 1136 { 1137 title = title.trim(); 1138 String outTitle; 1139 1140 try 1141 { 1142 JSPWikiMarkupParser dtr = getCleanTranslator(); 1143 dtr.setInputReader( new StringReader(title) ); 1144 1145 CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse()); 1146 1147 outTitle = ctt.getString(); 1148 } 1149 catch( IOException e ) 1150 { 1151 log.fatal("CleanTranslator not working", e); 1152 throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() ); 1153 } 1154 1155 return outTitle; 1156 } 1157 1158 /** 1159 * Returns XHTML for the heading. 1160 * 1161 * @param level The level of the heading. @see Heading 1162 * @param title the title for the heading 1163 * @param hd a List to which heading should be added 1164 * @return An Element containing the heading 1165 */ 1166 public Element makeHeading( int level, String title, Heading hd ) 1167 { 1168 Element el = null; 1169 1170 String pageName = m_context.getPage().getName(); 1171 1172 String outTitle = makeSectionTitle( title ); 1173 1174 hd.m_level = level; 1175 1176 switch( level ) 1177 { 1178 case Heading.HEADING_SMALL: 1179 el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 1180 break; 1181 1182 case Heading.HEADING_MEDIUM: 1183 el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 1184 break; 1185 1186 case Heading.HEADING_LARGE: 1187 el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 1188 break; 1189 1190 default: 1191 throw new InternalWikiException("Illegal heading type "+level); 1192 } 1193 1194 1195 return el; 1196 } 1197 1198 /** 1199 * When given a link to a WikiName, we just return 1200 * a proper HTML link for it. The local link mutator 1201 * chain is also called. 1202 */ 1203 private Element makeCamelCaseLink( String wikiname ) 1204 { 1205 String matchedLink; 1206 1207 callMutatorChain( m_localLinkMutatorChain, wikiname ); 1208 1209 if( (matchedLink = linkExists( wikiname )) != null ) 1210 { 1211 makeLink( READ, matchedLink, wikiname, null, null ); 1212 } 1213 else 1214 { 1215 makeLink( EDIT, wikiname, wikiname, null, null ); 1216 } 1217 1218 return m_currentElement; 1219 } 1220 1221 /** Holds the image URL for the duration of this parser */ 1222 private String m_outlinkImageURL = null; 1223 1224 /** 1225 * Returns an element for the external link image (out.png). However, 1226 * this method caches the URL for the lifetime of this MarkupParser, 1227 * because it's commonly used, and we'll end up with possibly hundreds 1228 * our thousands of references to it... It's a lot faster, too. 1229 * 1230 * @return An element containing the HTML for the outlink image. 1231 */ 1232 private Element outlinkImage() 1233 { 1234 Element el = null; 1235 1236 if( m_useOutlinkImage ) 1237 { 1238 if( m_outlinkImageURL == null ) 1239 { 1240 m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE ); 1241 } 1242 1243 el = new Element("img").setAttribute("class", "outlink"); 1244 el.setAttribute( "src", m_outlinkImageURL ); 1245 el.setAttribute("alt",""); 1246 } 1247 1248 return el; 1249 } 1250 1251 /** 1252 * Takes an URL and turns it into a regular wiki link. Unfortunately, 1253 * because of the way that flushPlainText() works, it already encodes 1254 * all of the XML entities. But so does WikiContext.getURL(), so we 1255 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 1256 * <p> 1257 * What a crappy problem. 1258 * 1259 * @param url 1260 * @return An anchor Element containing the link. 1261 */ 1262 private Element makeDirectURILink( String url ) 1263 { 1264 Element result; 1265 String last = null; 1266 1267 if( url.endsWith(",") || url.endsWith(".") ) 1268 { 1269 last = url.substring( url.length()-1 ); 1270 url = url.substring( 0, url.length()-1 ); 1271 } 1272 1273 callMutatorChain( m_externalLinkMutatorChain, url ); 1274 1275 if( isImageLink( url ) ) 1276 { 1277 result = handleImageLink( StringUtils.replace(url,"&","&"), url, false ); 1278 } 1279 else 1280 { 1281 result = makeLink( EXTERNAL, StringUtils.replace(url,"&","&"), url, null, null ); 1282 addElement( outlinkImage() ); 1283 } 1284 1285 if( last != null ) 1286 { 1287 m_plainTextBuf.append(last); 1288 } 1289 1290 return result; 1291 } 1292 1293 /** 1294 * Image links are handled differently: 1295 * 1. If the text is a WikiName of an existing page, 1296 * it gets linked. 1297 * 2. If the text is an external link, then it is inlined. 1298 * 3. Otherwise it becomes an ALT text. 1299 * 1300 * @param reallink The link to the image. 1301 * @param link Link text portion, may be a link to somewhere else. 1302 * @param hasLinkText If true, then the defined link had a link text available. 1303 * This means that the link text may be a link to a wiki page, 1304 * or an external resource. 1305 */ 1306 1307 // FIXME: isExternalLink() is called twice. 1308 private Element handleImageLink( String reallink, String link, boolean hasLinkText ) 1309 { 1310 String possiblePage = MarkupParser.cleanLink( link ); 1311 1312 if( isExternalLink( link ) && hasLinkText ) 1313 { 1314 return makeLink( IMAGELINK, reallink, link, null, null ); 1315 } 1316 else if( ( linkExists( possiblePage ) ) != null && 1317 hasLinkText ) 1318 { 1319 // System.out.println("Orig="+link+", Matched: "+matchedLink); 1320 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 1321 1322 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 1323 } 1324 else 1325 { 1326 return makeLink( IMAGE, reallink, link, null, null ); 1327 } 1328 } 1329 1330 private Element handleAccessRule( String ruleLine ) 1331 { 1332 if( m_wysiwygEditorMode ) 1333 { 1334 m_currentElement.addContent( "[" + ruleLine + "]" ); 1335 } 1336 1337 if( !m_parseAccessRules ) return m_currentElement; 1338 Acl acl; 1339 WikiPage page = m_context.getRealPage(); 1340 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1341 1342 if( ruleLine.startsWith( "{" ) ) 1343 ruleLine = ruleLine.substring( 1 ); 1344 if( ruleLine.endsWith( "}" ) ) 1345 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1346 1347 if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine); 1348 1349 try 1350 { 1351 acl = m_engine.getAclManager().parseAcl( page, ruleLine ); 1352 1353 page.setAcl( acl ); 1354 1355 if( log.isDebugEnabled() ) log.debug( acl.toString() ); 1356 } 1357 catch( WikiSecurityException wse ) 1358 { 1359 return makeError( wse.getMessage() ); 1360 } 1361 1362 return m_currentElement; 1363 } 1364 1365 /** 1366 * Handles metadata setting [{SET foo=bar}] 1367 */ 1368 private Element handleMetadata( String link ) 1369 { 1370 if( m_wysiwygEditorMode ) 1371 { 1372 m_currentElement.addContent( "[" + link + "]" ); 1373 } 1374 1375 try 1376 { 1377 String args = link.substring( link.indexOf(' '), link.length()-1 ); 1378 1379 String name = args.substring( 0, args.indexOf('=') ); 1380 String val = args.substring( args.indexOf('=')+1, args.length() ); 1381 1382 name = name.trim(); 1383 val = val.trim(); 1384 1385 if( val.startsWith("'") ) val = val.substring( 1 ); 1386 if( val.endsWith("'") ) val = val.substring( 0, val.length()-1 ); 1387 1388 // log.debug("SET name='"+name+"', value='"+val+"'."); 1389 1390 if( name.length() > 0 && val.length() > 0 ) 1391 { 1392 val = m_engine.getVariableManager().expandVariables( m_context, 1393 val ); 1394 1395 m_context.getPage().setAttribute( name, val ); 1396 } 1397 } 1398 catch( Exception e ) 1399 { 1400 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1401 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1402 } 1403 1404 return m_currentElement; 1405 } 1406 1407 /** 1408 * Emits a processing instruction that will disable markup escaping. This is 1409 * very useful if you want to emit HTML directly into the stream. 1410 * 1411 */ 1412 private void disableOutputEscaping() 1413 { 1414 addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") ); 1415 } 1416 1417 /** 1418 * Gobbles up all hyperlinks that are encased in square brackets. 1419 */ 1420 private Element handleHyperlinks( String linktext, int pos ) 1421 { 1422 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1423 1424 StringBuilder sb = new StringBuilder(linktext.length()+80); 1425 1426 if( isAccessRule( linktext ) ) 1427 { 1428 return handleAccessRule( linktext ); 1429 } 1430 1431 if( isMetadata( linktext ) ) 1432 { 1433 return handleMetadata( linktext ); 1434 } 1435 1436 if( isPluginLink( linktext ) ) 1437 { 1438 try 1439 { 1440 PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1441 // 1442 // This might sometimes fail, especially if there is something which looks 1443 // like a plugin invocation but is really not. 1444 // 1445 if( pluginContent != null ) 1446 { 1447 addElement( pluginContent ); 1448 1449 pluginContent.executeParse( m_context ); 1450 } 1451 } 1452 catch( PluginException e ) 1453 { 1454 log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 1455 //log.info( "Root cause:",e.getRootThrowable() ); 1456 if( !m_wysiwygEditorMode ) 1457 { 1458 ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1459 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 1460 m_context.getRealPage().getWiki(), 1461 m_context.getRealPage().getName(), 1462 e.getMessage() ) ) ); 1463 } 1464 } 1465 1466 return m_currentElement; 1467 } 1468 1469 try 1470 { 1471 LinkParser.Link link = m_linkParser.parse(linktext); 1472 linktext = link.getText(); 1473 String linkref = link.getReference(); 1474 1475 // 1476 // Yes, we now have the components separated. 1477 // linktext = the text the link should have 1478 // linkref = the url or page name. 1479 // 1480 // In many cases these are the same. [linktext|linkref]. 1481 // 1482 if( VariableManager.isVariableLink( linktext ) ) 1483 { 1484 Content el = new VariableContent(linktext); 1485 1486 addElement( el ); 1487 } 1488 else if( isExternalLink( linkref ) ) 1489 { 1490 // It's an external link, out of this Wiki 1491 1492 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1493 1494 if( isImageLink( linkref ) ) 1495 { 1496 handleImageLink( linkref, linktext, link.hasReference() ); 1497 } 1498 else 1499 { 1500 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1501 addElement( outlinkImage() ); 1502 } 1503 } 1504 else if( link.isInterwikiLink() ) 1505 { 1506 // It's an interwiki link 1507 // InterWiki links also get added to external link chain 1508 // after the links have been resolved. 1509 1510 // FIXME: There is an interesting issue here: We probably should 1511 // URLEncode the wikiPage, but we can't since some of the 1512 // Wikis use slashes (/), which won't survive URLEncoding. 1513 // Besides, we don't know which character set the other Wiki 1514 // is using, so you'll have to write the entire name as it appears 1515 // in the URL. Bugger. 1516 1517 String extWiki = link.getExternalWiki(); 1518 String wikiPage = link.getExternalWikiPage(); 1519 1520 if( m_wysiwygEditorMode ) 1521 { 1522 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1523 } 1524 else 1525 { 1526 String urlReference = m_engine.getInterWikiURL( extWiki ); 1527 1528 if( urlReference != null ) 1529 { 1530 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1531 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1532 1533 if( isImageLink(urlReference) ) 1534 { 1535 handleImageLink( urlReference, linktext, link.hasReference() ); 1536 } 1537 else 1538 { 1539 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1540 } 1541 1542 if( isExternalLink(urlReference) ) 1543 { 1544 addElement( outlinkImage() ); 1545 } 1546 } 1547 else 1548 { 1549 Object[] args = { extWiki }; 1550 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1551 } 1552 } 1553 } 1554 else if( linkref.startsWith("#") ) 1555 { 1556 // It defines a local footnote 1557 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1558 } 1559 else if( TextUtil.isNumber( linkref ) ) 1560 { 1561 // It defines a reference to a local footnote 1562 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1563 } 1564 else 1565 { 1566 int hashMark = -1; 1567 1568 // 1569 // Internal wiki link, but is it an attachment link? 1570 // 1571 String attachment = findAttachment( linkref ); 1572 if( attachment != null ) 1573 { 1574 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1575 1576 if( isImageLink( linkref ) ) 1577 { 1578 attachment = m_context.getURL( WikiContext.ATTACH, attachment ); 1579 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1580 } 1581 else 1582 { 1583 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1584 } 1585 } 1586 else if( (hashMark = linkref.indexOf('#')) != -1 ) 1587 { 1588 // It's an internal Wiki link, but to a named section 1589 1590 String namedSection = linkref.substring( hashMark+1 ); 1591 linkref = linkref.substring( 0, hashMark ); 1592 1593 linkref = MarkupParser.cleanLink( linkref ); 1594 1595 callMutatorChain( m_localLinkMutatorChain, linkref ); 1596 1597 String matchedLink; 1598 if( (matchedLink = linkExists( linkref )) != null ) 1599 { 1600 String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection)); 1601 sectref = sectref.replace('%', '_'); 1602 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1603 } 1604 else 1605 { 1606 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1607 } 1608 } 1609 else 1610 { 1611 // It's an internal Wiki link 1612 linkref = MarkupParser.cleanLink( linkref ); 1613 1614 callMutatorChain( m_localLinkMutatorChain, linkref ); 1615 1616 String matchedLink = linkExists( linkref ); 1617 1618 if( matchedLink != null ) 1619 { 1620 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1621 } 1622 else 1623 { 1624 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1625 } 1626 } 1627 } 1628 } 1629 catch( ParseException e ) 1630 { 1631 log.info("Parser failure: ",e); 1632 Object[] args = { e.getMessage() }; 1633 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1634 } 1635 1636 return m_currentElement; 1637 } 1638 1639 private String findAttachment( String linktext ) 1640 { 1641 AttachmentManager mgr = m_engine.getAttachmentManager(); 1642 Attachment att = null; 1643 1644 try 1645 { 1646 att = mgr.getAttachmentInfo( m_context, linktext ); 1647 } 1648 catch( ProviderException e ) 1649 { 1650 log.warn("Finding attachments failed: ",e); 1651 return null; 1652 } 1653 1654 if( att != null ) 1655 { 1656 return att.getName(); 1657 } 1658 else if( linktext.indexOf('/') != -1 ) 1659 { 1660 return linktext; 1661 } 1662 1663 return null; 1664 } 1665 1666 /** 1667 * Pushes back any string that has been read. It will obviously 1668 * be pushed back in a reverse order. 1669 * 1670 * @since 2.1.77 1671 */ 1672 private void pushBack( String s ) 1673 throws IOException 1674 { 1675 for( int i = s.length()-1; i >= 0; i-- ) 1676 { 1677 pushBack( s.charAt(i) ); 1678 } 1679 } 1680 1681 private Element handleBackslash() 1682 throws IOException 1683 { 1684 int ch = nextToken(); 1685 1686 if( ch == '\\' ) 1687 { 1688 int ch2 = nextToken(); 1689 1690 if( ch2 == '\\' ) 1691 { 1692 pushElement( new Element("br").setAttribute("clear","all")); 1693 return popElement("br"); 1694 } 1695 1696 pushBack( ch2 ); 1697 1698 pushElement( new Element("br") ); 1699 return popElement("br"); 1700 } 1701 1702 pushBack( ch ); 1703 1704 return null; 1705 } 1706 1707 private Element handleUnderscore() 1708 throws IOException 1709 { 1710 int ch = nextToken(); 1711 Element el = null; 1712 1713 if( ch == '_' ) 1714 { 1715 if( m_isbold ) 1716 { 1717 el = popElement("b"); 1718 } 1719 else 1720 { 1721 el = pushElement( new Element("b") ); 1722 } 1723 m_isbold = !m_isbold; 1724 } 1725 else 1726 { 1727 pushBack( ch ); 1728 } 1729 1730 return el; 1731 } 1732 1733 1734 /** 1735 * For example: italics. 1736 */ 1737 private Element handleApostrophe() 1738 throws IOException 1739 { 1740 int ch = nextToken(); 1741 Element el = null; 1742 1743 if( ch == '\'' ) 1744 { 1745 if( m_isitalic ) 1746 { 1747 el = popElement("i"); 1748 } 1749 else 1750 { 1751 el = pushElement( new Element("i") ); 1752 } 1753 m_isitalic = !m_isitalic; 1754 } 1755 else 1756 { 1757 pushBack( ch ); 1758 } 1759 1760 return el; 1761 } 1762 1763 private Element handleOpenbrace( boolean isBlock ) 1764 throws IOException 1765 { 1766 int ch = nextToken(); 1767 1768 if( ch == '{' ) 1769 { 1770 int ch2 = nextToken(); 1771 1772 if( ch2 == '{' ) 1773 { 1774 m_isPre = true; 1775 m_isEscaping = true; 1776 m_isPreBlock = isBlock; 1777 1778 if( isBlock ) 1779 { 1780 startBlockLevel(); 1781 return pushElement( new Element("pre") ); 1782 } 1783 1784 return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") ); 1785 } 1786 1787 pushBack( ch2 ); 1788 1789 return pushElement( new Element("tt") ); 1790 } 1791 1792 pushBack( ch ); 1793 1794 return null; 1795 } 1796 1797 /** 1798 * Handles both }} and }}} 1799 */ 1800 private Element handleClosebrace() 1801 throws IOException 1802 { 1803 int ch2 = nextToken(); 1804 1805 if( ch2 == '}' ) 1806 { 1807 int ch3 = nextToken(); 1808 1809 if( ch3 == '}' ) 1810 { 1811 if( m_isPre ) 1812 { 1813 if( m_isPreBlock ) 1814 { 1815 popElement( "pre" ); 1816 } 1817 else 1818 { 1819 popElement( "span" ); 1820 } 1821 1822 m_isPre = false; 1823 m_isEscaping = false; 1824 return m_currentElement; 1825 } 1826 1827 m_plainTextBuf.append("}}}"); 1828 return m_currentElement; 1829 } 1830 1831 pushBack( ch3 ); 1832 1833 if( !m_isEscaping ) 1834 { 1835 return popElement("tt"); 1836 } 1837 } 1838 1839 pushBack( ch2 ); 1840 1841 return null; 1842 } 1843 1844 private Element handleDash() 1845 throws IOException 1846 { 1847 int ch = nextToken(); 1848 1849 if( ch == '-' ) 1850 { 1851 int ch2 = nextToken(); 1852 1853 if( ch2 == '-' ) 1854 { 1855 int ch3 = nextToken(); 1856 1857 if( ch3 == '-' ) 1858 { 1859 // Empty away all the rest of the dashes. 1860 // Do not forget to return the first non-match back. 1861 do 1862 { 1863 ch = nextToken(); 1864 } 1865 while ( ch == '-' ); 1866 1867 pushBack(ch); 1868 startBlockLevel(); 1869 pushElement( new Element("hr") ); 1870 return popElement( "hr" ); 1871 } 1872 1873 pushBack( ch3 ); 1874 } 1875 pushBack( ch2 ); 1876 } 1877 1878 pushBack( ch ); 1879 1880 return null; 1881 } 1882 1883 private Element handleHeading() 1884 throws IOException 1885 { 1886 Element el = null; 1887 1888 int ch = nextToken(); 1889 1890 Heading hd = new Heading(); 1891 1892 if( ch == '!' ) 1893 { 1894 int ch2 = nextToken(); 1895 1896 if( ch2 == '!' ) 1897 { 1898 String title = peekAheadLine(); 1899 1900 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1901 } 1902 else 1903 { 1904 pushBack( ch2 ); 1905 String title = peekAheadLine(); 1906 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1907 } 1908 } 1909 else 1910 { 1911 pushBack( ch ); 1912 String title = peekAheadLine(); 1913 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1914 } 1915 1916 callHeadingListenerChain( hd ); 1917 1918 m_lastHeading = hd; 1919 1920 if( el != null ) pushElement(el); 1921 1922 return el; 1923 } 1924 1925 /** 1926 * Reads the stream until the next EOL or EOF. Note that it will also read the 1927 * EOL from the stream. 1928 */ 1929 private StringBuilder readUntilEOL() 1930 throws IOException 1931 { 1932 int ch; 1933 StringBuilder buf = new StringBuilder( 256 ); 1934 1935 while( true ) 1936 { 1937 ch = nextToken(); 1938 1939 if( ch == -1 ) 1940 break; 1941 1942 buf.append( (char) ch ); 1943 1944 if( ch == '\n' ) 1945 break; 1946 } 1947 return buf; 1948 } 1949 1950 /** Controls whether italic is restarted after a paragraph shift */ 1951 1952 private boolean m_restartitalic = false; 1953 private boolean m_restartbold = false; 1954 1955 private boolean m_newLine; 1956 1957 /** 1958 * Starts a block level element, therefore closing 1959 * a potential open paragraph tag. 1960 */ 1961 private void startBlockLevel() 1962 { 1963 // These may not continue over block level limits in XHTML 1964 1965 popElement("i"); 1966 popElement("b"); 1967 popElement("tt"); 1968 1969 if( m_isOpenParagraph ) 1970 { 1971 m_isOpenParagraph = false; 1972 popElement("p"); 1973 m_plainTextBuf.append("\n"); // Just small beautification 1974 } 1975 1976 m_restartitalic = m_isitalic; 1977 m_restartbold = m_isbold; 1978 1979 m_isitalic = false; 1980 m_isbold = false; 1981 } 1982 1983 private static String getListType( char c ) 1984 { 1985 if( c == '*' ) 1986 { 1987 return "ul"; 1988 } 1989 else if( c == '#' ) 1990 { 1991 return "ol"; 1992 } 1993 throw new InternalWikiException("Parser got faulty list type: "+c); 1994 } 1995 /** 1996 * Like original handleOrderedList() and handleUnorderedList() 1997 * however handles both ordered ('#') and unordered ('*') mixed together. 1998 */ 1999 2000 // FIXME: Refactor this; it's a bit messy. 2001 2002 private Element handleGeneralList() 2003 throws IOException 2004 { 2005 startBlockLevel(); 2006 2007 String strBullets = readWhile( "*#" ); 2008 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution 2009 int numBullets = strBullets.length(); 2010 2011 // override the beginning portion of bullet pattern to be like the previous 2012 // to simulate PHPWiki style lists 2013 2014 if(m_allowPHPWikiStyleLists) 2015 { 2016 // only substitute if different 2017 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 2018 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) 2019 { 2020 if(numBullets <= m_genlistlevel) 2021 { 2022 // Substitute all but the last character (keep the expressed bullet preference) 2023 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") 2024 + strBullets.substring(numBullets-1, numBullets); 2025 } 2026 else 2027 { 2028 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); 2029 } 2030 } 2031 } 2032 2033 // 2034 // Check if this is still of the same type 2035 // 2036 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 2037 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) 2038 { 2039 if( numBullets > m_genlistlevel ) 2040 { 2041 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); 2042 2043 for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) 2044 { 2045 // bullets are growing, get from new bullet list 2046 pushElement( new Element("li") ); 2047 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); 2048 } 2049 } 2050 else if( numBullets < m_genlistlevel ) 2051 { 2052 // Close the previous list item. 2053 // buf.append( m_renderer.closeListItem() ); 2054 popElement( "li" ); 2055 2056 for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) 2057 { 2058 // bullets are shrinking, get from old bullet list 2059 2060 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 2061 if( m_genlistlevel > 0 ) 2062 { 2063 popElement( "li" ); 2064 } 2065 2066 } 2067 } 2068 else 2069 { 2070 if( m_genlistlevel > 0 ) 2071 { 2072 popElement( "li" ); 2073 } 2074 } 2075 } 2076 else 2077 { 2078 // 2079 // The pattern has changed, unwind and restart 2080 // 2081 int numEqualBullets; 2082 int numCheckBullets; 2083 2084 // find out how much is the same 2085 numEqualBullets = 0; 2086 numCheckBullets = Math.min(numBullets,m_genlistlevel); 2087 2088 while( numEqualBullets < numCheckBullets ) 2089 { 2090 // if the bullets are equal so far, keep going 2091 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) 2092 numEqualBullets++; 2093 // otherwise giveup, we have found how many are equal 2094 else 2095 break; 2096 } 2097 2098 //unwind 2099 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) 2100 { 2101 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); 2102 if( m_genlistlevel > numBullets ) 2103 { 2104 popElement("li"); 2105 } 2106 } 2107 2108 //rewind 2109 2110 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); 2111 for(int i = numEqualBullets; i < numBullets; i++) 2112 { 2113 pushElement( new Element("li") ); 2114 pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); 2115 } 2116 m_genlistlevel = numBullets; 2117 } 2118 2119 // 2120 // Push a new list item, and eat away any extra whitespace 2121 // 2122 pushElement( new Element("li") ); 2123 readWhile(" "); 2124 2125 // work done, remember the new bullet list (in place of old one) 2126 m_genlistBulletBuffer.setLength(0); 2127 m_genlistBulletBuffer.append(strBullets); 2128 2129 return m_currentElement; 2130 } 2131 2132 private Element unwindGeneralList() 2133 { 2134 //unwind 2135 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 2136 { 2137 popElement( "li" ); 2138 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 2139 } 2140 2141 m_genlistBulletBuffer.setLength(0); 2142 2143 return null; 2144 } 2145 2146 2147 private Element handleDefinitionList() 2148 throws IOException 2149 { 2150 if( !m_isdefinition ) 2151 { 2152 m_isdefinition = true; 2153 2154 startBlockLevel(); 2155 2156 pushElement( new Element("dl") ); 2157 return pushElement( new Element("dt") ); 2158 } 2159 2160 return null; 2161 } 2162 2163 private Element handleOpenbracket() 2164 throws IOException 2165 { 2166 StringBuilder sb = new StringBuilder(40); 2167 int pos = getPosition(); 2168 int ch = nextToken(); 2169 boolean isPlugin = false; 2170 2171 if( ch == '[' ) 2172 { 2173 if( m_wysiwygEditorMode ) 2174 { 2175 sb.append( '[' ); 2176 } 2177 2178 sb.append( (char)ch ); 2179 2180 while( (ch = nextToken()) == '[' ) 2181 { 2182 sb.append( (char)ch ); 2183 } 2184 } 2185 2186 2187 if( ch == '{' ) 2188 { 2189 isPlugin = true; 2190 } 2191 2192 pushBack( ch ); 2193 2194 if( sb.length() > 0 ) 2195 { 2196 m_plainTextBuf.append( sb ); 2197 return m_currentElement; 2198 } 2199 2200 // 2201 // Find end of hyperlink 2202 // 2203 2204 ch = nextToken(); 2205 int nesting = 1; // Check for nested plugins 2206 2207 while( ch != -1 ) 2208 { 2209 int ch2 = nextToken(); pushBack(ch2); 2210 2211 if( isPlugin ) 2212 { 2213 if( ch == '[' && ch2 == '{' ) 2214 { 2215 nesting++; 2216 } 2217 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 2218 { 2219 break; 2220 } 2221 else if( ch == '}' && ch2 == ']' ) 2222 { 2223 // NB: This will be decremented once at the end 2224 nesting--; 2225 } 2226 } 2227 else 2228 { 2229 if( ch == ']' ) 2230 { 2231 break; 2232 } 2233 } 2234 2235 sb.append( (char) ch ); 2236 2237 ch = nextToken(); 2238 } 2239 2240 // 2241 // If the link is never finished, do some tricks to display the rest of the line 2242 // unchanged. 2243 // 2244 if( ch == -1 ) 2245 { 2246 log.debug("Warning: unterminated link detected!"); 2247 m_isEscaping = true; 2248 m_plainTextBuf.append( sb ); 2249 flushPlainText(); 2250 m_isEscaping = false; 2251 return m_currentElement; 2252 } 2253 2254 return handleHyperlinks( sb.toString(), pos ); 2255 } 2256 2257 /** 2258 * Reads the stream until the current brace is closed or stream end. 2259 */ 2260 private String readBraceContent( char opening, char closing ) 2261 throws IOException 2262 { 2263 StringBuilder sb = new StringBuilder(40); 2264 int braceLevel = 1; 2265 int ch; 2266 while(( ch = nextToken() ) != -1 ) 2267 { 2268 if( ch == '\\' ) 2269 { 2270 continue; 2271 } 2272 else if ( ch == opening ) 2273 { 2274 braceLevel++; 2275 } 2276 else if ( ch == closing ) 2277 { 2278 braceLevel--; 2279 if (braceLevel==0) 2280 { 2281 break; 2282 } 2283 } 2284 sb.append( (char)ch ); 2285 } 2286 return sb.toString(); 2287 } 2288 2289 2290 /** 2291 * Handles constructs of type %%(style) and %%class 2292 * @param newLine 2293 * @return An Element containing the div or span, depending on the situation. 2294 * @throws IOException 2295 */ 2296 private Element handleDiv( boolean newLine ) 2297 throws IOException 2298 { 2299 int ch = nextToken(); 2300 Element el = null; 2301 2302 if( ch == '%' ) 2303 { 2304 String style = null; 2305 String clazz = null; 2306 2307 ch = nextToken(); 2308 2309 // 2310 // Style or class? 2311 // 2312 if( ch == '(' ) 2313 { 2314 style = readBraceContent('(',')'); 2315 } 2316 else if( Character.isLetter( (char) ch ) ) 2317 { 2318 pushBack( ch ); 2319 clazz = readUntil( " \t\n\r" ); 2320 ch = nextToken(); 2321 2322 // 2323 // Pop out only spaces, so that the upcoming EOL check does not check the 2324 // next line. 2325 // 2326 if( ch == '\n' || ch == '\r' ) 2327 { 2328 pushBack(ch); 2329 } 2330 } 2331 else 2332 { 2333 // 2334 // Anything else stops. 2335 // 2336 2337 pushBack(ch); 2338 2339 try 2340 { 2341 Boolean isSpan = m_styleStack.pop(); 2342 2343 if( isSpan == null ) 2344 { 2345 // Fail quietly 2346 } 2347 else if( isSpan.booleanValue() ) 2348 { 2349 el = popElement( "span" ); 2350 } 2351 else 2352 { 2353 el = popElement( "div" ); 2354 } 2355 } 2356 catch( EmptyStackException e ) 2357 { 2358 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 2359 return m_currentElement; 2360 } 2361 2362 return el; 2363 } 2364 2365 // 2366 // Check if there is an attempt to do something nasty 2367 // 2368 2369 try 2370 { 2371 style = StringEscapeUtils.unescapeHtml(style); 2372 if( style != null && style.indexOf("javascript:") != -1 ) 2373 { 2374 log.debug("Attempt to output javascript within CSS:"+style); 2375 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2376 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 2377 } 2378 } 2379 catch( NumberFormatException e ) 2380 { 2381 // 2382 // If there are unknown entities, we don't want the parser to stop. 2383 // 2384 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2385 String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 2386 return addElement( makeError( msg ) ); 2387 } 2388 2389 // 2390 // Decide if we should open a div or a span? 2391 // 2392 String eol = peekAheadLine(); 2393 2394 if( eol.trim().length() > 0 ) 2395 { 2396 // There is stuff after the class 2397 2398 el = new Element("span"); 2399 2400 m_styleStack.push( Boolean.TRUE ); 2401 } 2402 else 2403 { 2404 startBlockLevel(); 2405 el = new Element("div"); 2406 m_styleStack.push( Boolean.FALSE ); 2407 } 2408 2409 if( style != null ) el.setAttribute("style", style); 2410 if( clazz != null ) el.setAttribute("class", clazz ); 2411 el = pushElement( el ); 2412 2413 return el; 2414 } 2415 2416 pushBack(ch); 2417 2418 return el; 2419 } 2420 2421 private Element handleSlash( boolean newLine ) 2422 throws IOException 2423 { 2424 int ch = nextToken(); 2425 2426 pushBack(ch); 2427 if( ch == '%' && !m_styleStack.isEmpty() ) 2428 { 2429 return handleDiv( newLine ); 2430 } 2431 2432 return null; 2433 } 2434 2435 private Element handleBar( boolean newLine ) 2436 throws IOException 2437 { 2438 Element el = null; 2439 2440 if( !m_istable && !newLine ) 2441 { 2442 return null; 2443 } 2444 2445 // 2446 // If the bar is in the first column, we will either start 2447 // a new table or continue the old one. 2448 // 2449 2450 if( newLine ) 2451 { 2452 if( !m_istable ) 2453 { 2454 startBlockLevel(); 2455 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2456 m_istable = true; 2457 m_rowNum = 0; 2458 } 2459 2460 m_rowNum++; 2461 Element tr = ( m_rowNum % 2 != 0 ) 2462 ? new Element("tr").setAttribute("class", "odd") 2463 : new Element("tr"); 2464 el = pushElement( tr ); 2465 } 2466 2467 // 2468 // Check out which table cell element to start; 2469 // a header element (th) or a regular element (td). 2470 // 2471 int ch = nextToken(); 2472 2473 if( ch == '|' ) 2474 { 2475 if( !newLine ) 2476 { 2477 el = popElement("th"); 2478 if( el == null ) popElement("td"); 2479 } 2480 el = pushElement( new Element("th") ); 2481 } 2482 else 2483 { 2484 if( !newLine ) 2485 { 2486 el = popElement("td"); 2487 if( el == null ) popElement("th"); 2488 } 2489 2490 el = pushElement( new Element("td") ); 2491 2492 pushBack( ch ); 2493 } 2494 2495 return el; 2496 } 2497 2498 /** 2499 * Generic escape of next character or entity. 2500 */ 2501 private Element handleTilde() 2502 throws IOException 2503 { 2504 int ch = nextToken(); 2505 2506 if( ch == ' ' ) 2507 { 2508 if( m_wysiwygEditorMode ) 2509 { 2510 m_plainTextBuf.append( "~ " ); 2511 } 2512 return m_currentElement; 2513 } 2514 2515 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2516 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2517 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2518 { 2519 if( m_wysiwygEditorMode ) 2520 { 2521 m_plainTextBuf.append( '~' ); 2522 } 2523 2524 m_plainTextBuf.append( (char)ch ); 2525 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2526 return m_currentElement; 2527 } 2528 2529 // No escape. 2530 pushBack( ch ); 2531 2532 return null; 2533 } 2534 2535 private void fillBuffer( Element startElement ) 2536 throws IOException 2537 { 2538 m_currentElement = startElement; 2539 2540 boolean quitReading = false; 2541 m_newLine = true; 2542 disableOutputEscaping(); 2543 2544 while(!quitReading) 2545 { 2546 int ch = nextToken(); 2547 2548 if( ch == -1 ) break; 2549 2550 // 2551 // Check if we're actually ending the preformatted mode. 2552 // We still must do an entity transformation here. 2553 // 2554 if( m_isEscaping ) 2555 { 2556 if( ch == '}' ) 2557 { 2558 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2559 } 2560 else if( ch == -1 ) 2561 { 2562 quitReading = true; 2563 } 2564 else if( ch == '\r' ) 2565 { 2566 // DOS line feeds we ignore. 2567 } 2568 else if( ch == '<' ) 2569 { 2570 m_plainTextBuf.append( "<" ); 2571 } 2572 else if( ch == '>' ) 2573 { 2574 m_plainTextBuf.append( ">" ); 2575 } 2576 else if( ch == '&' ) 2577 { 2578 m_plainTextBuf.append( "&" ); 2579 } 2580 else if( ch == '~' ) 2581 { 2582 String braces = readWhile("}"); 2583 if( braces.length() >= 3 ) 2584 { 2585 m_plainTextBuf.append("}}}"); 2586 2587 braces = braces.substring(3); 2588 } 2589 else 2590 { 2591 m_plainTextBuf.append( (char) ch ); 2592 } 2593 2594 for( int i = braces.length()-1; i >= 0; i-- ) 2595 { 2596 pushBack(braces.charAt(i)); 2597 } 2598 } 2599 else 2600 { 2601 m_plainTextBuf.append( (char) ch ); 2602 } 2603 2604 continue; 2605 } 2606 2607 // 2608 // An empty line stops a list 2609 // 2610 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2611 { 2612 m_plainTextBuf.append(unwindGeneralList()); 2613 } 2614 2615 if( m_newLine && ch != '|' && m_istable ) 2616 { 2617 popElement("table"); 2618 m_istable = false; 2619 } 2620 2621 int skip = IGNORE; 2622 2623 // 2624 // Do the actual parsing and catch any errors. 2625 // 2626 try 2627 { 2628 skip = parseToken( ch ); 2629 } 2630 catch( IllegalDataException e ) 2631 { 2632 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2633 2634 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2635 } 2636 2637 // 2638 // The idea is as follows: If the handler method returns 2639 // an element (el != null), it is assumed that it has been 2640 // added in the stack. Otherwise the character is added 2641 // as is to the plaintext buffer. 2642 // 2643 // For the transition phase, if s != null, it also gets 2644 // added in the plaintext buffer. 2645 // 2646 2647 switch( skip ) 2648 { 2649 case ELEMENT: 2650 m_newLine = false; 2651 break; 2652 2653 case CHARACTER: 2654 m_plainTextBuf.append( (char) ch ); 2655 m_newLine = false; 2656 break; 2657 2658 case IGNORE: 2659 default: 2660 break; 2661 } 2662 } 2663 2664 closeHeadings(); 2665 popElement("domroot"); 2666 } 2667 2668 private String cleanupSuspectData( String s ) 2669 { 2670 StringBuilder sb = new StringBuilder( s.length() ); 2671 2672 for( int i = 0; i < s.length(); i++ ) 2673 { 2674 char c = s.charAt(i); 2675 2676 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2677 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); 2678 } 2679 2680 return sb.toString(); 2681 } 2682 2683 /** The token is a plain character. */ 2684 protected static final int CHARACTER = 0; 2685 2686 /** The token is a wikimarkup element. */ 2687 protected static final int ELEMENT = 1; 2688 2689 /** The token is to be ignored. */ 2690 protected static final int IGNORE = 2; 2691 2692 /** 2693 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2694 * you think this was a wiki markup element, and IGNORE, if you think 2695 * we should ignore this altogether. 2696 * <p> 2697 * To add your own MarkupParser, you can override this method, but it 2698 * is recommended that you call super.parseToken() as well to gain advantage 2699 * of JSPWiki's own markup. You can call it at the start of your own 2700 * parseToken() or end - it does not matter. 2701 * 2702 * @param ch The character under investigation 2703 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2704 * @throws IOException If parsing fails. 2705 */ 2706 protected int parseToken( int ch ) 2707 throws IOException 2708 { 2709 Element el = null; 2710 2711 // 2712 // Now, check the incoming token. 2713 // 2714 switch( ch ) 2715 { 2716 case '\r': 2717 // DOS linefeeds we forget 2718 return IGNORE; 2719 2720 case '\n': 2721 // 2722 // Close things like headings, etc. 2723 // 2724 2725 // FIXME: This is not really very fast 2726 2727 closeHeadings(); 2728 2729 popElement("dl"); // Close definition lists. 2730 if( m_istable ) 2731 { 2732 popElement("tr"); 2733 } 2734 2735 m_isdefinition = false; 2736 2737 if( m_newLine ) 2738 { 2739 // Paragraph change. 2740 startBlockLevel(); 2741 2742 // 2743 // Figure out which elements cannot be enclosed inside 2744 // a <p></p> pair according to XHTML rules. 2745 // 2746 String nextLine = peekAheadLine(); 2747 if( nextLine.length() == 0 || 2748 (nextLine.length() > 0 && 2749 !nextLine.startsWith("{{{") && 2750 !nextLine.startsWith("----") && 2751 !nextLine.startsWith("%%") && 2752 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2753 { 2754 pushElement( new Element("p") ); 2755 m_isOpenParagraph = true; 2756 2757 if( m_restartitalic ) 2758 { 2759 pushElement( new Element("i") ); 2760 m_isitalic = true; 2761 m_restartitalic = false; 2762 } 2763 if( m_restartbold ) 2764 { 2765 pushElement( new Element("b") ); 2766 m_isbold = true; 2767 m_restartbold = false; 2768 } 2769 } 2770 } 2771 else 2772 { 2773 m_plainTextBuf.append("\n"); 2774 m_newLine = true; 2775 } 2776 return IGNORE; 2777 2778 2779 case '\\': 2780 el = handleBackslash(); 2781 break; 2782 2783 case '_': 2784 el = handleUnderscore(); 2785 break; 2786 2787 case '\'': 2788 el = handleApostrophe(); 2789 break; 2790 2791 case '{': 2792 el = handleOpenbrace( m_newLine ); 2793 break; 2794 2795 case '}': 2796 el = handleClosebrace(); 2797 break; 2798 2799 case '-': 2800 if( m_newLine ) 2801 el = handleDash(); 2802 2803 break; 2804 2805 case '!': 2806 if( m_newLine ) 2807 { 2808 el = handleHeading(); 2809 } 2810 break; 2811 2812 case ';': 2813 if( m_newLine ) 2814 { 2815 el = handleDefinitionList(); 2816 } 2817 break; 2818 2819 case ':': 2820 if( m_isdefinition ) 2821 { 2822 popElement("dt"); 2823 el = pushElement( new Element("dd") ); 2824 m_isdefinition = false; 2825 } 2826 break; 2827 2828 case '[': 2829 el = handleOpenbracket(); 2830 break; 2831 2832 case '*': 2833 if( m_newLine ) 2834 { 2835 pushBack('*'); 2836 el = handleGeneralList(); 2837 } 2838 break; 2839 2840 case '#': 2841 if( m_newLine ) 2842 { 2843 pushBack('#'); 2844 el = handleGeneralList(); 2845 } 2846 break; 2847 2848 case '|': 2849 el = handleBar( m_newLine ); 2850 break; 2851 2852 case '~': 2853 el = handleTilde(); 2854 break; 2855 2856 case '%': 2857 el = handleDiv( m_newLine ); 2858 break; 2859 2860 case '/': 2861 el = handleSlash( m_newLine ); 2862 break; 2863 2864 default: 2865 break; 2866 } 2867 2868 return el != null ? ELEMENT : CHARACTER; 2869 } 2870 2871 private void closeHeadings() 2872 { 2873 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2874 { 2875 // Add the hash anchor element at the end of the heading 2876 addElement( new Element("a").setAttribute( "class","hashlink" ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2877 m_lastHeading = null; 2878 } 2879 popElement("h2"); 2880 popElement("h3"); 2881 popElement("h4"); 2882 } 2883 2884 /** 2885 * Parses the entire document from the Reader given in the constructor or 2886 * set by {@link #setInputReader(Reader)}. 2887 * 2888 * @return A WikiDocument, ready to be passed to the renderer. 2889 * @throws IOException If parsing cannot be accomplished. 2890 */ 2891 public WikiDocument parse() 2892 throws IOException 2893 { 2894 WikiDocument d = new WikiDocument( m_context.getPage() ); 2895 d.setContext( m_context ); 2896 2897 Element rootElement = new Element("domroot"); 2898 2899 d.setRootElement( rootElement ); 2900 2901 fillBuffer( rootElement ); 2902 2903 paragraphify(rootElement); 2904 2905 return d; 2906 } 2907 2908 /** 2909 * Checks out that the first paragraph is correctly installed. 2910 * 2911 * @param rootElement 2912 */ 2913 private void paragraphify(Element rootElement) 2914 { 2915 // 2916 // Add the paragraph tag to the first paragraph 2917 // 2918 List< Content > kids = rootElement.getContent(); 2919 2920 if( rootElement.getChild("p") != null ) 2921 { 2922 ArrayList<Content> ls = new ArrayList<Content>(); 2923 int idxOfFirstContent = 0; 2924 int count = 0; 2925 2926 for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) 2927 { 2928 Content c = i.next(); 2929 if( c instanceof Element ) 2930 { 2931 String name = ( ( Element )c ).getName(); 2932 if( isBlockLevel( name ) ) break; 2933 } 2934 2935 if( !(c instanceof ProcessingInstruction) ) 2936 { 2937 ls.add( c ); 2938 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2939 } 2940 } 2941 2942 // 2943 // If there were any elements, then add a new <p> (unless it would 2944 // be an empty one) 2945 // 2946 if( ls.size() > 0 ) 2947 { 2948 Element newel = new Element("p"); 2949 2950 for( Iterator< Content > i = ls.iterator(); i.hasNext(); ) 2951 { 2952 Content c = i.next(); 2953 2954 c.detach(); 2955 newel.addContent(c); 2956 } 2957 2958 // 2959 // Make sure there are no empty <p/> tags added. 2960 // 2961 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) 2962 rootElement.addContent(idxOfFirstContent, newel); 2963 } 2964 } 2965 } 2966 2967 2968 /** 2969 * Compares two Strings, and if one starts with the other, then 2970 * returns null. Otherwise just like the normal Comparator 2971 * for strings. 2972 * 2973 * @since 2974 */ 2975 private static class StartingComparator implements Comparator<String> 2976 { 2977 public int compare( String s1, String s2 ) 2978 { 2979 if( s1.length() > s2.length() ) 2980 { 2981 if( s1.startsWith(s2) && s2.length() > 1 ) return 0; 2982 } 2983 else 2984 { 2985 if( s2.startsWith(s1) && s1.length() > 1 ) return 0; 2986 } 2987 2988 return s1.compareTo( s2 ); 2989 } 2990 2991 } 2992 2993 2994} 2995