001 /* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019 package org.apache.wiki.parser; 020 021 import java.io.IOException; 022 import java.io.Reader; 023 import java.io.StringReader; 024 import java.text.MessageFormat; 025 import java.util.ArrayList; 026 import java.util.Arrays; 027 import java.util.Collection; 028 import java.util.Collections; 029 import java.util.Comparator; 030 import java.util.EmptyStackException; 031 import java.util.HashMap; 032 import java.util.Iterator; 033 import java.util.List; 034 import java.util.Map; 035 import java.util.Properties; 036 import java.util.ResourceBundle; 037 import java.util.Stack; 038 039 import javax.xml.transform.Result; 040 041 import org.apache.commons.lang.StringEscapeUtils; 042 import org.apache.commons.lang.StringUtils; 043 import org.apache.log4j.Logger; 044 import org.apache.oro.text.GlobCompiler; 045 import org.apache.oro.text.regex.MalformedPatternException; 046 import org.apache.oro.text.regex.MatchResult; 047 import org.apache.oro.text.regex.Pattern; 048 import org.apache.oro.text.regex.PatternCompiler; 049 import org.apache.oro.text.regex.PatternMatcher; 050 import org.apache.oro.text.regex.Perl5Compiler; 051 import 
org.apache.oro.text.regex.Perl5Matcher; 052 import org.apache.wiki.InternalWikiException; 053 import org.apache.wiki.StringTransmutator; 054 import org.apache.wiki.VariableManager; 055 import org.apache.wiki.WikiContext; 056 import org.apache.wiki.WikiPage; 057 import org.apache.wiki.api.exceptions.PluginException; 058 import org.apache.wiki.api.exceptions.ProviderException; 059 import org.apache.wiki.api.plugin.WikiPlugin; 060 import org.apache.wiki.attachment.Attachment; 061 import org.apache.wiki.attachment.AttachmentManager; 062 import org.apache.wiki.auth.WikiSecurityException; 063 import org.apache.wiki.auth.acl.Acl; 064 import org.apache.wiki.i18n.InternationalizationManager; 065 import org.apache.wiki.preferences.Preferences; 066 import org.apache.wiki.render.CleanTextRenderer; 067 import org.apache.wiki.render.RenderingManager; 068 import org.apache.wiki.util.TextUtil; 069 import org.jdom2.Attribute; 070 import org.jdom2.Content; 071 import org.jdom2.Element; 072 import org.jdom2.IllegalDataException; 073 import org.jdom2.ProcessingInstruction; 074 import org.jdom2.Verifier; 075 076 /** 077 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 078 * heart and soul of JSPWiki : make sure you test properly anything that is added, 079 * or else it breaks down horribly. 080 * 081 * @since 2.4 082 */ 083 public class JSPWikiMarkupParser extends MarkupParser { 084 085 /** Name of the outlink image; relative path to the JSPWiki directory. */ 086 private static final String OUTLINK_IMAGE = "images/out.png"; 087 088 /** The value for anchor element <tt>class</tt> attributes when used 089 * for wiki page (normal) links. The value is "wikipage". */ 090 public static final String CLASS_WIKIPAGE = "wikipage"; 091 092 /** The value for anchor element <tt>class</tt> attributes when used 093 * for edit page links. The value is "createpage". 
*/
    public static final String CLASS_EDITPAGE = "createpage";

    /** The value for anchor element <tt>class</tt> attributes when used
      * for interwiki page links.  The value is "interwiki". */
    public static final String CLASS_INTERWIKI = "interwiki";

    //
    //  Link type codes accepted by makeLink() and createAnchor().  createAnchor()
    //  uses the code as an index into CLASS_TYPES, so the two must stay in step.
    //

    /** Link to a wiki page through its VIEW URL. */
    protected static final int READ = 0;
    /** Link to the EDIT URL of a page; gets a "create" tooltip. */
    protected static final int EDIT = 1;
    /** No usable link target; the text is rendered underlined instead of linked. */
    protected static final int EMPTY = 2; // Empty message
    /** Footnote target: a named anchor inside the page. */
    protected static final int LOCAL = 3;
    /** Reference pointing at a footnote of this page. */
    protected static final int LOCALREF = 4;
    /** Inline image. */
    protected static final int IMAGE = 5;
    /** Link whose target is used as given; may be tagged rel="nofollow". */
    protected static final int EXTERNAL = 6;
    /** Interwiki link. */
    protected static final int INTERWIKI = 7;
    /** Inline image wrapped in an anchor that points at the link text. */
    protected static final int IMAGELINK = 8;
    /** Inline image wrapped in an anchor that points at the wiki page named by the link text. */
    protected static final int IMAGEWIKILINK = 9;
    /** Attachment link, optionally followed by a small "info" link. */
    protected static final int ATTACHMENT = 10;

    private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class );

    //  Parser state flags.
    private boolean m_isbold = false;
    private boolean m_isitalic = false;
    private boolean m_istable = false;
    private boolean m_isPre = false;
    //  While set, flushPlainText() does not look for CamelCase words or plain URIs.
    private boolean m_isEscaping = false;
    private boolean m_isdefinition = false;
    private boolean m_isPreBlock = false;

    /** Contains style information, in multiple forms. */
    private Stack<Boolean> m_styleStack = new Stack<Boolean>();

    // general list handling
    private int m_genlistlevel = 0;
    private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern
    private boolean m_allowPHPWikiStyleLists = true;


    private boolean m_isOpenParagraph = false;

    /** Keeps image regexp Patterns */
    private List<Pattern> m_inlineImagePatterns;

    /** Parser for extended link functionality. */
    private LinkParser m_linkParser = new LinkParser();

    /** Matcher used by isImageLink() to test links against m_inlineImagePatterns. */
    private PatternMatcher m_inlineMatcher = new Perl5Matcher();

    /** Keeps track of any plain text that gets put in the Text nodes */
    private StringBuilder m_plainTextBuf = new StringBuilder(20);

    /** The element that newly produced content is appended to. */
    private Element m_currentElement;

    /** Keep track of duplicate header names. */
    private Map<String, Integer> m_titleSectionCounter = new HashMap<String, Integer>();

    /** If true, consider CamelCase hyperlinks as well. */
    public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks";

    /** If true, all hyperlinks are translated as well, regardless whether they
        are surrounded by brackets. */
    public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris";

    /** If true, all outward links (external links) have a small link image appended. */
    public static final String PROP_USEOUTLINKIMAGE = "jspwiki.translatorReader.useOutlinkImage";

    /** If true, all outward attachment info links have a small link image appended. */
    public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage";

    /** If set to "true", all external links are tagged with 'rel="nofollow"' */
    public static final String PROP_USERELNOFOLLOW = "jspwiki.translatorReader.useRelNofollow";

    /** If true, then considers CamelCase links as well. */
    private boolean m_camelCaseLinks = false;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean m_wysiwygEditorMode = false;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean m_plainUris = false;

    /** If true, all outward links use a small link image. */
    private boolean m_useOutlinkImage = true;

    /** If true, attachment links are followed by a small image that links to the attachment info. */
    private boolean m_useAttachmentImage = true;

    /** If true, allows raw HTML. */
    private boolean m_allowHTML = false;

    /** If true, external links are tagged with rel="nofollow". */
    private boolean m_useRelNofollow = false;

    /** Compiler used by initialize() to turn WIKIWORD_REGEX into a Pattern. */
    private PatternCompiler m_compiler = new Perl5Compiler();

    /**
     *  Matches either a CamelCase word or a plain http/https/mailto URI.
     *  Groups, as read by flushPlainText(): 1 = the non-alphanumeric prefix,
     *  2 = the word or URI itself, 3 = the URI protocol (null for a CamelCase
     *  word), 4 = the rest of the URI.
     */
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    private PatternMatcher m_camelCaseMatcher = new Perl5Matcher();
    /** Compiled form of WIKIWORD_REGEX; set up in initialize(). */
    private Pattern m_camelCasePattern;

    private int m_rowNum = 1;

    private Heading m_lastHeading = null;

    /**
     *  This list contains all IANA registered URI protocol
     *  types as of September 2004 + a few well-known extra types.
     *
     *  JSPWiki recognises all of them as external links.
     *
     *  This array is sorted during class load, so you can just dump
     *  here whatever you want in whatever order you want.
     */
    static final String[] EXTERNAL_LINKS = {
        "http:", "ftp:", "https:", "mailto:",
        "news:", "file:", "rtsp:", "mms:", "ldap:",
        "gopher:", "nntp:", "telnet:", "wais:",
        "prospero:", "z39.50s", "z39.50r", "vemmi:",
        "imap:", "nfs:", "acap:", "tip:", "pop:",
        "dav:", "opaquelocktoken:", "sip:", "sips:",
        "tel:", "fax:", "modem:", "soap.beep:", "soap.beeps",
        "xmlrpc.beep", "xmlrpc.beeps", "urn:", "go:",
        "h323:", "ipp:", "tftp:", "mupdate:", "pres:",
        "im:", "mtqp", "smb:" };

    /** Engine attribute under which the compiled inline image patterns are cached. */
    private static final String INLINE_IMAGE_PATTERNS = "JSPWikiMarkupParser.inlineImagePatterns";

    /** Engine attribute under which the compiled CamelCase pattern is cached. */
    private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";

    /**
     *  CSS class for each link type code; the array index is the type code
     *  (READ = 0 ... ATTACHMENT = 10).
     */
    private static final String[] CLASS_TYPES =
    {
        CLASS_WIKIPAGE,     // READ
        CLASS_EDITPAGE,     // EDIT
        "",                 // EMPTY
        "footnote",         // LOCAL
        "footnoteref",      // LOCALREF
        "",                 // IMAGE
        "external",         // EXTERNAL
        CLASS_INTERWIKI,    // INTERWIKI
        "external",         // IMAGELINK
        CLASS_WIKIPAGE,     // IMAGEWIKILINK
        "attachment"        // ATTACHMENT
    };


    /**
     *  This Comparator is used to find an external link from EXTERNAL_LINKS.  It
     *  checks if the link starts with one of the array entries.
240 */ 241 private static Comparator<String> c_startingComparator = new StartingComparator(); 242 243 static 244 { 245 Arrays.sort( EXTERNAL_LINKS ); 246 } 247 248 /** 249 * Creates a markup parser. 250 * 251 * @param context The WikiContext which controls the parsing 252 * @param in Where the data is read from. 253 */ 254 public JSPWikiMarkupParser( WikiContext context, Reader in ) 255 { 256 super( context, in ); 257 initialize(); 258 } 259 260 // FIXME: parsers should be pooled for better performance. 261 @SuppressWarnings("unchecked") 262 private void initialize() 263 { 264 PatternCompiler compiler = new GlobCompiler(); 265 List<Pattern> compiledpatterns; 266 267 // 268 // We cache compiled patterns in the engine, since their creation is 269 // really expensive 270 // 271 compiledpatterns = (List<Pattern>)m_engine.getAttribute( INLINE_IMAGE_PATTERNS ); 272 273 if( compiledpatterns == null ) 274 { 275 compiledpatterns = new ArrayList<Pattern>(20); 276 Collection< String > ptrns = m_engine.getAllInlinedImagePatterns(); 277 278 // 279 // Make them into Regexp Patterns. Unknown patterns 280 // are ignored. 
281 // 282 for( Iterator< String > i = ptrns.iterator(); i.hasNext(); ) 283 { 284 try 285 { 286 compiledpatterns.add( compiler.compile( i.next(), 287 GlobCompiler.DEFAULT_MASK|GlobCompiler.READ_ONLY_MASK ) ); 288 } 289 catch( MalformedPatternException e ) 290 { 291 log.error("Malformed pattern in properties: ", e ); 292 } 293 } 294 295 m_engine.setAttribute( INLINE_IMAGE_PATTERNS, compiledpatterns ); 296 } 297 298 m_inlineImagePatterns = Collections.unmodifiableList(compiledpatterns); 299 300 m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN ); 301 if( m_camelCasePattern == null ) 302 { 303 try 304 { 305 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX, 306 Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 307 } 308 catch( MalformedPatternException e ) 309 { 310 log.fatal("Internal error: Someone put in a faulty pattern.",e); 311 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader"); 312 } 313 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 314 } 315 // 316 // Set the properties. 
317 // 318 Properties props = m_engine.getWikiProperties(); 319 320 String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 321 322 if( cclinks != null ) 323 { 324 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 325 } 326 else 327 { 328 m_camelCaseLinks = TextUtil.getBooleanProperty( props, 329 PROP_CAMELCASELINKS, 330 m_camelCaseLinks ); 331 } 332 333 334 335 Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE ); 336 if( wysiwygVariable != null ) 337 { 338 m_wysiwygEditorMode = wysiwygVariable.booleanValue(); 339 } 340 341 m_plainUris = getLocalBooleanProperty( m_context, 342 props, 343 PROP_PLAINURIS, 344 m_plainUris ); 345 m_useOutlinkImage = getLocalBooleanProperty( m_context, 346 props, 347 PROP_USEOUTLINKIMAGE, 348 m_useOutlinkImage ); 349 m_useAttachmentImage = getLocalBooleanProperty( m_context, 350 props, 351 PROP_USEATTACHMENTIMAGE, 352 m_useAttachmentImage ); 353 m_allowHTML = getLocalBooleanProperty( m_context, 354 props, 355 MarkupParser.PROP_ALLOWHTML, 356 m_allowHTML ); 357 358 m_useRelNofollow = getLocalBooleanProperty( m_context, 359 props, 360 PROP_USERELNOFOLLOW, 361 m_useRelNofollow ); 362 363 if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null ) 364 { 365 disableAccessRules(); 366 } 367 368 m_context.getPage().setHasMetadata(); 369 } 370 371 /** 372 * This is just a simple helper method which will first check the context 373 * if there is already an override in place, and if there is not, 374 * it will then check the given properties. 375 * 376 * @param context WikiContext to check first 377 * @param props Properties to check next 378 * @param key What key are we searching for? 
379 * @param defValue Default value for the boolean 380 * @return True or false 381 */ 382 private static boolean getLocalBooleanProperty( WikiContext context, 383 Properties props, 384 String key, 385 boolean defValue ) 386 { 387 Object bool = context.getVariable(key); 388 389 if( bool != null ) 390 { 391 return TextUtil.isPositive( (String) bool ); 392 } 393 394 return TextUtil.getBooleanProperty( props, key, defValue ); 395 } 396 397 /** 398 * Returns link name, if it exists; otherwise it returns null. 399 */ 400 private String linkExists( String page ) 401 { 402 try 403 { 404 if( page == null || page.length() == 0 ) return null; 405 406 return m_engine.getFinalPageName( page ); 407 } 408 catch( ProviderException e ) 409 { 410 log.warn("TranslatorReader got a faulty page name!",e); 411 412 return page; // FIXME: What would be the correct way to go back? 413 } 414 } 415 416 /** 417 * Calls a transmutator chain. 418 * 419 * @param list Chain to call 420 * @param text Text that should be passed to the mutate() method 421 * of each of the mutators in the chain. 422 * @return The result of the mutation. 423 */ 424 425 protected String callMutatorChain( Collection list, String text ) 426 { 427 if( list == null || list.size() == 0 ) 428 { 429 return text; 430 } 431 432 for( Iterator i = list.iterator(); i.hasNext(); ) 433 { 434 StringTransmutator m = (StringTransmutator) i.next(); 435 436 text = m.mutate( m_context, text ); 437 } 438 439 return text; 440 } 441 442 /** 443 * Calls the heading listeners. 444 * 445 * @param param A Heading object. 446 */ 447 protected void callHeadingListenerChain( Heading param ) 448 { 449 List list = m_headingListenerChain; 450 451 for( Iterator i = list.iterator(); i.hasNext(); ) 452 { 453 HeadingListener h = (HeadingListener) i.next(); 454 455 h.headingAdded( m_context, param ); 456 } 457 } 458 459 /** 460 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 461 * if you really know what you are doing. 
462 * 463 * @param type One of the types above 464 * @param link URL to which to link to 465 * @param text Link text 466 * @param section If a particular section identifier is required. 467 * @return An A element. 468 * @since 2.4.78 469 */ 470 protected Element createAnchor(int type, String link, String text, String section) 471 { 472 text = escapeHTMLEntities( text ); 473 section = escapeHTMLEntities( section ); 474 Element el = new Element("a"); 475 el.setAttribute("class",CLASS_TYPES[type]); 476 el.setAttribute("href",link+section); 477 el.addContent(text); 478 return el; 479 } 480 481 private Element makeLink( int type, String link, String text, String section, Iterator attributes ) 482 { 483 Element el = null; 484 485 if( text == null ) text = link; 486 487 text = callMutatorChain( m_linkMutators, text ); 488 489 section = (section != null) ? ("#"+section) : ""; 490 491 // Make sure we make a link name that can be accepted 492 // as a valid URL. 493 494 if( link.length() == 0 ) 495 { 496 type = EMPTY; 497 } 498 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 499 500 switch(type) 501 { 502 case READ: 503 el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section ); 504 break; 505 506 case EDIT: 507 el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" ); 508 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 509 break; 510 511 case EMPTY: 512 el = new Element("u").addContent(text); 513 break; 514 515 // 516 // These two are for local references - footnotes and 517 // references to footnotes. 518 // We embed the page name (or whatever WikiContext gives us) 519 // to make sure the links are unique across Wiki. 
520 // 521 case LOCALREF: 522 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 523 break; 524 525 case LOCAL: 526 el = new Element("a").setAttribute("class","footnote"); 527 el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1)); 528 el.addContent("["+text+"]"); 529 break; 530 531 // 532 // With the image, external and interwiki types we need to 533 // make sure nobody can put in Javascript or something else 534 // annoying into the links themselves. We do this by preventing 535 // a haxor from stopping the link name short with quotes in 536 // fillBuffer(). 537 // 538 case IMAGE: 539 el = new Element("img").setAttribute("class","inline"); 540 el.setAttribute("src",link); 541 el.setAttribute("alt",text); 542 break; 543 544 case IMAGELINK: 545 el = new Element("img").setAttribute("class","inline"); 546 el.setAttribute("src",link); 547 el.setAttribute("alt",text); 548 el = createAnchor(IMAGELINK,text,"","").addContent(el); 549 break; 550 551 case IMAGEWIKILINK: 552 String pagelink = m_context.getURL(WikiContext.VIEW,text); 553 el = new Element("img").setAttribute("class","inline"); 554 el.setAttribute("src",link); 555 el.setAttribute("alt",text); 556 el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el); 557 break; 558 559 case EXTERNAL: 560 el = createAnchor( EXTERNAL, link, text, section ); 561 if( m_useRelNofollow ) el.setAttribute("rel","nofollow"); 562 break; 563 564 case INTERWIKI: 565 el = createAnchor( INTERWIKI, link, text, section ); 566 break; 567 568 case ATTACHMENT: 569 String attlink = m_context.getURL( WikiContext.ATTACH, 570 link ); 571 572 String infolink = m_context.getURL( WikiContext.INFO, 573 link ); 574 575 String imglink = m_context.getURL( WikiContext.NONE, 576 "images/attachment_small.png" ); 577 578 el = createAnchor( ATTACHMENT, attlink, text, "" ); 579 580 pushElement(el); 581 popElement(el.getName()); 582 583 if( m_useAttachmentImage ) 584 { 585 el = new 
Element("img").setAttribute("src",imglink); 586 el.setAttribute("border","0"); 587 el.setAttribute("alt","(info)"); 588 589 el = new Element("a").setAttribute("href",infolink).addContent(el); 590 el.setAttribute("class","infolink"); 591 } 592 else 593 { 594 el = null; 595 } 596 break; 597 598 default: 599 break; 600 } 601 602 if( el != null && attributes != null ) 603 { 604 while( attributes.hasNext() ) 605 { 606 Attribute attr = (Attribute)attributes.next(); 607 if( attr != null ) 608 { 609 el.setAttribute(attr); 610 } 611 } 612 } 613 614 if( el != null ) 615 { 616 flushPlainText(); 617 m_currentElement.addContent( el ); 618 } 619 return el; 620 } 621 622 623 /** 624 * Figures out if a link is an off-site link. This recognizes 625 * the most common protocols by checking how it starts. 626 * 627 * @param link The link to check. 628 * @return true, if this is a link outside of this wiki. 629 * @since 2.4 630 */ 631 632 public static boolean isExternalLink( String link ) 633 { 634 int idx = Arrays.binarySearch( EXTERNAL_LINKS, link, 635 c_startingComparator ); 636 637 // 638 // We need to check here once again; otherwise we might 639 // get a match for something like "h". 640 // 641 if( idx >= 0 && link.startsWith(EXTERNAL_LINKS[idx]) ) return true; 642 643 return false; 644 } 645 646 /** 647 * Returns true, if the link in question is an access 648 * rule. 649 */ 650 private static boolean isAccessRule( String link ) 651 { 652 return link.startsWith("{ALLOW") || link.startsWith("{DENY"); 653 } 654 655 /** 656 * Returns true if the link is really command to insert 657 * a plugin. 658 * <P> 659 * Currently we just check if the link starts with "{INSERT", 660 * or just plain "{" but not "{$". 661 * 662 * @param link Link text, i.e. the contents of text between []. 663 * @return True, if this link seems to be a command to insert a plugin here. 
664 */ 665 public static boolean isPluginLink( String link ) 666 { 667 return link.startsWith( "{INSERT" ) || 668 ( link.startsWith( "{" ) && !link.startsWith( "{$" ) ); 669 } 670 671 /** 672 * Matches the given link to the list of image name patterns 673 * to determine whether it should be treated as an inline image 674 * or not. 675 */ 676 private boolean isImageLink( String link ) 677 { 678 if( m_inlineImages ) 679 { 680 link = link.toLowerCase(); 681 682 for( Iterator i = m_inlineImagePatterns.iterator(); i.hasNext(); ) 683 { 684 if( m_inlineMatcher.matches( link, (Pattern) i.next() ) ) 685 return true; 686 } 687 } 688 689 return false; 690 } 691 692 private static boolean isMetadata( String link ) 693 { 694 return link.startsWith("{SET"); 695 } 696 697 /** 698 * These are all of the HTML 4.01 block-level elements. 699 */ 700 private static final String[] BLOCK_ELEMENTS = { 701 "address", "blockquote", "div", "dl", "fieldset", "form", 702 "h1", "h2", "h3", "h4", "h5", "h6", 703 "hr", "noscript", "ol", "p", "pre", "table", "ul" 704 }; 705 706 private static boolean isBlockLevel( String name ) 707 { 708 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 709 } 710 711 /** 712 * This method peeks ahead in the stream until EOL and returns the result. 713 * It will keep the buffers untouched. 714 * 715 * @return The string from the current position to the end of line. 716 */ 717 718 // FIXME: Always returns an empty line, even if the stream is full. 719 private String peekAheadLine() 720 throws IOException 721 { 722 String s = readUntilEOL().toString(); 723 724 if( s.length() > PUSHBACK_BUFFER_SIZE ) 725 { 726 log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover..."); 727 pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) ); 728 } 729 else 730 { 731 try 732 { 733 pushBack( s ); 734 } 735 catch( IOException e ) 736 { 737 log.warn("Pushback failed: the line is probably too long. 
Attempting to recover."); 738 } 739 } 740 return s; 741 } 742 743 744 /** 745 * Writes HTML for error message. Does not add it to the document, you 746 * have to do it yourself. 747 * 748 * @param error The error string. 749 * @return An Element containing the error. 750 */ 751 752 public static Element makeError( String error ) 753 { 754 return new Element("span").setAttribute("class","error").addContent(error); 755 } 756 757 private int flushPlainText() 758 { 759 int numChars = m_plainTextBuf.length(); 760 761 if( numChars > 0 ) 762 { 763 String buf; 764 765 if( !m_allowHTML ) 766 { 767 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 768 } 769 else 770 { 771 buf = m_plainTextBuf.toString(); 772 } 773 // 774 // We must first empty the buffer because the side effect of 775 // calling makeCamelCaseLink() is to call this routine. 776 // 777 778 m_plainTextBuf = new StringBuilder(20); 779 780 try 781 { 782 // 783 // This is the heaviest part of parsing, and therefore we can 784 // do some optimization here. 
785 // 786 // 1) Only when the length of the buffer is big enough, we try to do the match 787 // 788 789 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) 790 { 791 // System.out.println("Buffer="+buf); 792 793 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) 794 { 795 MatchResult result = m_camelCaseMatcher.getMatch(); 796 797 String firstPart = buf.substring(0,result.beginOffset(0)); 798 String prefix = result.group(1); 799 800 if( prefix == null ) prefix = ""; 801 802 String camelCase = result.group(2); 803 String protocol = result.group(3); 804 String uri = protocol+result.group(4); 805 buf = buf.substring(result.endOffset(0)); 806 807 m_currentElement.addContent( firstPart ); 808 809 // 810 // Check if the user does not wish to do URL or WikiWord expansion 811 // 812 if( prefix.endsWith("~") || prefix.indexOf('[') != -1 ) 813 { 814 if( prefix.endsWith("~") ) 815 { 816 if( m_wysiwygEditorMode ) 817 { 818 m_currentElement.addContent( "~" ); 819 } 820 prefix = prefix.substring(0,prefix.length()-1); 821 } 822 if( camelCase != null ) 823 { 824 m_currentElement.addContent( prefix+camelCase ); 825 } 826 else if( protocol != null ) 827 { 828 m_currentElement.addContent( prefix+uri ); 829 } 830 continue; 831 } 832 833 // 834 // Fine, then let's check what kind of a link this was 835 // and emit the proper elements 836 // 837 if( protocol != null ) 838 { 839 char c = uri.charAt(uri.length()-1); 840 if( c == '.' 
|| c == ',' ) 841 { 842 uri = uri.substring(0,uri.length()-1); 843 buf = c + buf; 844 } 845 // System.out.println("URI match "+uri); 846 m_currentElement.addContent( prefix ); 847 makeDirectURILink( uri ); 848 } 849 else 850 { 851 // System.out.println("Matched: '"+camelCase+"'"); 852 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 853 // System.out.println("prefix="+prefix); 854 m_currentElement.addContent( prefix ); 855 856 makeCamelCaseLink( camelCase ); 857 } 858 } 859 860 m_currentElement.addContent( buf ); 861 } 862 else 863 { 864 // 865 // No camelcase asked for, just add the elements 866 // 867 m_currentElement.addContent( buf ); 868 } 869 } 870 catch( IllegalDataException e ) 871 { 872 // 873 // Sometimes it's possible that illegal XML chars is added to the data. 874 // Here we make sure it does not stop parsing. 875 // 876 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 877 } 878 } 879 880 return numChars; 881 } 882 883 /** 884 * Escapes XML entities in a HTML-compatible way (i.e. does not escape 885 * entities that are already escaped). 886 * 887 * @param buf 888 * @return An escaped string. 
*/
    private String escapeHTMLEntities(String buf)
    {
        StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );

        for( int i = 0; i < buf.length(); i++ )
        {
            char ch = buf.charAt(i);

            if( ch == '<' )
            {
                tmpBuf.append("&lt;");
            }
            else if( ch == '>' )
            {
                tmpBuf.append("&gt;");
            }
            else if( ch == '\"' )
            {
                tmpBuf.append("&quot;");
            }
            else if( ch == '&' )
            {
                //
                //  If the ampersand starts an entity reference (&name; or
                //  &#nnn;) it is left as it is; otherwise it is replaced
                //  with &amp;.  The reference may only consist of letters
                //  and digits, with an optional '#' right after the '&';
                //  in particular a second '&' ends the scan, so that
                //  "AT&T&amp;" does not let the first ampersand through.
                //
                int entityEnd = -1;

                for( int j = i + 1; j < buf.length(); j++ )
                {
                    char ch2 = buf.charAt(j);

                    if( ch2 == ';' )
                    {
                        entityEnd = j;
                        break;
                    }

                    if( !Character.isLetterOrDigit( ch2 ) && !(ch2 == '#' && j == i+1) )
                    {
                        break;
                    }
                }

                //  There must be something between '&' and ';' for it to be a reference.
                if( entityEnd > i + 1 )
                {
                    tmpBuf.append( buf, i, entityEnd + 1 );
                    i = entityEnd;
                }
                else
                {
                    tmpBuf.append("&amp;");
                }
            }
            else
            {
                tmpBuf.append( ch );
            }
        }

        return tmpBuf.toString();
    }

    /**
     *  Flushes pending plain text, appends the element to the current element
     *  and makes it the new current element.
     *
     *  @param e The element to open.
     *  @return The element that was given.
     */
    private Element pushElement( Element e )
    {
        flushPlainText();
        m_currentElement.addContent( e );
        m_currentElement = e;

        return e;
    }

    /**
     *  Flushes pending plain text and appends the content to the current
     *  element, which stays current.  A null content is ignored.
     *
     *  @param e The content to add, or null.
     *  @return The current element.
     */
    private Element addElement( Content e )
    {
        if( e != null )
        {
            flushPlainText();
            m_currentElement.addContent( e );
        }
        return m_currentElement;
    }

    /**
     *  All elements that can be empty by the HTML DTD.
     */
    //  Keep sorted.
986 private static final String[] EMPTY_ELEMENTS = { 987 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 988 }; 989 990 /** 991 * Goes through the current element stack and pops all elements until this 992 * element is found - this essentially "closes" and element. 993 * 994 * @param s 995 * @return The new current element, or null, if there was no such element in the entire stack. 996 */ 997 private Element popElement( String s ) 998 { 999 int flushedBytes = flushPlainText(); 1000 1001 Element currEl = m_currentElement; 1002 1003 while( currEl.getParentElement() != null ) 1004 { 1005 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 1006 { 1007 m_currentElement = currEl.getParentElement(); 1008 1009 // 1010 // Check if it's okay for this element to be empty. Then we will 1011 // trick the JDOM generator into not generating an empty element, 1012 // by putting an empty string between the tags. Yes, it's a kludge 1013 // but what'cha gonna do about it. :-) 1014 // 1015 1016 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 1017 { 1018 currEl.addContent(""); 1019 } 1020 1021 return m_currentElement; 1022 } 1023 1024 currEl = currEl.getParentElement(); 1025 } 1026 1027 return null; 1028 } 1029 1030 1031 /** 1032 * Reads the stream until it meets one of the specified 1033 * ending characters, or stream end. The ending character will be left 1034 * in the stream. 
1035 */ 1036 private String readUntil( String endChars ) 1037 throws IOException 1038 { 1039 StringBuilder sb = new StringBuilder( 80 ); 1040 int ch = nextToken(); 1041 1042 while( ch != -1 ) 1043 { 1044 if( ch == '\\' ) 1045 { 1046 ch = nextToken(); 1047 if( ch == -1 ) 1048 { 1049 break; 1050 } 1051 } 1052 else 1053 { 1054 if( endChars.indexOf((char)ch) != -1 ) 1055 { 1056 pushBack( ch ); 1057 break; 1058 } 1059 } 1060 sb.append( (char) ch ); 1061 ch = nextToken(); 1062 } 1063 1064 return sb.toString(); 1065 } 1066 1067 /** 1068 * Reads the stream while the characters that have been specified are 1069 * in the stream, returning then the result as a String. 1070 */ 1071 private String readWhile( String endChars ) 1072 throws IOException 1073 { 1074 StringBuilder sb = new StringBuilder( 80 ); 1075 int ch = nextToken(); 1076 1077 while( ch != -1 ) 1078 { 1079 if( endChars.indexOf((char)ch) == -1 ) 1080 { 1081 pushBack( ch ); 1082 break; 1083 } 1084 1085 sb.append( (char) ch ); 1086 ch = nextToken(); 1087 } 1088 1089 return sb.toString(); 1090 } 1091 1092 private JSPWikiMarkupParser m_cleanTranslator; 1093 1094 /** 1095 * Does a lazy init. Otherwise, we would get into a situation 1096 * where HTMLRenderer would try and boot a TranslatorReader before 1097 * the TranslatorReader it is contained by is up. 1098 */ 1099 private JSPWikiMarkupParser getCleanTranslator() 1100 { 1101 if( m_cleanTranslator == null ) 1102 { 1103 WikiContext dummyContext = new WikiContext( m_engine, 1104 m_context.getHttpRequest(), 1105 m_context.getPage() ); 1106 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 1107 1108 m_cleanTranslator.m_allowHTML = true; 1109 } 1110 1111 return m_cleanTranslator; 1112 } 1113 /** 1114 * Modifies the "hd" parameter to contain proper values. Because 1115 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 1116 * % after url encoding with '_'. 
1117 * <p> 1118 * Counts also duplicate headings (= headings with similar name), and 1119 * attaches a counter. 1120 */ 1121 private String makeHeadingAnchor( String baseName, String title, Heading hd ) 1122 { 1123 hd.m_titleText = title; 1124 title = MarkupParser.wikifyLink( title ); 1125 1126 hd.m_titleSection = m_engine.encodeName(title); 1127 1128 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) 1129 { 1130 Integer count = m_titleSectionCounter.get( hd.m_titleSection ); 1131 count = count + 1; 1132 m_titleSectionCounter.put( hd.m_titleSection, count ); 1133 hd.m_titleSection += "-" + count; 1134 } 1135 else 1136 { 1137 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 1138 } 1139 1140 hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+ 1141 "-"+hd.m_titleSection; 1142 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 1143 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 1144 1145 return hd.m_titleAnchor; 1146 } 1147 1148 private String makeSectionTitle( String title ) 1149 { 1150 title = title.trim(); 1151 String outTitle; 1152 1153 try 1154 { 1155 JSPWikiMarkupParser dtr = getCleanTranslator(); 1156 dtr.setInputReader( new StringReader(title) ); 1157 1158 CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse()); 1159 1160 outTitle = ctt.getString(); 1161 } 1162 catch( IOException e ) 1163 { 1164 log.fatal("CleanTranslator not working", e); 1165 throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() ); 1166 } 1167 1168 return outTitle; 1169 } 1170 1171 /** 1172 * Returns XHTML for the heading. 1173 * 1174 * @param level The level of the heading. 
@see Heading 1175 * @param title the title for the heading 1176 * @param hd a List to which heading should be added 1177 * @return An Element containing the heading 1178 */ 1179 public Element makeHeading( int level, String title, Heading hd ) 1180 { 1181 Element el = null; 1182 1183 String pageName = m_context.getPage().getName(); 1184 1185 String outTitle = makeSectionTitle( title ); 1186 1187 hd.m_level = level; 1188 1189 switch( level ) 1190 { 1191 case Heading.HEADING_SMALL: 1192 el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 1193 break; 1194 1195 case Heading.HEADING_MEDIUM: 1196 el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 1197 break; 1198 1199 case Heading.HEADING_LARGE: 1200 el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 1201 break; 1202 1203 default: 1204 throw new InternalWikiException("Illegal heading type "+level); 1205 } 1206 1207 1208 return el; 1209 } 1210 1211 /** 1212 * When given a link to a WikiName, we just return 1213 * a proper HTML link for it. The local link mutator 1214 * chain is also called. 1215 */ 1216 private Element makeCamelCaseLink( String wikiname ) 1217 { 1218 String matchedLink; 1219 1220 callMutatorChain( m_localLinkMutatorChain, wikiname ); 1221 1222 if( (matchedLink = linkExists( wikiname )) != null ) 1223 { 1224 makeLink( READ, matchedLink, wikiname, null, null ); 1225 } 1226 else 1227 { 1228 makeLink( EDIT, wikiname, wikiname, null, null ); 1229 } 1230 1231 return m_currentElement; 1232 } 1233 1234 /** Holds the image URL for the duration of this parser */ 1235 private String m_outlinkImageURL = null; 1236 1237 /** 1238 * Returns an element for the external link image (out.png). However, 1239 * this method caches the URL for the lifetime of this MarkupParser, 1240 * because it's commonly used, and we'll end up with possibly hundreds 1241 * our thousands of references to it... It's a lot faster, too. 
1242 * 1243 * @return An element containing the HTML for the outlink image. 1244 */ 1245 private Element outlinkImage() 1246 { 1247 Element el = null; 1248 1249 if( m_useOutlinkImage ) 1250 { 1251 if( m_outlinkImageURL == null ) 1252 { 1253 m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE ); 1254 } 1255 1256 el = new Element("img").setAttribute("class", "outlink"); 1257 el.setAttribute( "src", m_outlinkImageURL ); 1258 el.setAttribute("alt",""); 1259 } 1260 1261 return el; 1262 } 1263 1264 /** 1265 * Takes an URL and turns it into a regular wiki link. Unfortunately, 1266 * because of the way that flushPlainText() works, it already encodes 1267 * all of the XML entities. But so does WikiContext.getURL(), so we 1268 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 1269 * <p> 1270 * What a crappy problem. 1271 * 1272 * @param url 1273 * @return An anchor Element containing the link. 1274 */ 1275 private Element makeDirectURILink( String url ) 1276 { 1277 Element result; 1278 String last = null; 1279 1280 if( url.endsWith(",") || url.endsWith(".") ) 1281 { 1282 last = url.substring( url.length()-1 ); 1283 url = url.substring( 0, url.length()-1 ); 1284 } 1285 1286 callMutatorChain( m_externalLinkMutatorChain, url ); 1287 1288 if( isImageLink( url ) ) 1289 { 1290 result = handleImageLink( StringUtils.replace(url,"&","&"), url, false ); 1291 } 1292 else 1293 { 1294 result = makeLink( EXTERNAL, StringUtils.replace(url,"&","&"), url, null, null ); 1295 addElement( outlinkImage() ); 1296 } 1297 1298 if( last != null ) 1299 { 1300 m_plainTextBuf.append(last); 1301 } 1302 1303 return result; 1304 } 1305 1306 /** 1307 * Image links are handled differently: 1308 * 1. If the text is a WikiName of an existing page, 1309 * it gets linked. 1310 * 2. If the text is an external link, then it is inlined. 1311 * 3. Otherwise it becomes an ALT text. 1312 * 1313 * @param reallink The link to the image. 
1314 * @param link Link text portion, may be a link to somewhere else. 1315 * @param hasLinkText If true, then the defined link had a link text available. 1316 * This means that the link text may be a link to a wiki page, 1317 * or an external resource. 1318 */ 1319 1320 // FIXME: isExternalLink() is called twice. 1321 private Element handleImageLink( String reallink, String link, boolean hasLinkText ) 1322 { 1323 String possiblePage = MarkupParser.cleanLink( link ); 1324 1325 if( isExternalLink( link ) && hasLinkText ) 1326 { 1327 return makeLink( IMAGELINK, reallink, link, null, null ); 1328 } 1329 else if( ( linkExists( possiblePage ) ) != null && 1330 hasLinkText ) 1331 { 1332 // System.out.println("Orig="+link+", Matched: "+matchedLink); 1333 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 1334 1335 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 1336 } 1337 else 1338 { 1339 return makeLink( IMAGE, reallink, link, null, null ); 1340 } 1341 } 1342 1343 private Element handleAccessRule( String ruleLine ) 1344 { 1345 if( m_wysiwygEditorMode ) 1346 { 1347 m_currentElement.addContent( "[" + ruleLine + "]" ); 1348 } 1349 1350 if( !m_parseAccessRules ) return m_currentElement; 1351 Acl acl; 1352 WikiPage page = m_context.getRealPage(); 1353 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1354 1355 if( ruleLine.startsWith( "{" ) ) 1356 ruleLine = ruleLine.substring( 1 ); 1357 if( ruleLine.endsWith( "}" ) ) 1358 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1359 1360 if( log.isDebugEnabled() ) log.debug("page="+page.getName()+", ACL = "+ruleLine); 1361 1362 try 1363 { 1364 acl = m_engine.getAclManager().parseAcl( page, ruleLine ); 1365 1366 page.setAcl( acl ); 1367 1368 if( log.isDebugEnabled() ) log.debug( acl.toString() ); 1369 } 1370 catch( WikiSecurityException wse ) 1371 { 1372 return makeError( wse.getMessage() ); 1373 } 1374 1375 return m_currentElement; 1376 } 1377 1378 /** 1379 * Handles metadata setting 
[{SET foo=bar}] 1380 */ 1381 private Element handleMetadata( String link ) 1382 { 1383 if( m_wysiwygEditorMode ) 1384 { 1385 m_currentElement.addContent( "[" + link + "]" ); 1386 } 1387 1388 try 1389 { 1390 String args = link.substring( link.indexOf(' '), link.length()-1 ); 1391 1392 String name = args.substring( 0, args.indexOf('=') ); 1393 String val = args.substring( args.indexOf('=')+1, args.length() ); 1394 1395 name = name.trim(); 1396 val = val.trim(); 1397 1398 if( val.startsWith("'") ) val = val.substring( 1 ); 1399 if( val.endsWith("'") ) val = val.substring( 0, val.length()-1 ); 1400 1401 // log.debug("SET name='"+name+"', value='"+val+"'."); 1402 1403 if( name.length() > 0 && val.length() > 0 ) 1404 { 1405 val = m_engine.getVariableManager().expandVariables( m_context, 1406 val ); 1407 1408 m_context.getPage().setAttribute( name, val ); 1409 } 1410 } 1411 catch( Exception e ) 1412 { 1413 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1414 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1415 } 1416 1417 return m_currentElement; 1418 } 1419 1420 /** 1421 * Emits a processing instruction that will disable markup escaping. This is 1422 * very useful if you want to emit HTML directly into the stream. 1423 * 1424 */ 1425 private void disableOutputEscaping() 1426 { 1427 addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") ); 1428 } 1429 1430 /** 1431 * Gobbles up all hyperlinks that are encased in square brackets. 
1432 */ 1433 private Element handleHyperlinks( String linktext, int pos ) 1434 { 1435 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1436 1437 StringBuilder sb = new StringBuilder(linktext.length()+80); 1438 1439 if( isAccessRule( linktext ) ) 1440 { 1441 return handleAccessRule( linktext ); 1442 } 1443 1444 if( isMetadata( linktext ) ) 1445 { 1446 return handleMetadata( linktext ); 1447 } 1448 1449 if( isPluginLink( linktext ) ) 1450 { 1451 try 1452 { 1453 PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1454 // 1455 // This might sometimes fail, especially if there is something which looks 1456 // like a plugin invocation but is really not. 1457 // 1458 if( pluginContent != null ) 1459 { 1460 addElement( pluginContent ); 1461 1462 pluginContent.executeParse( m_context ); 1463 } 1464 } 1465 catch( PluginException e ) 1466 { 1467 log.info( "Failed to insert plugin: "+e.getMessage() ); 1468 //log.info( "Root cause:",e.getRootThrowable() ); 1469 if( !m_wysiwygEditorMode ) 1470 { 1471 ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1472 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), e.getMessage() ) ) ); 1473 } 1474 } 1475 1476 return m_currentElement; 1477 } 1478 1479 try 1480 { 1481 LinkParser.Link link = m_linkParser.parse(linktext); 1482 linktext = link.getText(); 1483 String linkref = link.getReference(); 1484 1485 // 1486 // Yes, we now have the components separated. 1487 // linktext = the text the link should have 1488 // linkref = the url or page name. 1489 // 1490 // In many cases these are the same. [linktext|linkref]. 
1491 // 1492 if( VariableManager.isVariableLink( linktext ) ) 1493 { 1494 Content el = new VariableContent(linktext); 1495 1496 addElement( el ); 1497 } 1498 else if( isExternalLink( linkref ) ) 1499 { 1500 // It's an external link, out of this Wiki 1501 1502 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1503 1504 if( isImageLink( linkref ) ) 1505 { 1506 handleImageLink( linkref, linktext, link.hasReference() ); 1507 } 1508 else 1509 { 1510 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1511 addElement( outlinkImage() ); 1512 } 1513 } 1514 else if( link.isInterwikiLink() ) 1515 { 1516 // It's an interwiki link 1517 // InterWiki links also get added to external link chain 1518 // after the links have been resolved. 1519 1520 // FIXME: There is an interesting issue here: We probably should 1521 // URLEncode the wikiPage, but we can't since some of the 1522 // Wikis use slashes (/), which won't survive URLEncoding. 1523 // Besides, we don't know which character set the other Wiki 1524 // is using, so you'll have to write the entire name as it appears 1525 // in the URL. Bugger. 
1526 1527 String extWiki = link.getExternalWiki(); 1528 String wikiPage = link.getExternalWikiPage(); 1529 1530 if( m_wysiwygEditorMode ) 1531 { 1532 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1533 } 1534 else 1535 { 1536 String urlReference = m_engine.getInterWikiURL( extWiki ); 1537 1538 if( urlReference != null ) 1539 { 1540 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1541 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1542 1543 if( isImageLink(urlReference) ) 1544 { 1545 handleImageLink( urlReference, linktext, link.hasReference() ); 1546 } 1547 else 1548 { 1549 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1550 } 1551 1552 if( isExternalLink(urlReference) ) 1553 { 1554 addElement( outlinkImage() ); 1555 } 1556 } 1557 else 1558 { 1559 Object[] args = { extWiki }; 1560 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1561 } 1562 } 1563 } 1564 else if( linkref.startsWith("#") ) 1565 { 1566 // It defines a local footnote 1567 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1568 } 1569 else if( TextUtil.isNumber( linkref ) ) 1570 { 1571 // It defines a reference to a local footnote 1572 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1573 } 1574 else 1575 { 1576 int hashMark = -1; 1577 1578 // 1579 // Internal wiki link, but is it an attachment link? 
1580 // 1581 String attachment = findAttachment( linkref ); 1582 if( attachment != null ) 1583 { 1584 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1585 1586 if( isImageLink( linkref ) ) 1587 { 1588 attachment = m_context.getURL( WikiContext.ATTACH, attachment ); 1589 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1590 } 1591 else 1592 { 1593 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1594 } 1595 } 1596 else if( (hashMark = linkref.indexOf('#')) != -1 ) 1597 { 1598 // It's an internal Wiki link, but to a named section 1599 1600 String namedSection = linkref.substring( hashMark+1 ); 1601 linkref = linkref.substring( 0, hashMark ); 1602 1603 linkref = MarkupParser.cleanLink( linkref ); 1604 1605 callMutatorChain( m_localLinkMutatorChain, linkref ); 1606 1607 String matchedLink; 1608 if( (matchedLink = linkExists( linkref )) != null ) 1609 { 1610 String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection)); 1611 sectref = sectref.replace('%', '_'); 1612 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1613 } 1614 else 1615 { 1616 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1617 } 1618 } 1619 else 1620 { 1621 // It's an internal Wiki link 1622 linkref = MarkupParser.cleanLink( linkref ); 1623 1624 callMutatorChain( m_localLinkMutatorChain, linkref ); 1625 1626 String matchedLink = linkExists( linkref ); 1627 1628 if( matchedLink != null ) 1629 { 1630 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1631 } 1632 else 1633 { 1634 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1635 } 1636 } 1637 } 1638 } 1639 catch( ParseException e ) 1640 { 1641 log.info("Parser failure: ",e); 1642 Object[] args = { e.getMessage() }; 1643 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1644 } 1645 1646 return m_currentElement; 1647 } 1648 1649 
private String findAttachment( String linktext ) 1650 { 1651 AttachmentManager mgr = m_engine.getAttachmentManager(); 1652 Attachment att = null; 1653 1654 try 1655 { 1656 att = mgr.getAttachmentInfo( m_context, linktext ); 1657 } 1658 catch( ProviderException e ) 1659 { 1660 log.warn("Finding attachments failed: ",e); 1661 return null; 1662 } 1663 1664 if( att != null ) 1665 { 1666 return att.getName(); 1667 } 1668 else if( linktext.indexOf('/') != -1 ) 1669 { 1670 return linktext; 1671 } 1672 1673 return null; 1674 } 1675 1676 /** 1677 * Pushes back any string that has been read. It will obviously 1678 * be pushed back in a reverse order. 1679 * 1680 * @since 2.1.77 1681 */ 1682 private void pushBack( String s ) 1683 throws IOException 1684 { 1685 for( int i = s.length()-1; i >= 0; i-- ) 1686 { 1687 pushBack( s.charAt(i) ); 1688 } 1689 } 1690 1691 private Element handleBackslash() 1692 throws IOException 1693 { 1694 int ch = nextToken(); 1695 1696 if( ch == '\\' ) 1697 { 1698 int ch2 = nextToken(); 1699 1700 if( ch2 == '\\' ) 1701 { 1702 pushElement( new Element("br").setAttribute("clear","all")); 1703 return popElement("br"); 1704 } 1705 1706 pushBack( ch2 ); 1707 1708 pushElement( new Element("br") ); 1709 return popElement("br"); 1710 } 1711 1712 pushBack( ch ); 1713 1714 return null; 1715 } 1716 1717 private Element handleUnderscore() 1718 throws IOException 1719 { 1720 int ch = nextToken(); 1721 Element el = null; 1722 1723 if( ch == '_' ) 1724 { 1725 if( m_isbold ) 1726 { 1727 el = popElement("b"); 1728 } 1729 else 1730 { 1731 el = pushElement( new Element("b") ); 1732 } 1733 m_isbold = !m_isbold; 1734 } 1735 else 1736 { 1737 pushBack( ch ); 1738 } 1739 1740 return el; 1741 } 1742 1743 1744 /** 1745 * For example: italics. 
1746 */ 1747 private Element handleApostrophe() 1748 throws IOException 1749 { 1750 int ch = nextToken(); 1751 Element el = null; 1752 1753 if( ch == '\'' ) 1754 { 1755 if( m_isitalic ) 1756 { 1757 el = popElement("i"); 1758 } 1759 else 1760 { 1761 el = pushElement( new Element("i") ); 1762 } 1763 m_isitalic = !m_isitalic; 1764 } 1765 else 1766 { 1767 pushBack( ch ); 1768 } 1769 1770 return el; 1771 } 1772 1773 private Element handleOpenbrace( boolean isBlock ) 1774 throws IOException 1775 { 1776 int ch = nextToken(); 1777 1778 if( ch == '{' ) 1779 { 1780 int ch2 = nextToken(); 1781 1782 if( ch2 == '{' ) 1783 { 1784 m_isPre = true; 1785 m_isEscaping = true; 1786 m_isPreBlock = isBlock; 1787 1788 if( isBlock ) 1789 { 1790 startBlockLevel(); 1791 return pushElement( new Element("pre") ); 1792 } 1793 1794 return pushElement( new Element("span").setAttribute("style","font-family:monospace; white-space:pre;") ); 1795 } 1796 1797 pushBack( ch2 ); 1798 1799 return pushElement( new Element("tt") ); 1800 } 1801 1802 pushBack( ch ); 1803 1804 return null; 1805 } 1806 1807 /** 1808 * Handles both }} and }}} 1809 */ 1810 private Element handleClosebrace() 1811 throws IOException 1812 { 1813 int ch2 = nextToken(); 1814 1815 if( ch2 == '}' ) 1816 { 1817 int ch3 = nextToken(); 1818 1819 if( ch3 == '}' ) 1820 { 1821 if( m_isPre ) 1822 { 1823 if( m_isPreBlock ) 1824 { 1825 popElement( "pre" ); 1826 } 1827 else 1828 { 1829 popElement( "span" ); 1830 } 1831 1832 m_isPre = false; 1833 m_isEscaping = false; 1834 return m_currentElement; 1835 } 1836 1837 m_plainTextBuf.append("}}}"); 1838 return m_currentElement; 1839 } 1840 1841 pushBack( ch3 ); 1842 1843 if( !m_isEscaping ) 1844 { 1845 return popElement("tt"); 1846 } 1847 } 1848 1849 pushBack( ch2 ); 1850 1851 return null; 1852 } 1853 1854 private Element handleDash() 1855 throws IOException 1856 { 1857 int ch = nextToken(); 1858 1859 if( ch == '-' ) 1860 { 1861 int ch2 = nextToken(); 1862 1863 if( ch2 == '-' ) 1864 { 1865 int ch3 = 
nextToken(); 1866 1867 if( ch3 == '-' ) 1868 { 1869 // Empty away all the rest of the dashes. 1870 // Do not forget to return the first non-match back. 1871 do 1872 { 1873 ch = nextToken(); 1874 } 1875 while ( ch == '-' ); 1876 1877 pushBack(ch); 1878 startBlockLevel(); 1879 pushElement( new Element("hr") ); 1880 return popElement( "hr" ); 1881 } 1882 1883 pushBack( ch3 ); 1884 } 1885 pushBack( ch2 ); 1886 } 1887 1888 pushBack( ch ); 1889 1890 return null; 1891 } 1892 1893 private Element handleHeading() 1894 throws IOException 1895 { 1896 Element el = null; 1897 1898 int ch = nextToken(); 1899 1900 Heading hd = new Heading(); 1901 1902 if( ch == '!' ) 1903 { 1904 int ch2 = nextToken(); 1905 1906 if( ch2 == '!' ) 1907 { 1908 String title = peekAheadLine(); 1909 1910 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1911 } 1912 else 1913 { 1914 pushBack( ch2 ); 1915 String title = peekAheadLine(); 1916 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1917 } 1918 } 1919 else 1920 { 1921 pushBack( ch ); 1922 String title = peekAheadLine(); 1923 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1924 } 1925 1926 callHeadingListenerChain( hd ); 1927 1928 m_lastHeading = hd; 1929 1930 if( el != null ) pushElement(el); 1931 1932 return el; 1933 } 1934 1935 /** 1936 * Reads the stream until the next EOL or EOF. Note that it will also read the 1937 * EOL from the stream. 
1938 */ 1939 private StringBuilder readUntilEOL() 1940 throws IOException 1941 { 1942 int ch; 1943 StringBuilder buf = new StringBuilder( 256 ); 1944 1945 while( true ) 1946 { 1947 ch = nextToken(); 1948 1949 if( ch == -1 ) 1950 break; 1951 1952 buf.append( (char) ch ); 1953 1954 if( ch == '\n' ) 1955 break; 1956 } 1957 return buf; 1958 } 1959 1960 /** Controls whether italic is restarted after a paragraph shift */ 1961 1962 private boolean m_restartitalic = false; 1963 private boolean m_restartbold = false; 1964 1965 private boolean m_newLine; 1966 1967 /** 1968 * Starts a block level element, therefore closing 1969 * a potential open paragraph tag. 1970 */ 1971 private void startBlockLevel() 1972 { 1973 // These may not continue over block level limits in XHTML 1974 1975 popElement("i"); 1976 popElement("b"); 1977 popElement("tt"); 1978 1979 if( m_isOpenParagraph ) 1980 { 1981 m_isOpenParagraph = false; 1982 popElement("p"); 1983 m_plainTextBuf.append("\n"); // Just small beautification 1984 } 1985 1986 m_restartitalic = m_isitalic; 1987 m_restartbold = m_isbold; 1988 1989 m_isitalic = false; 1990 m_isbold = false; 1991 } 1992 1993 private static String getListType( char c ) 1994 { 1995 if( c == '*' ) 1996 { 1997 return "ul"; 1998 } 1999 else if( c == '#' ) 2000 { 2001 return "ol"; 2002 } 2003 throw new InternalWikiException("Parser got faulty list type: "+c); 2004 } 2005 /** 2006 * Like original handleOrderedList() and handleUnorderedList() 2007 * however handles both ordered ('#') and unordered ('*') mixed together. 2008 */ 2009 2010 // FIXME: Refactor this; it's a bit messy. 
2011 2012 private Element handleGeneralList() 2013 throws IOException 2014 { 2015 startBlockLevel(); 2016 2017 String strBullets = readWhile( "*#" ); 2018 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution 2019 int numBullets = strBullets.length(); 2020 2021 // override the beginning portion of bullet pattern to be like the previous 2022 // to simulate PHPWiki style lists 2023 2024 if(m_allowPHPWikiStyleLists) 2025 { 2026 // only substitute if different 2027 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 2028 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) 2029 { 2030 if(numBullets <= m_genlistlevel) 2031 { 2032 // Substitute all but the last character (keep the expressed bullet preference) 2033 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") 2034 + strBullets.substring(numBullets-1, numBullets); 2035 } 2036 else 2037 { 2038 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); 2039 } 2040 } 2041 } 2042 2043 // 2044 // Check if this is still of the same type 2045 // 2046 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 2047 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) 2048 { 2049 if( numBullets > m_genlistlevel ) 2050 { 2051 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); 2052 2053 for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) 2054 { 2055 // bullets are growing, get from new bullet list 2056 pushElement( new Element("li") ); 2057 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); 2058 } 2059 } 2060 else if( numBullets < m_genlistlevel ) 2061 { 2062 // Close the previous list item. 
2063 // buf.append( m_renderer.closeListItem() ); 2064 popElement( "li" ); 2065 2066 for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) 2067 { 2068 // bullets are shrinking, get from old bullet list 2069 2070 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 2071 if( m_genlistlevel > 0 ) 2072 { 2073 popElement( "li" ); 2074 } 2075 2076 } 2077 } 2078 else 2079 { 2080 if( m_genlistlevel > 0 ) 2081 { 2082 popElement( "li" ); 2083 } 2084 } 2085 } 2086 else 2087 { 2088 // 2089 // The pattern has changed, unwind and restart 2090 // 2091 int numEqualBullets; 2092 int numCheckBullets; 2093 2094 // find out how much is the same 2095 numEqualBullets = 0; 2096 numCheckBullets = Math.min(numBullets,m_genlistlevel); 2097 2098 while( numEqualBullets < numCheckBullets ) 2099 { 2100 // if the bullets are equal so far, keep going 2101 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) 2102 numEqualBullets++; 2103 // otherwise giveup, we have found how many are equal 2104 else 2105 break; 2106 } 2107 2108 //unwind 2109 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) 2110 { 2111 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); 2112 if( m_genlistlevel > numBullets ) 2113 { 2114 popElement("li"); 2115 } 2116 } 2117 2118 //rewind 2119 2120 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); 2121 for(int i = numEqualBullets; i < numBullets; i++) 2122 { 2123 pushElement( new Element("li") ); 2124 pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); 2125 } 2126 m_genlistlevel = numBullets; 2127 } 2128 2129 // 2130 // Push a new list item, and eat away any extra whitespace 2131 // 2132 pushElement( new Element("li") ); 2133 readWhile(" "); 2134 2135 // work done, remember the new bullet list (in place of old one) 2136 m_genlistBulletBuffer.setLength(0); 2137 m_genlistBulletBuffer.append(strBullets); 2138 2139 return m_currentElement; 2140 } 
2141 2142 private Element unwindGeneralList() 2143 { 2144 //unwind 2145 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 2146 { 2147 popElement( "li" ); 2148 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 2149 } 2150 2151 m_genlistBulletBuffer.setLength(0); 2152 2153 return null; 2154 } 2155 2156 2157 private Element handleDefinitionList() 2158 throws IOException 2159 { 2160 if( !m_isdefinition ) 2161 { 2162 m_isdefinition = true; 2163 2164 startBlockLevel(); 2165 2166 pushElement( new Element("dl") ); 2167 return pushElement( new Element("dt") ); 2168 } 2169 2170 return null; 2171 } 2172 2173 private Element handleOpenbracket() 2174 throws IOException 2175 { 2176 StringBuilder sb = new StringBuilder(40); 2177 int pos = getPosition(); 2178 int ch = nextToken(); 2179 boolean isPlugin = false; 2180 2181 if( ch == '[' ) 2182 { 2183 if( m_wysiwygEditorMode ) 2184 { 2185 sb.append( '[' ); 2186 } 2187 2188 sb.append( (char)ch ); 2189 2190 while( (ch = nextToken()) == '[' ) 2191 { 2192 sb.append( (char)ch ); 2193 } 2194 } 2195 2196 2197 if( ch == '{' ) 2198 { 2199 isPlugin = true; 2200 } 2201 2202 pushBack( ch ); 2203 2204 if( sb.length() > 0 ) 2205 { 2206 m_plainTextBuf.append( sb ); 2207 return m_currentElement; 2208 } 2209 2210 // 2211 // Find end of hyperlink 2212 // 2213 2214 ch = nextToken(); 2215 int nesting = 1; // Check for nested plugins 2216 2217 while( ch != -1 ) 2218 { 2219 int ch2 = nextToken(); pushBack(ch2); 2220 2221 if( isPlugin ) 2222 { 2223 if( ch == '[' && ch2 == '{' ) 2224 { 2225 nesting++; 2226 } 2227 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 2228 { 2229 break; 2230 } 2231 else if( ch == '}' && ch2 == ']' ) 2232 { 2233 // NB: This will be decremented once at the end 2234 nesting--; 2235 } 2236 } 2237 else 2238 { 2239 if( ch == ']' ) 2240 { 2241 break; 2242 } 2243 } 2244 2245 sb.append( (char) ch ); 2246 2247 ch = nextToken(); 2248 } 2249 2250 // 2251 // If the link is never finished, do some 
tricks to display the rest of the line 2252 // unchanged. 2253 // 2254 if( ch == -1 ) 2255 { 2256 log.debug("Warning: unterminated link detected!"); 2257 m_isEscaping = true; 2258 m_plainTextBuf.append( sb ); 2259 flushPlainText(); 2260 m_isEscaping = false; 2261 return m_currentElement; 2262 } 2263 2264 return handleHyperlinks( sb.toString(), pos ); 2265 } 2266 2267 /** 2268 * Reads the stream until the current brace is closed or stream end. 2269 */ 2270 private String readBraceContent( char opening, char closing ) 2271 throws IOException 2272 { 2273 StringBuilder sb = new StringBuilder(40); 2274 int braceLevel = 1; 2275 int ch; 2276 while(( ch = nextToken() ) != -1 ) 2277 { 2278 if( ch == '\\' ) 2279 { 2280 continue; 2281 } 2282 else if ( ch == opening ) 2283 { 2284 braceLevel++; 2285 } 2286 else if ( ch == closing ) 2287 { 2288 braceLevel--; 2289 if (braceLevel==0) 2290 { 2291 break; 2292 } 2293 } 2294 sb.append( (char)ch ); 2295 } 2296 return sb.toString(); 2297 } 2298 2299 2300 /** 2301 * Handles constructs of type %%(style) and %%class 2302 * @param newLine 2303 * @return An Element containing the div or span, depending on the situation. 2304 * @throws IOException 2305 */ 2306 private Element handleDiv( boolean newLine ) 2307 throws IOException 2308 { 2309 int ch = nextToken(); 2310 Element el = null; 2311 2312 if( ch == '%' ) 2313 { 2314 String style = null; 2315 String clazz = null; 2316 2317 ch = nextToken(); 2318 2319 // 2320 // Style or class? 2321 // 2322 if( ch == '(' ) 2323 { 2324 style = readBraceContent('(',')'); 2325 } 2326 else if( Character.isLetter( (char) ch ) ) 2327 { 2328 pushBack( ch ); 2329 clazz = readUntil( " \t\n\r" ); 2330 ch = nextToken(); 2331 2332 // 2333 // Pop out only spaces, so that the upcoming EOL check does not check the 2334 // next line. 2335 // 2336 if( ch == '\n' || ch == '\r' ) 2337 { 2338 pushBack(ch); 2339 } 2340 } 2341 else 2342 { 2343 // 2344 // Anything else stops. 
2345 // 2346 2347 pushBack(ch); 2348 2349 try 2350 { 2351 Boolean isSpan = m_styleStack.pop(); 2352 2353 if( isSpan == null ) 2354 { 2355 // Fail quietly 2356 } 2357 else if( isSpan.booleanValue() ) 2358 { 2359 el = popElement( "span" ); 2360 } 2361 else 2362 { 2363 el = popElement( "div" ); 2364 } 2365 } 2366 catch( EmptyStackException e ) 2367 { 2368 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 2369 return m_currentElement; 2370 } 2371 2372 return el; 2373 } 2374 2375 // 2376 // Check if there is an attempt to do something nasty 2377 // 2378 2379 try 2380 { 2381 style = StringEscapeUtils.unescapeHtml(style); 2382 if( style != null && style.indexOf("javascript:") != -1 ) 2383 { 2384 log.debug("Attempt to output javascript within CSS:"+style); 2385 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2386 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 2387 } 2388 } 2389 catch( NumberFormatException e ) 2390 { 2391 // 2392 // If there are unknown entities, we don't want the parser to stop. 2393 // 2394 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2395 String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 2396 return addElement( makeError( msg ) ); 2397 } 2398 2399 // 2400 // Decide if we should open a div or a span? 
2401 // 2402 String eol = peekAheadLine(); 2403 2404 if( eol.trim().length() > 0 ) 2405 { 2406 // There is stuff after the class 2407 2408 el = new Element("span"); 2409 2410 m_styleStack.push( Boolean.TRUE ); 2411 } 2412 else 2413 { 2414 startBlockLevel(); 2415 el = new Element("div"); 2416 m_styleStack.push( Boolean.FALSE ); 2417 } 2418 2419 if( style != null ) el.setAttribute("style", style); 2420 if( clazz != null ) el.setAttribute("class", clazz ); 2421 el = pushElement( el ); 2422 2423 return el; 2424 } 2425 2426 pushBack(ch); 2427 2428 return el; 2429 } 2430 2431 private Element handleSlash( boolean newLine ) 2432 throws IOException 2433 { 2434 int ch = nextToken(); 2435 2436 pushBack(ch); 2437 if( ch == '%' && !m_styleStack.isEmpty() ) 2438 { 2439 return handleDiv( newLine ); 2440 } 2441 2442 return null; 2443 } 2444 2445 private Element handleBar( boolean newLine ) 2446 throws IOException 2447 { 2448 Element el = null; 2449 2450 if( !m_istable && !newLine ) 2451 { 2452 return null; 2453 } 2454 2455 // 2456 // If the bar is in the first column, we will either start 2457 // a new table or continue the old one. 2458 // 2459 2460 if( newLine ) 2461 { 2462 if( !m_istable ) 2463 { 2464 startBlockLevel(); 2465 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2466 m_istable = true; 2467 m_rowNum = 0; 2468 } 2469 2470 m_rowNum++; 2471 Element tr = ( m_rowNum % 2 != 0 ) 2472 ? new Element("tr").setAttribute("class", "odd") 2473 : new Element("tr"); 2474 el = pushElement( tr ); 2475 } 2476 2477 // 2478 // Check out which table cell element to start; 2479 // a header element (th) or a regular element (td). 
2480 // 2481 int ch = nextToken(); 2482 2483 if( ch == '|' ) 2484 { 2485 if( !newLine ) 2486 { 2487 el = popElement("th"); 2488 if( el == null ) popElement("td"); 2489 } 2490 el = pushElement( new Element("th") ); 2491 } 2492 else 2493 { 2494 if( !newLine ) 2495 { 2496 el = popElement("td"); 2497 if( el == null ) popElement("th"); 2498 } 2499 2500 el = pushElement( new Element("td") ); 2501 2502 pushBack( ch ); 2503 } 2504 2505 return el; 2506 } 2507 2508 /** 2509 * Generic escape of next character or entity. 2510 */ 2511 private Element handleTilde() 2512 throws IOException 2513 { 2514 int ch = nextToken(); 2515 2516 if( ch == ' ' ) 2517 { 2518 if( m_wysiwygEditorMode ) 2519 { 2520 m_plainTextBuf.append( "~ " ); 2521 } 2522 return m_currentElement; 2523 } 2524 2525 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2526 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2527 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2528 { 2529 if( m_wysiwygEditorMode ) 2530 { 2531 m_plainTextBuf.append( '~' ); 2532 } 2533 2534 m_plainTextBuf.append( (char)ch ); 2535 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2536 return m_currentElement; 2537 } 2538 2539 // No escape. 2540 pushBack( ch ); 2541 2542 return null; 2543 } 2544 2545 private void fillBuffer( Element startElement ) 2546 throws IOException 2547 { 2548 m_currentElement = startElement; 2549 2550 boolean quitReading = false; 2551 m_newLine = true; 2552 disableOutputEscaping(); 2553 2554 while(!quitReading) 2555 { 2556 int ch = nextToken(); 2557 2558 if( ch == -1 ) break; 2559 2560 // 2561 // Check if we're actually ending the preformatted mode. 2562 // We still must do an entity transformation here. 2563 // 2564 if( m_isEscaping ) 2565 { 2566 if( ch == '}' ) 2567 { 2568 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2569 } 2570 else if( ch == -1 ) 2571 { 2572 quitReading = true; 2573 } 2574 else if( ch == '\r' ) 2575 { 2576 // DOS line feeds we ignore. 
2577 } 2578 else if( ch == '<' ) 2579 { 2580 m_plainTextBuf.append( "<" ); 2581 } 2582 else if( ch == '>' ) 2583 { 2584 m_plainTextBuf.append( ">" ); 2585 } 2586 else if( ch == '&' ) 2587 { 2588 m_plainTextBuf.append( "&" ); 2589 } 2590 else if( ch == '~' ) 2591 { 2592 String braces = readWhile("}"); 2593 if( braces.length() >= 3 ) 2594 { 2595 m_plainTextBuf.append("}}}"); 2596 2597 braces = braces.substring(3); 2598 } 2599 else 2600 { 2601 m_plainTextBuf.append( (char) ch ); 2602 } 2603 2604 for( int i = braces.length()-1; i >= 0; i-- ) 2605 { 2606 pushBack(braces.charAt(i)); 2607 } 2608 } 2609 else 2610 { 2611 m_plainTextBuf.append( (char) ch ); 2612 } 2613 2614 continue; 2615 } 2616 2617 // 2618 // An empty line stops a list 2619 // 2620 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2621 { 2622 m_plainTextBuf.append(unwindGeneralList()); 2623 } 2624 2625 if( m_newLine && ch != '|' && m_istable ) 2626 { 2627 popElement("table"); 2628 m_istable = false; 2629 } 2630 2631 int skip = IGNORE; 2632 2633 // 2634 // Do the actual parsing and catch any errors. 2635 // 2636 try 2637 { 2638 skip = parseToken( ch ); 2639 } 2640 catch( IllegalDataException e ) 2641 { 2642 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2643 2644 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2645 } 2646 2647 // 2648 // The idea is as follows: If the handler method returns 2649 // an element (el != null), it is assumed that it has been 2650 // added in the stack. Otherwise the character is added 2651 // as is to the plaintext buffer. 2652 // 2653 // For the transition phase, if s != null, it also gets 2654 // added in the plaintext buffer. 
2655 // 2656 2657 switch( skip ) 2658 { 2659 case ELEMENT: 2660 m_newLine = false; 2661 break; 2662 2663 case CHARACTER: 2664 m_plainTextBuf.append( (char) ch ); 2665 m_newLine = false; 2666 break; 2667 2668 case IGNORE: 2669 default: 2670 break; 2671 } 2672 } 2673 2674 closeHeadings(); 2675 popElement("domroot"); 2676 } 2677 2678 private String cleanupSuspectData( String s ) 2679 { 2680 StringBuilder sb = new StringBuilder( s.length() ); 2681 2682 for( int i = 0; i < s.length(); i++ ) 2683 { 2684 char c = s.charAt(i); 2685 2686 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2687 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); 2688 } 2689 2690 return sb.toString(); 2691 } 2692 2693 /** The token is a plain character. */ 2694 protected static final int CHARACTER = 0; 2695 2696 /** The token is a wikimarkup element. */ 2697 protected static final int ELEMENT = 1; 2698 2699 /** The token is to be ignored. */ 2700 protected static final int IGNORE = 2; 2701 2702 /** 2703 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2704 * you think this was a wiki markup element, and IGNORE, if you think 2705 * we should ignore this altogether. 2706 * <p> 2707 * To add your own MarkupParser, you can override this method, but it 2708 * is recommended that you call super.parseToken() as well to gain advantage 2709 * of JSPWiki's own markup. You can call it at the start of your own 2710 * parseToken() or end - it does not matter. 2711 * 2712 * @param ch The character under investigation 2713 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2714 * @throws IOException If parsing fails. 2715 */ 2716 protected int parseToken( int ch ) 2717 throws IOException 2718 { 2719 Element el = null; 2720 2721 // 2722 // Now, check the incoming token. 2723 // 2724 switch( ch ) 2725 { 2726 case '\r': 2727 // DOS linefeeds we forget 2728 return IGNORE; 2729 2730 case '\n': 2731 // 2732 // Close things like headings, etc. 
2733 // 2734 2735 // FIXME: This is not really very fast 2736 2737 closeHeadings(); 2738 2739 popElement("dl"); // Close definition lists. 2740 if( m_istable ) 2741 { 2742 popElement("tr"); 2743 } 2744 2745 m_isdefinition = false; 2746 2747 if( m_newLine ) 2748 { 2749 // Paragraph change. 2750 startBlockLevel(); 2751 2752 // 2753 // Figure out which elements cannot be enclosed inside 2754 // a <p></p> pair according to XHTML rules. 2755 // 2756 String nextLine = peekAheadLine(); 2757 if( nextLine.length() == 0 || 2758 (nextLine.length() > 0 && 2759 !nextLine.startsWith("{{{") && 2760 !nextLine.startsWith("----") && 2761 !nextLine.startsWith("%%") && 2762 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2763 { 2764 pushElement( new Element("p") ); 2765 m_isOpenParagraph = true; 2766 2767 if( m_restartitalic ) 2768 { 2769 pushElement( new Element("i") ); 2770 m_isitalic = true; 2771 m_restartitalic = false; 2772 } 2773 if( m_restartbold ) 2774 { 2775 pushElement( new Element("b") ); 2776 m_isbold = true; 2777 m_restartbold = false; 2778 } 2779 } 2780 } 2781 else 2782 { 2783 m_plainTextBuf.append("\n"); 2784 m_newLine = true; 2785 } 2786 return IGNORE; 2787 2788 2789 case '\\': 2790 el = handleBackslash(); 2791 break; 2792 2793 case '_': 2794 el = handleUnderscore(); 2795 break; 2796 2797 case '\'': 2798 el = handleApostrophe(); 2799 break; 2800 2801 case '{': 2802 el = handleOpenbrace( m_newLine ); 2803 break; 2804 2805 case '}': 2806 el = handleClosebrace(); 2807 break; 2808 2809 case '-': 2810 if( m_newLine ) 2811 el = handleDash(); 2812 2813 break; 2814 2815 case '!': 2816 if( m_newLine ) 2817 { 2818 el = handleHeading(); 2819 } 2820 break; 2821 2822 case ';': 2823 if( m_newLine ) 2824 { 2825 el = handleDefinitionList(); 2826 } 2827 break; 2828 2829 case ':': 2830 if( m_isdefinition ) 2831 { 2832 popElement("dt"); 2833 el = pushElement( new Element("dd") ); 2834 m_isdefinition = false; 2835 } 2836 break; 2837 2838 case '[': 2839 el = handleOpenbracket(); 2840 break; 
2841 2842 case '*': 2843 if( m_newLine ) 2844 { 2845 pushBack('*'); 2846 el = handleGeneralList(); 2847 } 2848 break; 2849 2850 case '#': 2851 if( m_newLine ) 2852 { 2853 pushBack('#'); 2854 el = handleGeneralList(); 2855 } 2856 break; 2857 2858 case '|': 2859 el = handleBar( m_newLine ); 2860 break; 2861 2862 case '~': 2863 el = handleTilde(); 2864 break; 2865 2866 case '%': 2867 el = handleDiv( m_newLine ); 2868 break; 2869 2870 case '/': 2871 el = handleSlash( m_newLine ); 2872 break; 2873 2874 default: 2875 break; 2876 } 2877 2878 return el != null ? ELEMENT : CHARACTER; 2879 } 2880 2881 private void closeHeadings() 2882 { 2883 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2884 { 2885 // Add the hash anchor element at the end of the heading 2886 addElement( new Element("a").setAttribute( "class","hashlink" ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2887 m_lastHeading = null; 2888 } 2889 popElement("h2"); 2890 popElement("h3"); 2891 popElement("h4"); 2892 } 2893 2894 /** 2895 * Parses the entire document from the Reader given in the constructor or 2896 * set by {@link #setInputReader(Reader)}. 2897 * 2898 * @return A WikiDocument, ready to be passed to the renderer. 2899 * @throws IOException If parsing cannot be accomplished. 2900 */ 2901 public WikiDocument parse() 2902 throws IOException 2903 { 2904 WikiDocument d = new WikiDocument( m_context.getPage() ); 2905 d.setContext( m_context ); 2906 2907 Element rootElement = new Element("domroot"); 2908 2909 d.setRootElement( rootElement ); 2910 2911 fillBuffer( rootElement ); 2912 2913 paragraphify(rootElement); 2914 2915 return d; 2916 } 2917 2918 /** 2919 * Checks out that the first paragraph is correctly installed. 
2920 * 2921 * @param rootElement 2922 */ 2923 private void paragraphify(Element rootElement) 2924 { 2925 // 2926 // Add the paragraph tag to the first paragraph 2927 // 2928 List kids = rootElement.getContent(); 2929 2930 if( rootElement.getChild("p") != null ) 2931 { 2932 ArrayList<Content> ls = new ArrayList<Content>(); 2933 int idxOfFirstContent = 0; 2934 int count = 0; 2935 2936 for( Iterator i = kids.iterator(); i.hasNext(); count++ ) 2937 { 2938 Content c = (Content) i.next(); 2939 if( c instanceof Element ) 2940 { 2941 String name = ((Element)c).getName(); 2942 if( isBlockLevel(name) ) break; 2943 } 2944 2945 if( !(c instanceof ProcessingInstruction) ) 2946 { 2947 ls.add( c ); 2948 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2949 } 2950 } 2951 2952 // 2953 // If there were any elements, then add a new <p> (unless it would 2954 // be an empty one) 2955 // 2956 if( ls.size() > 0 ) 2957 { 2958 Element newel = new Element("p"); 2959 2960 for( Iterator i = ls.iterator(); i.hasNext(); ) 2961 { 2962 Content c = (Content) i.next(); 2963 2964 c.detach(); 2965 newel.addContent(c); 2966 } 2967 2968 // 2969 // Make sure there are no empty <p/> tags added. 2970 // 2971 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) 2972 rootElement.addContent(idxOfFirstContent, newel); 2973 } 2974 } 2975 } 2976 2977 2978 /** 2979 * Compares two Strings, and if one starts with the other, then 2980 * returns null. Otherwise just like the normal Comparator 2981 * for strings. 2982 * 2983 * @since 2984 */ 2985 private static class StartingComparator implements Comparator<String> 2986 { 2987 public int compare( String s1, String s2 ) 2988 { 2989 if( s1.length() > s2.length() ) 2990 { 2991 if( s1.startsWith(s2) && s2.length() > 1 ) return 0; 2992 } 2993 else 2994 { 2995 if( s2.startsWith(s1) && s1.length() > 1 ) return 0; 2996 } 2997 2998 return s1.compareTo( s2 ); 2999 } 3000 3001 } 3002 3003 3004 } 3005