001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.commons.text.StringEscapeUtils; 023import org.apache.log4j.Logger; 024import org.apache.oro.text.regex.MalformedPatternException; 025import org.apache.oro.text.regex.MatchResult; 026import org.apache.oro.text.regex.Pattern; 027import org.apache.oro.text.regex.PatternCompiler; 028import org.apache.oro.text.regex.PatternMatcher; 029import org.apache.oro.text.regex.Perl5Compiler; 030import org.apache.oro.text.regex.Perl5Matcher; 031import org.apache.wiki.InternalWikiException; 032import org.apache.wiki.StringTransmutator; 033import org.apache.wiki.WikiContext; 034import org.apache.wiki.WikiPage; 035import org.apache.wiki.api.exceptions.PluginException; 036import org.apache.wiki.api.plugin.WikiPlugin; 037import org.apache.wiki.auth.WikiSecurityException; 038import org.apache.wiki.auth.acl.Acl; 039import org.apache.wiki.i18n.InternationalizationManager; 040import org.apache.wiki.preferences.Preferences; 041import org.apache.wiki.render.CleanTextRenderer; 042import org.apache.wiki.render.RenderingManager; 043import org.apache.wiki.util.TextUtil; 044import 
org.jdom2.Attribute; 045import org.jdom2.Content; 046import org.jdom2.Element; 047import org.jdom2.IllegalDataException; 048import org.jdom2.ProcessingInstruction; 049import org.jdom2.Verifier; 050 051import javax.xml.transform.Result; 052import java.io.IOException; 053import java.io.Reader; 054import java.io.StringReader; 055import java.text.MessageFormat; 056import java.util.ArrayList; 057import java.util.Arrays; 058import java.util.Collection; 059import java.util.EmptyStackException; 060import java.util.HashMap; 061import java.util.Iterator; 062import java.util.List; 063import java.util.Map; 064import java.util.Properties; 065import java.util.ResourceBundle; 066import java.util.Stack; 067 068/** 069 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 070 * heart and soul of JSPWiki : make sure you test properly anything that is added, 071 * or else it breaks down horribly. 072 * 073 * @since 2.4 074 */ 075public class JSPWikiMarkupParser extends MarkupParser { 076 077 protected static final int READ = 0; 078 protected static final int EDIT = 1; 079 protected static final int EMPTY = 2; // Empty message 080 protected static final int LOCAL = 3; 081 protected static final int LOCALREF = 4; 082 protected static final int IMAGE = 5; 083 protected static final int EXTERNAL = 6; 084 protected static final int INTERWIKI = 7; 085 protected static final int IMAGELINK = 8; 086 protected static final int IMAGEWIKILINK = 9; 087 protected static final int ATTACHMENT = 10; 088 089 private static Logger log = Logger.getLogger( JSPWikiMarkupParser.class ); 090 091 private boolean m_isbold = false; 092 private boolean m_isitalic = false; 093 private boolean m_istable = false; 094 private boolean m_isPre = false; 095 private boolean m_isEscaping = false; 096 private boolean m_isdefinition = false; 097 private boolean m_isPreBlock = false; 098 099 /** Contains style information, in multiple forms. 
*/ 100 private Stack<Boolean> m_styleStack = new Stack<>(); 101 102 // general list handling 103 private int m_genlistlevel = 0; 104 private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern 105 private boolean m_allowPHPWikiStyleLists = true; 106 107 private boolean m_isOpenParagraph = false; 108 109 /** Parser for extended link functionality. */ 110 private LinkParser m_linkParser = new LinkParser(); 111 112 /** Keeps track of any plain text that gets put in the Text nodes */ 113 private StringBuilder m_plainTextBuf = new StringBuilder(20); 114 115 private Element m_currentElement; 116 117 /** Keep track of duplicate header names. */ 118 private Map<String, Integer> m_titleSectionCounter = new HashMap<>(); 119 120 /** If true, consider CamelCase hyperlinks as well. */ 121 public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks"; 122 123 /** If true, all hyperlinks are translated as well, regardless whether they 124 are surrounded by brackets. */ 125 public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris"; 126 127 /** If true, all outward attachment info links have a small link image appended. */ 128 public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage"; 129 130 /** If true, then considers CamelCase links as well. */ 131 private boolean m_camelCaseLinks = false; 132 133 /** If true, then generate special output for wysiwyg editing in certain cases */ 134 private boolean m_wysiwygEditorMode = false; 135 136 /** If true, consider URIs that have no brackets as well. */ 137 // FIXME: Currently reserved, but not used. 138 private boolean m_plainUris = false; 139 140 /** If true, all outward links use a small link image. */ 141 private boolean m_useOutlinkImage = true; 142 143 private boolean m_useAttachmentImage = true; 144 145 /** If true, allows raw HTML. 
*/ 146 private boolean m_allowHTML = false; 147 148 private boolean m_useRelNofollow = false; 149 150 private PatternCompiler m_compiler = new Perl5Compiler(); 151 152 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 153 154 private PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 155 private Pattern m_camelCasePattern; 156 157 private int m_rowNum = 1; 158 159 private Heading m_lastHeading = null; 160 161 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 162 163 /** 164 * Creates a markup parser. 165 * 166 * @param context The WikiContext which controls the parsing 167 * @param in Where the data is read from. 168 */ 169 public JSPWikiMarkupParser( WikiContext context, Reader in ) 170 { 171 super( context, in ); 172 initialize(); 173 } 174 175 // FIXME: parsers should be pooled for better performance. 176 private void initialize() 177 { 178 initInlineImagePatterns(); 179 180 m_camelCasePattern = (Pattern) m_engine.getAttribute( CAMELCASE_PATTERN ); 181 if( m_camelCasePattern == null ) 182 { 183 try 184 { 185 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX, 186 Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 187 } 188 catch( MalformedPatternException e ) 189 { 190 log.fatal("Internal error: Someone put in a faulty pattern.",e); 191 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 192 } 193 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 194 } 195 // 196 // Set the properties. 
197 // 198 Properties props = m_engine.getWikiProperties(); 199 200 String cclinks = (String)m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 201 202 if( cclinks != null ) 203 { 204 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 205 } 206 else 207 { 208 m_camelCaseLinks = TextUtil.getBooleanProperty( props, 209 PROP_CAMELCASELINKS, 210 m_camelCaseLinks ); 211 } 212 213 Boolean wysiwygVariable = (Boolean)m_context.getVariable( RenderingManager.WYSIWYG_EDITOR_MODE ); 214 if( wysiwygVariable != null ) 215 { 216 m_wysiwygEditorMode = wysiwygVariable.booleanValue(); 217 } 218 219 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 220 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 221 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 222 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 223 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 224 225 if( m_engine.getUserManager().getUserDatabase() == null || m_engine.getAuthorizationManager() == null ) 226 { 227 disableAccessRules(); 228 } 229 230 m_context.getPage().setHasMetadata(); 231 } 232 233 /** 234 * Calls a transmutator chain. 235 * 236 * @param list Chain to call 237 * @param text Text that should be passed to the mutate() method of each of the mutators in the chain. 238 * @return The result of the mutation. 239 */ 240 protected String callMutatorChain( Collection< StringTransmutator > list, String text ) 241 { 242 if( list == null || list.size() == 0 ) 243 { 244 return text; 245 } 246 247 for( Iterator< StringTransmutator > i = list.iterator(); i.hasNext(); ) 248 { 249 StringTransmutator m = i.next(); 250 251 text = m.mutate( m_context, text ); 252 } 253 254 return text; 255 } 256 257 /** 258 * Calls the heading listeners. 259 * 260 * @param param A Heading object. 
*/
    protected void callHeadingListenerChain( Heading param )
    {
        List< HeadingListener > list = m_headingListenerChain;

        for( Iterator< HeadingListener > i = list.iterator(); i.hasNext(); )
        {
            HeadingListener h = i.next();

            h.headingAdded( m_context, param );
        }
    }

    /**
     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
     *  if you really know what you are doing.
     *  <p>
     *  The link text and the section are passed through escapeHTMLEntities();
     *  the href is the link with the section appended to it.
     *
     *  @param type One of the types above; used as an index into CLASS_TYPES
     *              to pick the CSS class.
     *  @param link URL to which to link to
     *  @param text Link text
     *  @param section If a particular section identifier is required.
     *  @return An A element.
     *  @since 2.4.78
     */
    protected Element createAnchor(int type, String link, String text, String section)
    {
        text = escapeHTMLEntities( text );
        section = escapeHTMLEntities( section );
        Element el = new Element("a");
        el.setAttribute("class",CLASS_TYPES[type]);
        el.setAttribute("href",link+section);
        el.addContent(text);
        return el;
    }

    /**
     *  Builds the element for one link and appends it to the current element.
     *
     *  @param type One of the link type constants (READ, EDIT, ...); an empty
     *              link forces the type to EMPTY.
     *  @param link The link target; its meaning depends on the type.
     *  @param text The link text; when null the link itself is used.  The text
     *              is run through the link mutator chain.
     *  @param section Optional section name; when non-null it is prefixed with '#'.
     *  @param attributes Optional extra attributes that are set on the created element.
     *  @return The element that was appended, or null when nothing was appended
     *          (an unknown type, or an attachment without the info image).
     */
    private Element makeLink( int type, String link, String text, String section, Iterator< Attribute > attributes )
    {
        Element el = null;

        if( text == null ) text = link;

        text = callMutatorChain( m_linkMutators, text );

        section = (section != null) ? ("#"+section) : "";

        // Make sure we make a link name that can be accepted
        // as a valid URL.

        if( link.length() == 0 )
        {
            type = EMPTY;
        }
        ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );

        switch(type)
        {
            case READ:
                el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section );
                break;

            case EDIT:
                el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" );
                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );

                break;

            case EMPTY:
                el = new Element("u").addContent(text);
                break;

            //
            //  These two are for local references - footnotes and
            //  references to footnotes.
            //  We embed the page name (or whatever WikiContext gives us)
            //  to make sure the links are unique across Wiki.
            //
            case LOCALREF:
                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
                break;

            case LOCAL:
                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
                // The first character of the link is dropped from the anchor name.
                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
                el.addContent("["+text+"]");
                break;

            //
            //  With the image, external and interwiki types we need to
            //  make sure nobody can put in Javascript or something else
            //  annoying into the links themselves.  We do this by preventing
            //  a haxor from stopping the link name short with quotes in
            //  fillBuffer().
            //
            case IMAGE:
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                break;

            case IMAGELINK:
                // The image is wrapped in an anchor whose href is the link text.
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                el = createAnchor(IMAGELINK,text,"","").addContent(el);
                break;

            case IMAGEWIKILINK:
                // The image is wrapped in an anchor to the VIEW URL of the page named by the text.
                String pagelink = m_context.getURL(WikiContext.VIEW,text);
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
                break;

            case EXTERNAL:
                el = createAnchor( EXTERNAL, link, text, section );
                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
                break;

            case INTERWIKI:
                el = createAnchor( INTERWIKI, link, text, section );
                break;

            case ATTACHMENT:
                String attlink = m_context.getURL( WikiContext.ATTACH,
                                                   link );

                String infolink = m_context.getURL( WikiContext.INFO,
                                                    link );

                String imglink = m_context.getURL( WikiContext.NONE,
                                                   "images/attachment_small.png" );

                el = createAnchor( ATTACHMENT, attlink, text, "" );

                if( m_engine.getAttachmentManager().forceDownload( attlink ) )
                {
                    el.setAttribute("download", "");
                }

                // The attachment anchor is added to the tree right here; from
                // this point on "el" refers to the optional info link only.
                pushElement(el);
                popElement(el.getName());

                if( m_useAttachmentImage )
                {
                    el = new Element("img").setAttribute("src",imglink);
                    el.setAttribute("border","0");
                    el.setAttribute("alt","(info)");

                    el = new Element("a").setAttribute("href",infolink).addContent(el);
                    el.setAttribute("class","infolink");
                }
                else
                {
                    el = null;
                }
                break;

            default:
                break;
        }

        // Extra attributes go onto whatever element "el" refers to now; for
        // attachments that is the info link, not the attachment anchor.
        if( el != null && attributes != null )
        {
            while( attributes.hasNext() )
            {
                Attribute attr = attributes.next();
                if( attr != null )
                {
                    el.setAttribute(attr);
                }
            }
        }

        if( el != null )
        {
            flushPlainText();
            m_currentElement.addContent( el );
        }
        return el;
    }

    /**
     *  These are all of the HTML 4.01 block-level elements.
     *  The array is searched with a binary search, so it has to stay sorted.
     */
    private static final String[] BLOCK_ELEMENTS = {
        "address", "blockquote", "div", "dl", "fieldset", "form",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "hr", "noscript", "ol", "p", "pre", "table", "ul"
    };

    /**
     *  Tells whether the given element name is listed in BLOCK_ELEMENTS.
     *
     *  @param name The element name to look up.
     *  @return true, if the name was found.
     */
    private static boolean isBlockLevel( String name )
    {
        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
    }

    /**
     *  This method peeks ahead in the stream until EOL and returns the result.
     *  It will keep the buffers untouched.
     *  <p>
     *  The line is read with readUntilEOL() and then pushed back.  A line longer
     *  than PUSHBACK_BUFFER_SIZE is only partially pushed back, and a failing
     *  pushback of a shorter line is logged and otherwise ignored.
     *
     *  @return The string from the current position to the end of line.
     *  @throws IOException If reading fails, or if pushing back the truncated line fails.
     */

    // FIXME: Always returns an empty line, even if the stream is full.
    private String peekAheadLine()
        throws IOException
    {
        String s = readUntilEOL().toString();

        if( s.length() > PUSHBACK_BUFFER_SIZE )
        {
            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover...");
            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
        }
        else
        {
            try
            {
                pushBack( s );
            }
            catch( IOException e )
            {
                log.warn("Pushback failed: the line is probably too long. Attempting to recover.");
            }
        }
        return s;
    }

    /**
     *  Moves the buffered plain text into the current element.  The text is
     *  escaped first unless raw HTML is allowed.  When CamelCase links are
     *  enabled and the parser is not escaping, every match of the CamelCase
     *  pattern is turned into a wiki link or a direct URI link, unless the
     *  match is prefixed with '~' or the prefix contains '['.
     *
     *  @return The number of characters that were in the buffer before the flush.
     */
    private int flushPlainText()
    {
        int numChars = m_plainTextBuf.length();

        if( numChars > 0 )
        {
            String buf;

            if( !m_allowHTML )
            {
                buf = escapeHTMLEntities(m_plainTextBuf.toString());
            }
            else
            {
                buf = m_plainTextBuf.toString();
            }
            //
            //  We must first empty the buffer because the side effect of
            //  calling makeCamelCaseLink() is to call this routine.
            //

            m_plainTextBuf = new StringBuilder(20);

            try
            {
                //
                //  This is the heaviest part of parsing, and therefore we can
                //  do some optimization here.
                //
                //  1) Only when the length of the buffer is big enough, we try to do the match
                //

                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
                {
                    // System.out.println("Buffer="+buf);

                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
                    {
                        MatchResult result = m_camelCaseMatcher.getMatch();

                        // Group 1 is the text in front of the word, group 2 the
                        // CamelCase word, groups 3 and 4 the protocol and the
                        // rest of a plain URI (see WIKIWORD_REGEX).
                        String firstPart = buf.substring(0,result.beginOffset(0));
                        String prefix = result.group(1);

                        if( prefix == null ) prefix = "";

                        String camelCase = result.group(2);
                        String protocol  = result.group(3);
                        String uri       = protocol+result.group(4);
                        buf              = buf.substring(result.endOffset(0));

                        m_currentElement.addContent( firstPart );

                        //
                        //  Check if the user does not wish to do URL or WikiWord expansion
                        //
                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
                        {
                            if( prefix.endsWith("~") )
                            {
                                // The tilde itself is only kept in wysiwyg editor mode.
                                if( m_wysiwygEditorMode )
                                {
                                    m_currentElement.addContent( "~" );
                                }
                                prefix = prefix.substring(0,prefix.length()-1);
                            }
                            if( camelCase != null )
                            {
                                m_currentElement.addContent( prefix+camelCase );
                            }
                            else if( protocol != null )
                            {
                                m_currentElement.addContent( prefix+uri );
                            }
                            continue;
                        }

                        //
                        //  Fine, then let's check what kind of a link this was
                        //  and emit the proper elements
                        //
                        if( protocol != null )
                        {
                            // A trailing '.' or ',' is handed back to the
                            // remaining text instead of being part of the URI.
                            char c = uri.charAt(uri.length()-1);
                            if( c == '.' || c == ',' )
                            {
                                uri = uri.substring(0,uri.length()-1);
                                buf = c + buf;
                            }
                            // System.out.println("URI match "+uri);
                            m_currentElement.addContent( prefix );
                            makeDirectURILink( uri );
                        }
                        else
                        {
                            // System.out.println("Matched: '"+camelCase+"'");
                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
                            // System.out.println("prefix="+prefix);
                            m_currentElement.addContent( prefix );

                            makeCamelCaseLink( camelCase );
                        }
                    }

                    // Whatever is left after the last match is plain text.
                    m_currentElement.addContent( buf );
                }
                else
                {
                    //
                    //  No camelcase asked for, just add the elements
                    //
                    m_currentElement.addContent( buf );
                }
            }
            catch( IllegalDataException e )
            {
                //
                // Sometimes it's possible that illegal XML chars is added to the data.
                // Here we make sure it does not stop parsing.
                //
                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
            }
        }

        return numChars;
    }

    /**
     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
     *  entities that are already escaped).
     *
     *  @param buf
     *  @return An escaped string.
621 */ 622 private String escapeHTMLEntities(String buf) 623 { 624 StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); 625 626 for( int i = 0; i < buf.length(); i++ ) 627 { 628 char ch = buf.charAt(i); 629 630 if( ch == '<' ) 631 { 632 tmpBuf.append("<"); 633 } 634 else if( ch == '>' ) 635 { 636 tmpBuf.append(">"); 637 } 638 else if( ch == '\"' ) 639 { 640 tmpBuf.append("""); 641 } 642 else if( ch == '&' ) 643 { 644 // 645 // If the following is an XML entity reference (&#.*;) we'll 646 // leave it as it is; otherwise we'll replace it with an & 647 // 648 649 boolean isEntity = false; 650 StringBuilder entityBuf = new StringBuilder(); 651 652 if( i < buf.length() -1 ) 653 { 654 for( int j = i; j < buf.length(); j++ ) 655 { 656 char ch2 = buf.charAt(j); 657 658 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) 659 { 660 entityBuf.append(ch2); 661 662 if( ch2 == ';' ) 663 { 664 isEntity = true; 665 break; 666 } 667 } 668 else 669 { 670 break; 671 } 672 } 673 } 674 675 if( isEntity ) 676 { 677 tmpBuf.append( entityBuf ); 678 i = i + entityBuf.length() - 1; 679 } 680 else 681 { 682 tmpBuf.append("&"); 683 } 684 685 } 686 else 687 { 688 tmpBuf.append( ch ); 689 } 690 } 691 692 return tmpBuf.toString(); 693 } 694 695 private Element pushElement( Element e ) 696 { 697 flushPlainText(); 698 m_currentElement.addContent( e ); 699 m_currentElement = e; 700 701 return e; 702 } 703 704 private Element addElement( Content e ) 705 { 706 if( e != null ) 707 { 708 flushPlainText(); 709 m_currentElement.addContent( e ); 710 } 711 return m_currentElement; 712 } 713 714 /** 715 * All elements that can be empty by the HTML DTD. 716 */ 717 // Keep sorted. 
718 private static final String[] EMPTY_ELEMENTS = { 719 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 720 }; 721 722 /** 723 * Goes through the current element stack and pops all elements until this 724 * element is found - this essentially "closes" and element. 725 * 726 * @param s 727 * @return The new current element, or null, if there was no such element in the entire stack. 728 */ 729 private Element popElement( String s ) 730 { 731 int flushedBytes = flushPlainText(); 732 733 Element currEl = m_currentElement; 734 735 while( currEl.getParentElement() != null ) 736 { 737 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 738 { 739 m_currentElement = currEl.getParentElement(); 740 741 // 742 // Check if it's okay for this element to be empty. Then we will 743 // trick the JDOM generator into not generating an empty element, 744 // by putting an empty string between the tags. Yes, it's a kludge 745 // but what'cha gonna do about it. :-) 746 // 747 748 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 749 { 750 currEl.addContent(""); 751 } 752 753 return m_currentElement; 754 } 755 756 currEl = currEl.getParentElement(); 757 } 758 759 return null; 760 } 761 762 763 /** 764 * Reads the stream until it meets one of the specified 765 * ending characters, or stream end. The ending character will be left 766 * in the stream. 
767 */ 768 private String readUntil( String endChars ) 769 throws IOException 770 { 771 StringBuilder sb = new StringBuilder( 80 ); 772 int ch = nextToken(); 773 774 while( ch != -1 ) 775 { 776 if( ch == '\\' ) 777 { 778 ch = nextToken(); 779 if( ch == -1 ) 780 { 781 break; 782 } 783 } 784 else 785 { 786 if( endChars.indexOf((char)ch) != -1 ) 787 { 788 pushBack( ch ); 789 break; 790 } 791 } 792 sb.append( (char) ch ); 793 ch = nextToken(); 794 } 795 796 return sb.toString(); 797 } 798 799 /** 800 * Reads the stream while the characters that have been specified are 801 * in the stream, returning then the result as a String. 802 */ 803 private String readWhile( String endChars ) 804 throws IOException 805 { 806 StringBuilder sb = new StringBuilder( 80 ); 807 int ch = nextToken(); 808 809 while( ch != -1 ) 810 { 811 if( endChars.indexOf((char)ch) == -1 ) 812 { 813 pushBack( ch ); 814 break; 815 } 816 817 sb.append( (char) ch ); 818 ch = nextToken(); 819 } 820 821 return sb.toString(); 822 } 823 824 private JSPWikiMarkupParser m_cleanTranslator; 825 826 /** 827 * Does a lazy init. Otherwise, we would get into a situation 828 * where HTMLRenderer would try and boot a TranslatorReader before 829 * the TranslatorReader it is contained by is up. 830 */ 831 private JSPWikiMarkupParser getCleanTranslator() 832 { 833 if( m_cleanTranslator == null ) 834 { 835 WikiContext dummyContext = new WikiContext( m_engine, 836 m_context.getHttpRequest(), 837 m_context.getPage() ); 838 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 839 840 m_cleanTranslator.m_allowHTML = true; 841 } 842 843 return m_cleanTranslator; 844 } 845 /** 846 * Modifies the "hd" parameter to contain proper values. Because 847 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 848 * % after url encoding with '_'. 849 * <p> 850 * Counts also duplicate headings (= headings with similar name), and 851 * attaches a counter. 
852 */ 853 private String makeHeadingAnchor( String baseName, String title, Heading hd ) 854 { 855 hd.m_titleText = title; 856 title = MarkupParser.wikifyLink( title ); 857 858 hd.m_titleSection = m_engine.encodeName(title); 859 860 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) 861 { 862 Integer count = m_titleSectionCounter.get( hd.m_titleSection ); 863 count = count + 1; 864 m_titleSectionCounter.put( hd.m_titleSection, count ); 865 hd.m_titleSection += "-" + count; 866 } 867 else 868 { 869 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 870 } 871 872 hd.m_titleAnchor = "section-"+m_engine.encodeName(baseName)+ 873 "-"+hd.m_titleSection; 874 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 875 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 876 877 return hd.m_titleAnchor; 878 } 879 880 private String makeSectionTitle( String title ) 881 { 882 title = title.trim(); 883 String outTitle; 884 885 try 886 { 887 JSPWikiMarkupParser dtr = getCleanTranslator(); 888 dtr.setInputReader( new StringReader(title) ); 889 890 CleanTextRenderer ctt = new CleanTextRenderer(m_context, dtr.parse()); 891 892 outTitle = ctt.getString(); 893 } 894 catch( IOException e ) 895 { 896 log.fatal("CleanTranslator not working", e); 897 throw new InternalWikiException("CleanTranslator not working as expected, when cleaning title"+ e.getMessage() , e); 898 } 899 900 return outTitle; 901 } 902 903 /** 904 * Returns XHTML for the heading. 905 * 906 * @param level The level of the heading. 
@see Heading 907 * @param title the title for the heading 908 * @param hd a List to which heading should be added 909 * @return An Element containing the heading 910 */ 911 public Element makeHeading( int level, String title, Heading hd ) 912 { 913 Element el = null; 914 915 String pageName = m_context.getPage().getName(); 916 917 String outTitle = makeSectionTitle( title ); 918 919 hd.m_level = level; 920 921 switch( level ) 922 { 923 case Heading.HEADING_SMALL: 924 el = new Element("h4").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 925 break; 926 927 case Heading.HEADING_MEDIUM: 928 el = new Element("h3").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 929 break; 930 931 case Heading.HEADING_LARGE: 932 el = new Element("h2").setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd )); 933 break; 934 935 default: 936 throw new InternalWikiException("Illegal heading type "+level); 937 } 938 939 940 return el; 941 } 942 943 /** 944 * When given a link to a WikiName, we just return 945 * a proper HTML link for it. The local link mutator 946 * chain is also called. 947 */ 948 private Element makeCamelCaseLink( String wikiname ) 949 { 950 String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 951 952 callMutatorChain( m_localLinkMutatorChain, wikiname ); 953 954 if( matchedLink != null ) { 955 makeLink( READ, matchedLink, wikiname, null, null ); 956 } else { 957 makeLink( EDIT, wikiname, wikiname, null, null ); 958 } 959 960 return m_currentElement; 961 } 962 963 /** Holds the image URL for the duration of this parser */ 964 private String m_outlinkImageURL = null; 965 966 /** 967 * Returns an element for the external link image (out.png). However, 968 * this method caches the URL for the lifetime of this MarkupParser, 969 * because it's commonly used, and we'll end up with possibly hundreds 970 * our thousands of references to it... It's a lot faster, too. 
971 * 972 * @return An element containing the HTML for the outlink image. 973 */ 974 private Element outlinkImage() 975 { 976 Element el = null; 977 978 if( m_useOutlinkImage ) 979 { 980 if( m_outlinkImageURL == null ) 981 { 982 m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE ); 983 } 984 985 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 986 el.setAttribute( "src", m_outlinkImageURL ); 987 el.setAttribute( "alt","" ); 988 } 989 990 return el; 991 } 992 993 /** 994 * Takes an URL and turns it into a regular wiki link. Unfortunately, 995 * because of the way that flushPlainText() works, it already encodes 996 * all of the XML entities. But so does WikiContext.getURL(), so we 997 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 998 * <p> 999 * What a crappy problem. 1000 * 1001 * @param url 1002 * @return An anchor Element containing the link. 1003 */ 1004 private Element makeDirectURILink( String url ) 1005 { 1006 Element result; 1007 String last = null; 1008 1009 if( url.endsWith(",") || url.endsWith(".") ) 1010 { 1011 last = url.substring( url.length()-1 ); 1012 url = url.substring( 0, url.length()-1 ); 1013 } 1014 1015 callMutatorChain( m_externalLinkMutatorChain, url ); 1016 1017 if( m_linkParsingOperations.isImageLink( url ) ) 1018 { 1019 result = handleImageLink( StringUtils.replace(url,"&","&"), url, false ); 1020 } 1021 else 1022 { 1023 result = makeLink( EXTERNAL, StringUtils.replace(url,"&","&"), url, null, null ); 1024 addElement( outlinkImage() ); 1025 } 1026 1027 if( last != null ) 1028 { 1029 m_plainTextBuf.append(last); 1030 } 1031 1032 return result; 1033 } 1034 1035 /** 1036 * Image links are handled differently: 1037 * 1. If the text is a WikiName of an existing page, 1038 * it gets linked. 1039 * 2. If the text is an external link, then it is inlined. 1040 * 3. Otherwise it becomes an ALT text. 1041 * 1042 * @param reallink The link to the image. 
1043 * @param link Link text portion, may be a link to somewhere else. 1044 * @param hasLinkText If true, then the defined link had a link text available. 1045 * This means that the link text may be a link to a wiki page, 1046 * or an external resource. 1047 */ 1048 1049 // FIXME: isExternalLink() is called twice. 1050 private Element handleImageLink( String reallink, String link, boolean hasLinkText ) 1051 { 1052 String possiblePage = MarkupParser.cleanLink( link ); 1053 1054 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) 1055 { 1056 return makeLink( IMAGELINK, reallink, link, null, null ); 1057 } 1058 else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) 1059 { 1060 // System.out.println("Orig="+link+", Matched: "+matchedLink); 1061 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 1062 1063 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 1064 } 1065 else 1066 { 1067 return makeLink( IMAGE, reallink, link, null, null ); 1068 } 1069 } 1070 1071 private Element handleAccessRule( String ruleLine ) { 1072 if( m_wysiwygEditorMode ) { 1073 m_currentElement.addContent( "[" + ruleLine + "]" ); 1074 } 1075 1076 if( !m_parseAccessRules ) { 1077 return m_currentElement; 1078 } 1079 final WikiPage page = m_context.getRealPage(); 1080 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1081 1082 if( ruleLine.startsWith( "{" ) ) { 1083 ruleLine = ruleLine.substring( 1 ); 1084 } 1085 1086 if( ruleLine.endsWith( "}" ) ) { 1087 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1088 } 1089 1090 if( log.isDebugEnabled() ) { 1091 log.debug("page="+page.getName()+", ACL = "+ruleLine); 1092 } 1093 1094 try { 1095 final Acl acl = m_engine.getAclManager().parseAcl( page, ruleLine ); 1096 page.setAcl( acl ); 1097 1098 if( log.isDebugEnabled() ) { 1099 log.debug( acl.toString() ); 1100 } 1101 } catch( final WikiSecurityException wse ) { 1102 return makeError( wse.getMessage() ); 1103 } 1104 1105 return 
m_currentElement; 1106 } 1107 1108 /** 1109 * Handles metadata setting [{SET foo=bar}] 1110 */ 1111 private Element handleMetadata( final String link ) { 1112 if( m_wysiwygEditorMode ) { 1113 m_currentElement.addContent( "[" + link + "]" ); 1114 } 1115 1116 try { 1117 final String args = link.substring( link.indexOf(' '), link.length()-1 ); 1118 final String name = args.substring( 0, args.indexOf('=') ).trim(); 1119 String val = args.substring( args.indexOf('=')+1 ).trim(); 1120 1121 if( val.startsWith("'") ) { 1122 val = val.substring( 1 ); 1123 } 1124 if( val.endsWith("'") ) { 1125 val = val.substring( 0, val.length()-1 ); 1126 } 1127 1128 // log.debug("SET name='"+name+"', value='"+val+"'."); 1129 1130 if( name.length() > 0 && val.length() > 0 ) { 1131 val = m_engine.getVariableManager().expandVariables( m_context, val ); 1132 m_context.getPage().setAttribute( name, val ); 1133 } 1134 } catch( final Exception e ) { 1135 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1136 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1137 } 1138 1139 return m_currentElement; 1140 } 1141 1142 /** 1143 * Emits a processing instruction that will disable markup escaping. This is 1144 * very useful if you want to emit HTML directly into the stream. 1145 * 1146 */ 1147 private void disableOutputEscaping() 1148 { 1149 addElement( new ProcessingInstruction(Result.PI_DISABLE_OUTPUT_ESCAPING, "") ); 1150 } 1151 1152 /** 1153 * Gobbles up all hyperlinks that are encased in square brackets. 
1154 */ 1155 private Element handleHyperlinks( String linktext, int pos ) 1156 { 1157 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1158 1159 StringBuilder sb = new StringBuilder(linktext.length()+80); 1160 1161 if( m_linkParsingOperations.isAccessRule( linktext ) ) 1162 { 1163 return handleAccessRule( linktext ); 1164 } 1165 1166 if( m_linkParsingOperations.isMetadata( linktext ) ) 1167 { 1168 return handleMetadata( linktext ); 1169 } 1170 1171 if( m_linkParsingOperations.isPluginLink( linktext ) ) 1172 { 1173 try 1174 { 1175 PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1176 // 1177 // This might sometimes fail, especially if there is something which looks 1178 // like a plugin invocation but is really not. 1179 // 1180 if( pluginContent != null ) 1181 { 1182 addElement( pluginContent ); 1183 1184 pluginContent.executeParse( m_context ); 1185 } 1186 } 1187 catch( PluginException e ) 1188 { 1189 log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 1190 //log.info( "Root cause:",e.getRootThrowable() ); 1191 if( !m_wysiwygEditorMode ) 1192 { 1193 ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1194 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 1195 m_context.getRealPage().getWiki(), 1196 m_context.getRealPage().getName(), 1197 e.getMessage() ) ) ); 1198 } 1199 } 1200 1201 return m_currentElement; 1202 } 1203 1204 try 1205 { 1206 LinkParser.Link link = m_linkParser.parse(linktext); 1207 linktext = link.getText(); 1208 String linkref = link.getReference(); 1209 1210 // 1211 // Yes, we now have the components separated. 1212 // linktext = the text the link should have 1213 // linkref = the url or page name. 1214 // 1215 // In many cases these are the same. [linktext|linkref]. 
1216 // 1217 if( m_linkParsingOperations.isVariableLink( linktext ) ) 1218 { 1219 Content el = new VariableContent(linktext); 1220 1221 addElement( el ); 1222 } 1223 else if( m_linkParsingOperations.isExternalLink( linkref ) ) 1224 { 1225 // It's an external link, out of this Wiki 1226 1227 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1228 1229 if( m_linkParsingOperations.isImageLink( linkref ) ) 1230 { 1231 handleImageLink( linkref, linktext, link.hasReference() ); 1232 } 1233 else 1234 { 1235 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1236 addElement( outlinkImage() ); 1237 } 1238 } 1239 else if( link.isInterwikiLink() ) 1240 { 1241 // It's an interwiki link 1242 // InterWiki links also get added to external link chain 1243 // after the links have been resolved. 1244 1245 // FIXME: There is an interesting issue here: We probably should 1246 // URLEncode the wikiPage, but we can't since some of the 1247 // Wikis use slashes (/), which won't survive URLEncoding. 1248 // Besides, we don't know which character set the other Wiki 1249 // is using, so you'll have to write the entire name as it appears 1250 // in the URL. Bugger. 
1251 1252 String extWiki = link.getExternalWiki(); 1253 String wikiPage = link.getExternalWikiPage(); 1254 1255 if( m_wysiwygEditorMode ) 1256 { 1257 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1258 } 1259 else 1260 { 1261 String urlReference = m_engine.getInterWikiURL( extWiki ); 1262 1263 if( urlReference != null ) 1264 { 1265 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1266 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1267 1268 if( m_linkParsingOperations.isImageLink(urlReference) ) 1269 { 1270 handleImageLink( urlReference, linktext, link.hasReference() ); 1271 } 1272 else 1273 { 1274 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1275 } 1276 1277 if( m_linkParsingOperations.isExternalLink(urlReference) ) 1278 { 1279 addElement( outlinkImage() ); 1280 } 1281 } 1282 else 1283 { 1284 Object[] args = { escapeHTMLEntities(extWiki) }; 1285 1286 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1287 } 1288 } 1289 } 1290 else if( linkref.startsWith("#") ) 1291 { 1292 // It defines a local footnote 1293 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1294 } 1295 else if( TextUtil.isNumber( linkref ) ) 1296 { 1297 // It defines a reference to a local footnote 1298 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1299 } 1300 else 1301 { 1302 int hashMark = -1; 1303 1304 // 1305 // Internal wiki link, but is it an attachment link? 
1306 // 1307 String attachment = m_engine.getAttachmentManager().getAttachmentInfoName( m_context, linkref ); 1308 if( attachment != null ) 1309 { 1310 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1311 1312 if( m_linkParsingOperations.isImageLink( linkref ) ) 1313 { 1314 attachment = m_context.getURL( WikiContext.ATTACH, attachment ); 1315 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1316 } 1317 else 1318 { 1319 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1320 } 1321 } 1322 else if( (hashMark = linkref.indexOf('#')) != -1 ) 1323 { 1324 // It's an internal Wiki link, but to a named section 1325 1326 String namedSection = linkref.substring( hashMark+1 ); 1327 linkref = linkref.substring( 0, hashMark ); 1328 1329 linkref = MarkupParser.cleanLink( linkref ); 1330 1331 callMutatorChain( m_localLinkMutatorChain, linkref ); 1332 1333 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1334 if( matchedLink != null ) { 1335 String sectref = "section-"+m_engine.encodeName(matchedLink+"-"+wikifyLink(namedSection)); 1336 sectref = sectref.replace('%', '_'); 1337 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1338 } else { 1339 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1340 } 1341 } 1342 else 1343 { 1344 // It's an internal Wiki link 1345 linkref = MarkupParser.cleanLink( linkref ); 1346 1347 callMutatorChain( m_localLinkMutatorChain, linkref ); 1348 1349 String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1350 if( matchedLink != null ) { 1351 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1352 } else { 1353 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1354 } 1355 } 1356 } 1357 } 1358 catch( ParseException e ) 1359 { 1360 log.info("Parser failure: ",e); 1361 Object[] args = { e.getMessage() }; 1362 addElement( makeError( MessageFormat.format( rb.getString( 
"markupparser.error.parserfailure" ), args ) ) ); 1363 } 1364 1365 return m_currentElement; 1366 } 1367 1368 /** 1369 * Pushes back any string that has been read. It will obviously 1370 * be pushed back in a reverse order. 1371 * 1372 * @since 2.1.77 1373 */ 1374 private void pushBack( String s ) 1375 throws IOException 1376 { 1377 for( int i = s.length()-1; i >= 0; i-- ) 1378 { 1379 pushBack( s.charAt(i) ); 1380 } 1381 } 1382 1383 private Element handleBackslash() 1384 throws IOException 1385 { 1386 int ch = nextToken(); 1387 1388 if( ch == '\\' ) 1389 { 1390 int ch2 = nextToken(); 1391 1392 if( ch2 == '\\' ) 1393 { 1394 pushElement( new Element("br").setAttribute("clear","all")); 1395 return popElement("br"); 1396 } 1397 1398 pushBack( ch2 ); 1399 1400 pushElement( new Element("br") ); 1401 return popElement("br"); 1402 } 1403 1404 pushBack( ch ); 1405 1406 return null; 1407 } 1408 1409 private Element handleUnderscore() 1410 throws IOException 1411 { 1412 int ch = nextToken(); 1413 Element el = null; 1414 1415 if( ch == '_' ) 1416 { 1417 if( m_isbold ) 1418 { 1419 el = popElement("b"); 1420 } 1421 else 1422 { 1423 el = pushElement( new Element("b") ); 1424 } 1425 m_isbold = !m_isbold; 1426 } 1427 else 1428 { 1429 pushBack( ch ); 1430 } 1431 1432 return el; 1433 } 1434 1435 1436 /** 1437 * For example: italics. 
1438 */ 1439 private Element handleApostrophe() 1440 throws IOException 1441 { 1442 int ch = nextToken(); 1443 Element el = null; 1444 1445 if( ch == '\'' ) 1446 { 1447 if( m_isitalic ) 1448 { 1449 el = popElement("i"); 1450 } 1451 else 1452 { 1453 el = pushElement( new Element("i") ); 1454 } 1455 m_isitalic = !m_isitalic; 1456 } 1457 else 1458 { 1459 pushBack( ch ); 1460 } 1461 1462 return el; 1463 } 1464 1465 private Element handleOpenbrace( boolean isBlock ) 1466 throws IOException 1467 { 1468 int ch = nextToken(); 1469 1470 if( ch == '{' ) 1471 { 1472 int ch2 = nextToken(); 1473 1474 if( ch2 == '{' ) 1475 { 1476 m_isPre = true; 1477 m_isEscaping = true; 1478 m_isPreBlock = isBlock; 1479 1480 if( isBlock ) 1481 { 1482 startBlockLevel(); 1483 return pushElement( new Element("pre") ); 1484 } 1485 1486 return pushElement( new Element("span").setAttribute("class","inline-code") ); 1487 } 1488 1489 pushBack( ch2 ); 1490 1491 return pushElement( new Element("tt") ); 1492 } 1493 1494 pushBack( ch ); 1495 1496 return null; 1497 } 1498 1499 /** 1500 * Handles both }} and }}} 1501 */ 1502 private Element handleClosebrace() 1503 throws IOException 1504 { 1505 int ch2 = nextToken(); 1506 1507 if( ch2 == '}' ) 1508 { 1509 int ch3 = nextToken(); 1510 1511 if( ch3 == '}' ) 1512 { 1513 if( m_isPre ) 1514 { 1515 if( m_isPreBlock ) 1516 { 1517 popElement( "pre" ); 1518 } 1519 else 1520 { 1521 popElement( "span" ); 1522 } 1523 1524 m_isPre = false; 1525 m_isEscaping = false; 1526 return m_currentElement; 1527 } 1528 1529 m_plainTextBuf.append("}}}"); 1530 return m_currentElement; 1531 } 1532 1533 pushBack( ch3 ); 1534 1535 if( !m_isEscaping ) 1536 { 1537 return popElement("tt"); 1538 } 1539 } 1540 1541 pushBack( ch2 ); 1542 1543 return null; 1544 } 1545 1546 private Element handleDash() 1547 throws IOException 1548 { 1549 int ch = nextToken(); 1550 1551 if( ch == '-' ) 1552 { 1553 int ch2 = nextToken(); 1554 1555 if( ch2 == '-' ) 1556 { 1557 int ch3 = nextToken(); 1558 1559 if( 
ch3 == '-' ) 1560 { 1561 // Empty away all the rest of the dashes. 1562 // Do not forget to return the first non-match back. 1563 do 1564 { 1565 ch = nextToken(); 1566 } 1567 while ( ch == '-' ); 1568 1569 pushBack(ch); 1570 startBlockLevel(); 1571 pushElement( new Element("hr") ); 1572 return popElement( "hr" ); 1573 } 1574 1575 pushBack( ch3 ); 1576 } 1577 pushBack( ch2 ); 1578 } 1579 1580 pushBack( ch ); 1581 1582 return null; 1583 } 1584 1585 private Element handleHeading() 1586 throws IOException 1587 { 1588 Element el = null; 1589 1590 int ch = nextToken(); 1591 1592 Heading hd = new Heading(); 1593 1594 if( ch == '!' ) 1595 { 1596 int ch2 = nextToken(); 1597 1598 if( ch2 == '!' ) 1599 { 1600 String title = peekAheadLine(); 1601 1602 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1603 } 1604 else 1605 { 1606 pushBack( ch2 ); 1607 String title = peekAheadLine(); 1608 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1609 } 1610 } 1611 else 1612 { 1613 pushBack( ch ); 1614 String title = peekAheadLine(); 1615 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1616 } 1617 1618 callHeadingListenerChain( hd ); 1619 1620 m_lastHeading = hd; 1621 1622 if( el != null ) pushElement(el); 1623 1624 return el; 1625 } 1626 1627 /** 1628 * Reads the stream until the next EOL or EOF. Note that it will also read the 1629 * EOL from the stream. 
1630 */ 1631 private StringBuilder readUntilEOL() 1632 throws IOException 1633 { 1634 int ch; 1635 StringBuilder buf = new StringBuilder( 256 ); 1636 1637 while( true ) 1638 { 1639 ch = nextToken(); 1640 1641 if( ch == -1 ) 1642 break; 1643 1644 buf.append( (char) ch ); 1645 1646 if( ch == '\n' ) 1647 break; 1648 } 1649 return buf; 1650 } 1651 1652 /** Controls whether italic is restarted after a paragraph shift */ 1653 1654 private boolean m_restartitalic = false; 1655 private boolean m_restartbold = false; 1656 1657 private boolean m_newLine; 1658 1659 /** 1660 * Starts a block level element, therefore closing 1661 * a potential open paragraph tag. 1662 */ 1663 private void startBlockLevel() 1664 { 1665 // These may not continue over block level limits in XHTML 1666 1667 popElement("i"); 1668 popElement("b"); 1669 popElement("tt"); 1670 1671 if( m_isOpenParagraph ) 1672 { 1673 m_isOpenParagraph = false; 1674 popElement("p"); 1675 m_plainTextBuf.append("\n"); // Just small beautification 1676 } 1677 1678 m_restartitalic = m_isitalic; 1679 m_restartbold = m_isbold; 1680 1681 m_isitalic = false; 1682 m_isbold = false; 1683 } 1684 1685 private static String getListType( char c ) 1686 { 1687 if( c == '*' ) 1688 { 1689 return "ul"; 1690 } 1691 else if( c == '#' ) 1692 { 1693 return "ol"; 1694 } 1695 throw new InternalWikiException("Parser got faulty list type: "+c); 1696 } 1697 /** 1698 * Like original handleOrderedList() and handleUnorderedList() 1699 * however handles both ordered ('#') and unordered ('*') mixed together. 1700 */ 1701 1702 // FIXME: Refactor this; it's a bit messy. 
1703 1704 private Element handleGeneralList() 1705 throws IOException 1706 { 1707 startBlockLevel(); 1708 1709 String strBullets = readWhile( "*#" ); 1710 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution 1711 int numBullets = strBullets.length(); 1712 1713 // override the beginning portion of bullet pattern to be like the previous 1714 // to simulate PHPWiki style lists 1715 1716 if(m_allowPHPWikiStyleLists) 1717 { 1718 // only substitute if different 1719 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1720 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) 1721 { 1722 if(numBullets <= m_genlistlevel) 1723 { 1724 // Substitute all but the last character (keep the expressed bullet preference) 1725 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") 1726 + strBullets.substring(numBullets-1, numBullets); 1727 } 1728 else 1729 { 1730 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); 1731 } 1732 } 1733 } 1734 1735 // 1736 // Check if this is still of the same type 1737 // 1738 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1739 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) 1740 { 1741 if( numBullets > m_genlistlevel ) 1742 { 1743 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); 1744 1745 for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) 1746 { 1747 // bullets are growing, get from new bullet list 1748 pushElement( new Element("li") ); 1749 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); 1750 } 1751 } 1752 else if( numBullets < m_genlistlevel ) 1753 { 1754 // Close the previous list item. 
1755 // buf.append( m_renderer.closeListItem() ); 1756 popElement( "li" ); 1757 1758 for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) 1759 { 1760 // bullets are shrinking, get from old bullet list 1761 1762 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1763 if( m_genlistlevel > 0 ) 1764 { 1765 popElement( "li" ); 1766 } 1767 1768 } 1769 } 1770 else 1771 { 1772 if( m_genlistlevel > 0 ) 1773 { 1774 popElement( "li" ); 1775 } 1776 } 1777 } 1778 else 1779 { 1780 // 1781 // The pattern has changed, unwind and restart 1782 // 1783 int numEqualBullets; 1784 int numCheckBullets; 1785 1786 // find out how much is the same 1787 numEqualBullets = 0; 1788 numCheckBullets = Math.min(numBullets,m_genlistlevel); 1789 1790 while( numEqualBullets < numCheckBullets ) 1791 { 1792 // if the bullets are equal so far, keep going 1793 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) 1794 numEqualBullets++; 1795 // otherwise giveup, we have found how many are equal 1796 else 1797 break; 1798 } 1799 1800 //unwind 1801 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) 1802 { 1803 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); 1804 if( m_genlistlevel > numBullets ) 1805 { 1806 popElement("li"); 1807 } 1808 } 1809 1810 //rewind 1811 1812 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); 1813 for(int i = numEqualBullets; i < numBullets; i++) 1814 { 1815 pushElement( new Element("li") ); 1816 pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); 1817 } 1818 m_genlistlevel = numBullets; 1819 } 1820 1821 // 1822 // Push a new list item, and eat away any extra whitespace 1823 // 1824 pushElement( new Element("li") ); 1825 readWhile(" "); 1826 1827 // work done, remember the new bullet list (in place of old one) 1828 m_genlistBulletBuffer.setLength(0); 1829 m_genlistBulletBuffer.append(strBullets); 1830 1831 return m_currentElement; 1832 } 
1833 1834 private Element unwindGeneralList() 1835 { 1836 //unwind 1837 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 1838 { 1839 popElement( "li" ); 1840 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1841 } 1842 1843 m_genlistBulletBuffer.setLength(0); 1844 1845 return null; 1846 } 1847 1848 1849 private Element handleDefinitionList() 1850 throws IOException 1851 { 1852 if( !m_isdefinition ) 1853 { 1854 m_isdefinition = true; 1855 1856 startBlockLevel(); 1857 1858 pushElement( new Element("dl") ); 1859 return pushElement( new Element("dt") ); 1860 } 1861 1862 return null; 1863 } 1864 1865 private Element handleOpenbracket() 1866 throws IOException 1867 { 1868 StringBuilder sb = new StringBuilder(40); 1869 int pos = getPosition(); 1870 int ch = nextToken(); 1871 boolean isPlugin = false; 1872 1873 if( ch == '[' ) 1874 { 1875 if( m_wysiwygEditorMode ) 1876 { 1877 sb.append( '[' ); 1878 } 1879 1880 sb.append( (char)ch ); 1881 1882 while( (ch = nextToken()) == '[' ) 1883 { 1884 sb.append( (char)ch ); 1885 } 1886 } 1887 1888 1889 if( ch == '{' ) 1890 { 1891 isPlugin = true; 1892 } 1893 1894 pushBack( ch ); 1895 1896 if( sb.length() > 0 ) 1897 { 1898 m_plainTextBuf.append( sb ); 1899 return m_currentElement; 1900 } 1901 1902 // 1903 // Find end of hyperlink 1904 // 1905 1906 ch = nextToken(); 1907 int nesting = 1; // Check for nested plugins 1908 1909 while( ch != -1 ) 1910 { 1911 int ch2 = nextToken(); pushBack(ch2); 1912 1913 if( isPlugin ) 1914 { 1915 if( ch == '[' && ch2 == '{' ) 1916 { 1917 nesting++; 1918 } 1919 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 1920 { 1921 break; 1922 } 1923 else if( ch == '}' && ch2 == ']' ) 1924 { 1925 // NB: This will be decremented once at the end 1926 nesting--; 1927 } 1928 } 1929 else 1930 { 1931 if( ch == ']' ) 1932 { 1933 break; 1934 } 1935 } 1936 1937 sb.append( (char) ch ); 1938 1939 ch = nextToken(); 1940 } 1941 1942 // 1943 // If the link is never finished, do some 
tricks to display the rest of the line 1944 // unchanged. 1945 // 1946 if( ch == -1 ) 1947 { 1948 log.debug("Warning: unterminated link detected!"); 1949 m_isEscaping = true; 1950 m_plainTextBuf.append( sb ); 1951 flushPlainText(); 1952 m_isEscaping = false; 1953 return m_currentElement; 1954 } 1955 1956 return handleHyperlinks( sb.toString(), pos ); 1957 } 1958 1959 /** 1960 * Reads the stream until the current brace is closed or stream end. 1961 */ 1962 private String readBraceContent( char opening, char closing ) 1963 throws IOException 1964 { 1965 StringBuilder sb = new StringBuilder(40); 1966 int braceLevel = 1; 1967 int ch; 1968 while(( ch = nextToken() ) != -1 ) 1969 { 1970 if( ch == '\\' ) 1971 { 1972 continue; 1973 } 1974 else if ( ch == opening ) 1975 { 1976 braceLevel++; 1977 } 1978 else if ( ch == closing ) 1979 { 1980 braceLevel--; 1981 if (braceLevel==0) 1982 { 1983 break; 1984 } 1985 } 1986 sb.append( (char)ch ); 1987 } 1988 return sb.toString(); 1989 } 1990 1991 1992 /** 1993 * Handles constructs of type %%(style) and %%class 1994 * @param newLine 1995 * @return An Element containing the div or span, depending on the situation. 1996 * @throws IOException 1997 */ 1998 private Element handleDiv( boolean newLine ) 1999 throws IOException 2000 { 2001 int ch = nextToken(); 2002 Element el = null; 2003 2004 if( ch == '%' ) 2005 { 2006 String style = null; 2007 String clazz = null; 2008 2009 ch = nextToken(); 2010 2011 // 2012 // Style or class? 2013 // 2014 if( ch == '(' ) 2015 { 2016 style = readBraceContent('(',')'); 2017 } 2018 else if( Character.isLetter( (char) ch ) ) 2019 { 2020 pushBack( ch ); 2021 clazz = readUntil( "( \t\n\r" ); 2022 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 2023 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 2024 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 2025 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 
2026 2027 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 2028 //(2) remove any invalid character 2029 if( clazz != null){ 2030 2031 clazz = clazz.replace('.', ' ') 2032 .replaceAll("[^\\s-_\\w\\x200-\\x377]+",""); 2033 2034 } 2035 ch = nextToken(); 2036 2037 //check for %%class1.class2( style information ) 2038 if( ch == '(' ) 2039 { 2040 style = readBraceContent('(',')'); 2041 } 2042 // 2043 // Pop out only spaces, so that the upcoming EOL check does not check the 2044 // next line. 2045 // 2046 else if( ch == '\n' || ch == '\r' ) 2047 { 2048 pushBack(ch); 2049 } 2050 } 2051 else 2052 { 2053 // 2054 // Anything else stops. 2055 // 2056 2057 pushBack(ch); 2058 2059 try 2060 { 2061 Boolean isSpan = m_styleStack.pop(); 2062 2063 if( isSpan == null ) 2064 { 2065 // Fail quietly 2066 } 2067 else if( isSpan.booleanValue() ) 2068 { 2069 el = popElement( "span" ); 2070 } 2071 else 2072 { 2073 el = popElement( "div" ); 2074 } 2075 } 2076 catch( EmptyStackException e ) 2077 { 2078 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 2079 return m_currentElement; 2080 } 2081 2082 return el; 2083 } 2084 2085 // 2086 // Check if there is an attempt to do something nasty 2087 // 2088 2089 try 2090 { 2091 style = StringEscapeUtils.unescapeHtml4(style); 2092 if( style != null && style.indexOf("javascript:") != -1 ) 2093 { 2094 log.debug("Attempt to output javascript within CSS:"+style); 2095 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2096 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 2097 } 2098 } 2099 catch( NumberFormatException e ) 2100 { 2101 // 2102 // If there are unknown entities, we don't want the parser to stop. 
2103 // 2104 ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 2105 String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 2106 return addElement( makeError( msg ) ); 2107 } 2108 2109 // 2110 // Decide if we should open a div or a span? 2111 // 2112 String eol = peekAheadLine(); 2113 2114 if( eol.trim().length() > 0 ) 2115 { 2116 // There is stuff after the class 2117 2118 el = new Element("span"); 2119 2120 m_styleStack.push( Boolean.TRUE ); 2121 } 2122 else 2123 { 2124 startBlockLevel(); 2125 el = new Element("div"); 2126 m_styleStack.push( Boolean.FALSE ); 2127 } 2128 2129 if( style != null ) el.setAttribute("style", style); 2130 if( clazz != null ) el.setAttribute("class", clazz); 2131 el = pushElement( el ); 2132 2133 return el; 2134 } 2135 2136 pushBack(ch); 2137 2138 return el; 2139 } 2140 2141 private Element handleSlash( boolean newLine ) 2142 throws IOException 2143 { 2144 int ch = nextToken(); 2145 2146 pushBack(ch); 2147 if( ch == '%' && !m_styleStack.isEmpty() ) 2148 { 2149 return handleDiv( newLine ); 2150 } 2151 2152 return null; 2153 } 2154 2155 private Element handleBar( boolean newLine ) 2156 throws IOException 2157 { 2158 Element el = null; 2159 2160 if( !m_istable && !newLine ) 2161 { 2162 return null; 2163 } 2164 2165 // 2166 // If the bar is in the first column, we will either start 2167 // a new table or continue the old one. 2168 // 2169 2170 if( newLine ) 2171 { 2172 if( !m_istable ) 2173 { 2174 startBlockLevel(); 2175 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2176 m_istable = true; 2177 m_rowNum = 0; 2178 } 2179 2180 m_rowNum++; 2181 Element tr = ( m_rowNum % 2 != 0 ) 2182 ? 
new Element("tr").setAttribute("class", "odd") 2183 : new Element("tr"); 2184 el = pushElement( tr ); 2185 } 2186 2187 // 2188 // Check out which table cell element to start; 2189 // a header element (th) or a regular element (td). 2190 // 2191 int ch = nextToken(); 2192 2193 if( ch == '|' ) 2194 { 2195 if( !newLine ) 2196 { 2197 el = popElement("th"); 2198 if( el == null ) popElement("td"); 2199 } 2200 el = pushElement( new Element("th") ); 2201 } 2202 else 2203 { 2204 if( !newLine ) 2205 { 2206 el = popElement("td"); 2207 if( el == null ) popElement("th"); 2208 } 2209 2210 el = pushElement( new Element("td") ); 2211 2212 pushBack( ch ); 2213 } 2214 2215 return el; 2216 } 2217 2218 /** 2219 * Generic escape of next character or entity. 2220 */ 2221 private Element handleTilde() 2222 throws IOException 2223 { 2224 int ch = nextToken(); 2225 2226 if( ch == ' ' ) 2227 { 2228 if( m_wysiwygEditorMode ) 2229 { 2230 m_plainTextBuf.append( "~ " ); 2231 } 2232 return m_currentElement; 2233 } 2234 2235 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2236 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2237 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2238 { 2239 if( m_wysiwygEditorMode ) 2240 { 2241 m_plainTextBuf.append( '~' ); 2242 } 2243 2244 m_plainTextBuf.append( (char)ch ); 2245 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2246 return m_currentElement; 2247 } 2248 2249 // No escape. 2250 pushBack( ch ); 2251 2252 return null; 2253 } 2254 2255 private void fillBuffer( Element startElement ) 2256 throws IOException 2257 { 2258 m_currentElement = startElement; 2259 2260 boolean quitReading = false; 2261 m_newLine = true; 2262 disableOutputEscaping(); 2263 2264 while(!quitReading) 2265 { 2266 int ch = nextToken(); 2267 2268 if( ch == -1 ) break; 2269 2270 // 2271 // Check if we're actually ending the preformatted mode. 2272 // We still must do an entity transformation here. 
2273 // 2274 if( m_isEscaping ) 2275 { 2276 if( ch == '}' ) 2277 { 2278 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2279 } 2280 else if( ch == -1 ) 2281 { 2282 quitReading = true; 2283 } 2284 else if( ch == '\r' ) 2285 { 2286 // DOS line feeds we ignore. 2287 } 2288 else if( ch == '<' ) 2289 { 2290 m_plainTextBuf.append( "<" ); 2291 } 2292 else if( ch == '>' ) 2293 { 2294 m_plainTextBuf.append( ">" ); 2295 } 2296 else if( ch == '&' ) 2297 { 2298 m_plainTextBuf.append( "&" ); 2299 } 2300 else if( ch == '~' ) 2301 { 2302 String braces = readWhile("}"); 2303 if( braces.length() >= 3 ) 2304 { 2305 m_plainTextBuf.append("}}}"); 2306 2307 braces = braces.substring(3); 2308 } 2309 else 2310 { 2311 m_plainTextBuf.append( (char) ch ); 2312 } 2313 2314 for( int i = braces.length()-1; i >= 0; i-- ) 2315 { 2316 pushBack(braces.charAt(i)); 2317 } 2318 } 2319 else 2320 { 2321 m_plainTextBuf.append( (char) ch ); 2322 } 2323 2324 continue; 2325 } 2326 2327 // 2328 // An empty line stops a list 2329 // 2330 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2331 { 2332 m_plainTextBuf.append(unwindGeneralList()); 2333 } 2334 2335 if( m_newLine && ch != '|' && m_istable ) 2336 { 2337 popElement("table"); 2338 m_istable = false; 2339 } 2340 2341 int skip = IGNORE; 2342 2343 // 2344 // Do the actual parsing and catch any errors. 2345 // 2346 try 2347 { 2348 skip = parseToken( ch ); 2349 } 2350 catch( IllegalDataException e ) 2351 { 2352 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2353 2354 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2355 } 2356 2357 // 2358 // The idea is as follows: If the handler method returns 2359 // an element (el != null), it is assumed that it has been 2360 // added in the stack. Otherwise the character is added 2361 // as is to the plaintext buffer. 
2362 // 2363 // For the transition phase, if s != null, it also gets 2364 // added in the plaintext buffer. 2365 // 2366 2367 switch( skip ) 2368 { 2369 case ELEMENT: 2370 m_newLine = false; 2371 break; 2372 2373 case CHARACTER: 2374 m_plainTextBuf.append( (char) ch ); 2375 m_newLine = false; 2376 break; 2377 2378 case IGNORE: 2379 default: 2380 break; 2381 } 2382 } 2383 2384 closeHeadings(); 2385 popElement("domroot"); 2386 } 2387 2388 private String cleanupSuspectData( String s ) 2389 { 2390 StringBuilder sb = new StringBuilder( s.length() ); 2391 2392 for( int i = 0; i < s.length(); i++ ) 2393 { 2394 char c = s.charAt(i); 2395 2396 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2397 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); 2398 } 2399 2400 return sb.toString(); 2401 } 2402 2403 /** The token is a plain character. */ 2404 protected static final int CHARACTER = 0; 2405 2406 /** The token is a wikimarkup element. */ 2407 protected static final int ELEMENT = 1; 2408 2409 /** The token is to be ignored. */ 2410 protected static final int IGNORE = 2; 2411 2412 /** 2413 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2414 * you think this was a wiki markup element, and IGNORE, if you think 2415 * we should ignore this altogether. 2416 * <p> 2417 * To add your own MarkupParser, you can override this method, but it 2418 * is recommended that you call super.parseToken() as well to gain advantage 2419 * of JSPWiki's own markup. You can call it at the start of your own 2420 * parseToken() or end - it does not matter. 2421 * 2422 * @param ch The character under investigation 2423 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2424 * @throws IOException If parsing fails. 2425 */ 2426 protected int parseToken( int ch ) 2427 throws IOException 2428 { 2429 Element el = null; 2430 2431 // 2432 // Now, check the incoming token. 
2433 // 2434 switch( ch ) 2435 { 2436 case '\r': 2437 // DOS linefeeds we forget 2438 return IGNORE; 2439 2440 case '\n': 2441 // 2442 // Close things like headings, etc. 2443 // 2444 2445 // FIXME: This is not really very fast 2446 2447 closeHeadings(); 2448 2449 popElement("dl"); // Close definition lists. 2450 if( m_istable ) 2451 { 2452 popElement("tr"); 2453 } 2454 2455 m_isdefinition = false; 2456 2457 if( m_newLine ) 2458 { 2459 // Paragraph change. 2460 startBlockLevel(); 2461 2462 // 2463 // Figure out which elements cannot be enclosed inside 2464 // a <p></p> pair according to XHTML rules. 2465 // 2466 String nextLine = peekAheadLine(); 2467 if( nextLine.length() == 0 || 2468 (nextLine.length() > 0 && 2469 !nextLine.startsWith("{{{") && 2470 !nextLine.startsWith("----") && 2471 !nextLine.startsWith("%%") && 2472 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2473 { 2474 pushElement( new Element("p") ); 2475 m_isOpenParagraph = true; 2476 2477 if( m_restartitalic ) 2478 { 2479 pushElement( new Element("i") ); 2480 m_isitalic = true; 2481 m_restartitalic = false; 2482 } 2483 if( m_restartbold ) 2484 { 2485 pushElement( new Element("b") ); 2486 m_isbold = true; 2487 m_restartbold = false; 2488 } 2489 } 2490 } 2491 else 2492 { 2493 m_plainTextBuf.append("\n"); 2494 m_newLine = true; 2495 } 2496 return IGNORE; 2497 2498 2499 case '\\': 2500 el = handleBackslash(); 2501 break; 2502 2503 case '_': 2504 el = handleUnderscore(); 2505 break; 2506 2507 case '\'': 2508 el = handleApostrophe(); 2509 break; 2510 2511 case '{': 2512 el = handleOpenbrace( m_newLine ); 2513 break; 2514 2515 case '}': 2516 el = handleClosebrace(); 2517 break; 2518 2519 case '-': 2520 if( m_newLine ) 2521 el = handleDash(); 2522 2523 break; 2524 2525 case '!': 2526 if( m_newLine ) 2527 { 2528 el = handleHeading(); 2529 } 2530 break; 2531 2532 case ';': 2533 if( m_newLine ) 2534 { 2535 el = handleDefinitionList(); 2536 } 2537 break; 2538 2539 case ':': 2540 if( m_isdefinition ) 2541 { 2542 
popElement("dt"); 2543 el = pushElement( new Element("dd") ); 2544 m_isdefinition = false; 2545 } 2546 break; 2547 2548 case '[': 2549 el = handleOpenbracket(); 2550 break; 2551 2552 case '*': 2553 if( m_newLine ) 2554 { 2555 pushBack('*'); 2556 el = handleGeneralList(); 2557 } 2558 break; 2559 2560 case '#': 2561 if( m_newLine ) 2562 { 2563 pushBack('#'); 2564 el = handleGeneralList(); 2565 } 2566 break; 2567 2568 case '|': 2569 el = handleBar( m_newLine ); 2570 break; 2571 2572 case '~': 2573 el = handleTilde(); 2574 break; 2575 2576 case '%': 2577 el = handleDiv( m_newLine ); 2578 break; 2579 2580 case '/': 2581 el = handleSlash( m_newLine ); 2582 break; 2583 2584 default: 2585 break; 2586 } 2587 2588 return el != null ? ELEMENT : CHARACTER; 2589 } 2590 2591 private void closeHeadings() 2592 { 2593 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2594 { 2595 // Add the hash anchor element at the end of the heading 2596 addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2597 m_lastHeading = null; 2598 } 2599 popElement("h2"); 2600 popElement("h3"); 2601 popElement("h4"); 2602 } 2603 2604 /** 2605 * Parses the entire document from the Reader given in the constructor or 2606 * set by {@link #setInputReader(Reader)}. 2607 * 2608 * @return A WikiDocument, ready to be passed to the renderer. 2609 * @throws IOException If parsing cannot be accomplished. 2610 */ 2611 @Override 2612 public WikiDocument parse() 2613 throws IOException 2614 { 2615 WikiDocument d = new WikiDocument( m_context.getPage() ); 2616 d.setContext( m_context ); 2617 2618 Element rootElement = new Element("domroot"); 2619 2620 d.setRootElement( rootElement ); 2621 2622 fillBuffer( rootElement ); 2623 2624 paragraphify(rootElement); 2625 2626 return d; 2627 } 2628 2629 /** 2630 * Checks out that the first paragraph is correctly installed. 
2631 * 2632 * @param rootElement 2633 */ 2634 private void paragraphify(Element rootElement) 2635 { 2636 // 2637 // Add the paragraph tag to the first paragraph 2638 // 2639 List< Content > kids = rootElement.getContent(); 2640 2641 if( rootElement.getChild("p") != null ) 2642 { 2643 ArrayList<Content> ls = new ArrayList<>(); 2644 int idxOfFirstContent = 0; 2645 int count = 0; 2646 2647 for( Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) 2648 { 2649 Content c = i.next(); 2650 if( c instanceof Element ) 2651 { 2652 String name = ( ( Element )c ).getName(); 2653 if( isBlockLevel( name ) ) break; 2654 } 2655 2656 if( !(c instanceof ProcessingInstruction) ) 2657 { 2658 ls.add( c ); 2659 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2660 } 2661 } 2662 2663 // 2664 // If there were any elements, then add a new <p> (unless it would 2665 // be an empty one) 2666 // 2667 if( ls.size() > 0 ) 2668 { 2669 Element newel = new Element("p"); 2670 2671 for( Iterator< Content > i = ls.iterator(); i.hasNext(); ) 2672 { 2673 Content c = i.next(); 2674 2675 c.detach(); 2676 newel.addContent(c); 2677 } 2678 2679 // 2680 // Make sure there are no empty <p/> tags added. 2681 // 2682 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) 2683 rootElement.addContent(idxOfFirstContent, newel); 2684 } 2685 } 2686 } 2687 2688}