001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.commons.text.StringEscapeUtils; 023import org.apache.log4j.Logger; 024import org.apache.oro.text.regex.MalformedPatternException; 025import org.apache.oro.text.regex.MatchResult; 026import org.apache.oro.text.regex.Pattern; 027import org.apache.oro.text.regex.PatternCompiler; 028import org.apache.oro.text.regex.PatternMatcher; 029import org.apache.oro.text.regex.Perl5Compiler; 030import org.apache.oro.text.regex.Perl5Matcher; 031import org.apache.wiki.InternalWikiException; 032import org.apache.wiki.StringTransmutator; 033import org.apache.wiki.api.core.Acl; 034import org.apache.wiki.api.core.Context; 035import org.apache.wiki.api.core.ContextEnum; 036import org.apache.wiki.api.core.Page; 037import org.apache.wiki.api.exceptions.PluginException; 038import org.apache.wiki.api.plugin.Plugin; 039import org.apache.wiki.api.spi.Wiki; 040import org.apache.wiki.attachment.AttachmentManager; 041import org.apache.wiki.auth.AuthorizationManager; 042import org.apache.wiki.auth.UserManager; 043import org.apache.wiki.auth.WikiSecurityException; 044import 
org.apache.wiki.auth.acl.AclManager; 045import org.apache.wiki.i18n.InternationalizationManager; 046import org.apache.wiki.preferences.Preferences; 047import org.apache.wiki.util.TextUtil; 048import org.apache.wiki.util.XmlUtil; 049import org.apache.wiki.variables.VariableManager; 050import org.jdom2.Attribute; 051import org.jdom2.Content; 052import org.jdom2.Element; 053import org.jdom2.IllegalDataException; 054import org.jdom2.ProcessingInstruction; 055import org.jdom2.Verifier; 056 057import javax.xml.transform.Result; 058import java.io.IOException; 059import java.io.Reader; 060import java.io.StringReader; 061import java.text.MessageFormat; 062import java.util.ArrayList; 063import java.util.Arrays; 064import java.util.Collection; 065import java.util.EmptyStackException; 066import java.util.HashMap; 067import java.util.Iterator; 068import java.util.List; 069import java.util.Map; 070import java.util.Properties; 071import java.util.ResourceBundle; 072import java.util.Stack; 073 074/** 075 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 076 * heart and soul of JSPWiki : make sure you test properly anything that is added, 077 * or else it breaks down horribly. 
 *
 *  @since  2.4
 */
public class JSPWikiMarkupParser extends MarkupParser {

    // Link types. makeLink() switches on these values and createAnchor() uses
    // them as an index into CLASS_TYPES, so the numbering must stay in step with
    // that table (CLASS_TYPES is declared outside the visible part of this file).
    protected static final int READ = 0;
    protected static final int EDIT = 1;
    protected static final int EMPTY = 2; // Empty message
    protected static final int LOCAL = 3;
    protected static final int LOCALREF = 4;
    protected static final int IMAGE = 5;
    protected static final int EXTERNAL = 6;
    protected static final int INTERWIKI = 7;
    protected static final int IMAGELINK = 8;
    protected static final int IMAGEWIKILINK = 9;
    protected static final int ATTACHMENT = 10;

    private static final Logger log = Logger.getLogger( JSPWikiMarkupParser.class );

    // Parser state flags. Apart from m_isEscaping (read by flushPlainText() to
    // suppress CamelCase detection) their readers and writers are not in the
    // visible part of this file; the names suggest inline/block markup state
    // tracked by the main parse loop -- TODO confirm.
    private boolean m_isbold = false;
    private boolean m_isitalic = false;
    private boolean m_istable = false;
    private boolean m_isPre = false;
    private boolean m_isEscaping = false;
    private boolean m_isdefinition = false;
    private boolean m_isPreBlock = false;

    /** Contains style information, in multiple forms. */
    private Stack<Boolean> m_styleStack = new Stack<>();

    // general list handling
    private int m_genlistlevel = 0;
    private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern
    private boolean m_allowPHPWikiStyleLists = true;

    private boolean m_isOpenParagraph = false;

    /** Parser for extended link functionality. */
    private LinkParser m_linkParser = new LinkParser();

    /**
     *  Keeps track of any plain text that gets put in the Text nodes.
     *  flushPlainText() empties it into {@link #m_currentElement}.
     */
    private StringBuilder m_plainTextBuf = new StringBuilder(20);

    /**
     *  The element that currently receives new content; pushElement() descends
     *  into a child and popElement() climbs back to a parent.
     */
    private Element m_currentElement;

    /** Keep track of duplicate header names; maps an encoded section name to the number of times it has been seen. */
    private Map<String, Integer> m_titleSectionCounter = new HashMap<>();

    /** If true, then considers CamelCase links as well. */
    private boolean m_camelCaseLinks = false;

    /** If true, then generate special output for wysiwyg editing in certain cases */
    private boolean m_wysiwygEditorMode = false;

    /** If true, consider URIs that have no brackets as well. */
    // FIXME: Currently reserved, but not used.
    private boolean m_plainUris = false;

    /** If true, all outward links use a small link image. */
    private boolean m_useOutlinkImage = true;

    /** If true, makeLink() appends a small "info" image link after each attachment link. */
    private boolean m_useAttachmentImage = true;

    /** If true, allows raw HTML; flushPlainText() then skips entity escaping. */
    private boolean m_allowHTML = false;

    /** If true, makeLink() adds rel="nofollow" to EXTERNAL links. */
    private boolean m_useRelNofollow = false;

    private PatternCompiler m_compiler = new Perl5Compiler();

    /**
     *  Pattern used to find CamelCase words and bare URIs in plain text.
     *  Capture groups, as consumed by flushPlainText(): 1 = the text right before
     *  the match (start of input or a run of non-alphanumerics), 2 = the CamelCase
     *  word or the complete URI, 3 = the protocol (http://, https:// or mailto:),
     *  4 = the remainder of the URI.
     */
    static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))";

    private PatternMatcher m_camelCaseMatcher = new Perl5Matcher();

    /** Compiled form of WIKIWORD_REGEX; initialize() shares it through an engine attribute. */
    private Pattern m_camelCasePattern;

    private int m_rowNum = 1;

    private Heading m_lastHeading = null;

    /** Name of the engine attribute under which the compiled CamelCase pattern is stored. */
    private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern";

    /**
     *  Creates a markup parser and reads its configuration via initialize().
     *
     *  @param context The WikiContext which controls the parsing
     *  @param in Where the data is read from.
     */
    public JSPWikiMarkupParser( final Context context, final Reader in )
    {
        super( context, in );
        initialize();
    }

    // FIXME: parsers should be pooled for better performance.
172 private void initialize() { 173 initInlineImagePatterns(); 174 175 m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN ); 176 if( m_camelCasePattern == null ) { 177 try { 178 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 179 } catch( final MalformedPatternException e ) { 180 log.fatal("Internal error: Someone put in a faulty pattern.",e); 181 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 182 } 183 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 184 } 185 186 // Set the properties. 187 final Properties props = m_engine.getWikiProperties(); 188 final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 189 190 if( cclinks != null ) { 191 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 192 } else { 193 m_camelCaseLinks = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks ); 194 } 195 196 final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE ); 197 if( wysiwygVariable != null ) { 198 m_wysiwygEditorMode = wysiwygVariable; 199 } 200 201 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 202 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 203 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 204 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 205 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 206 207 if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) { 208 disableAccessRules(); 209 } 210 211 m_context.getPage().setHasMetadata(); 212 } 213 214 /** 215 * Calls a transmutator chain. 
     *  A null or empty chain returns the text unchanged.
     *
     *  @param list Chain to call
     *  @param text Text that should be passed to the mutate() method of each of the mutators in the chain.
     *  @return The result of the mutation.
     */
    protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) {
        if( list == null || list.size() == 0 ) {
            return text;
        }

        // Each mutator receives the output of the previous one.
        for( final StringTransmutator m : list ) {
            text = m.mutate( m_context, text );
        }

        return text;
    }

    /**
     *  Calls the heading listeners.
     *
     *  @param param A Heading object.
     */
    protected void callHeadingListenerChain( final Heading param ) {
        final List< HeadingListener > list = m_headingListenerChain;
        for( final HeadingListener h : list ) {
            h.headingAdded( m_context, param );
        }
    }

    /**
     *  Creates a JDOM anchor element.  Can be overridden to change the URL creation,
     *  if you really know what you are doing.
     *  <p>
     *  The text and the section are run through escapeHTMLEntities(); the link
     *  itself is used as given. The href is the link followed by the section.
     *
     *  @param type One of the types above; used as an index into CLASS_TYPES for the class attribute.
     *  @param link URL to which to link to
     *  @param text Link text
     *  @param section If a particular section identifier is required. Must not be null
     *                 (it is escaped unconditionally); pass "" for none.
     *  @return An A element.
     *  @since 2.4.78
     */
    protected Element createAnchor( final int type, final String link, String text, String section)
    {
        text = escapeHTMLEntities( text );
        section = escapeHTMLEntities( section );
        final Element el = new Element("a");
        el.setAttribute("class",CLASS_TYPES[type]);
        el.setAttribute("href",link+section);
        el.addContent(text);
        return el;
    }

    /**
     *  Builds the element for a link of the given type and appends it to the
     *  current element.
     *
     *  @param type One of the link type constants (READ ... ATTACHMENT); forced to EMPTY when the link is empty.
     *  @param link The link target; its meaning depends on the type (page name, URL, footnote id, ...).
     *  @param text The link text; when null the link itself is used. It is passed through the link mutator chain.
     *  @param section Optional section name; a '#' is prepended when it is not null.
     *  @param attributes Optional extra attributes copied onto the element that is finally added; may be null.
     *  @return The element that was added last, or null when nothing was added at
     *          this point (unknown type, or an ATTACHMENT without the info image).
     */
    private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes )
    {
        Element el = null;

        if( text == null ) text = link;

        text = callMutatorChain( m_linkMutators, text );

        section = (section != null) ? ("#"+section) : "";

        // Make sure we make a link name that can be accepted
        // as a valid URL.

        if( link.length() == 0 )
        {
            type = EMPTY;
        }
        final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE );

        switch(type)
        {
            case READ:
                el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section );
                break;

            case EDIT:
                // The section is dropped for edit links; the title attribute carries the localized "create" hint.
                el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" );
                el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) );

                break;

            case EMPTY:
                el = new Element("u").addContent(text);
                break;

            //
            //  These two are for local references - footnotes and
            //  references to footnotes.
            //  We embed the page name (or whatever WikiContext gives us)
            //  to make sure the links are unique across Wiki.
            //
            case LOCALREF:
                el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" );
                break;

            case LOCAL:
                el = new Element("a").setAttribute("class",CLASS_FOOTNOTE);
                // The first character of the link is skipped when building the anchor name.
                el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1));
                el.addContent("["+text+"]");
                break;

            //
            //  With the image, external and interwiki types we need to
            //  make sure nobody can put in Javascript or something else
            //  annoying into the links themselves.  We do this by preventing
            //  a haxor from stopping the link name short with quotes in
            //  fillBuffer().
            //
            case IMAGE:
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                break;

            case IMAGELINK:
                // The image is wrapped in an anchor whose href is the link text.
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                el = createAnchor(IMAGELINK,text,"","").addContent(el);
                break;

            case IMAGEWIKILINK:
                // The image is wrapped in an anchor that points at the wiki page named by the link text.
                final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text );
                el = new Element("img").setAttribute("class","inline");
                el.setAttribute("src",link);
                el.setAttribute("alt",text);
                el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el);
                break;

            case EXTERNAL:
                el = createAnchor( EXTERNAL, link, text, section );
                if( m_useRelNofollow ) el.setAttribute("rel","nofollow");
                break;

            case INTERWIKI:
                el = createAnchor( INTERWIKI, link, text, section );
                break;

            case ATTACHMENT:
                final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link );
                final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link );
                final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" );
                el = createAnchor( ATTACHMENT, attlink, text, "" );

                if( m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) {
                    el.setAttribute("download", "");
                }

                // The attachment anchor is added to the tree right here (push, then pop straight away);
                // from this point on 'el' refers to the optional info link instead, so the extra
                // attributes below and the return value apply to the info link, not to this anchor.
                pushElement(el);
                popElement(el.getName());

                if( m_useAttachmentImage )
                {
                    el = new Element("img").setAttribute("src",imglink);
                    el.setAttribute("border","0");
                    el.setAttribute("alt","(info)");

                    el = new Element("a").setAttribute("href",infolink).addContent(el);
                    el.setAttribute("class","infolink");
                }
                else
                {
                    el = null;
                }
                break;

            default:
                break;
        }

        // Copy the caller-supplied attributes; null entries are skipped.
        if( el != null && attributes != null )
        {
            while( attributes.hasNext() )
            {
                final Attribute attr = attributes.next();
                if( attr != null )
                {
                    el.setAttribute(attr);
                }
            }
        }

        // Pending plain text must reach the tree before the new element to keep document order.
        if( el != null )
        {
            flushPlainText();
            m_currentElement.addContent( el );
        }
        return el;
    }

    /**
     *  These are all of the HTML 4.01 block-level elements.
     *  Must stay in ascending order: isBlockLevel() uses a binary search.
     */
    private static final String[] BLOCK_ELEMENTS = {
        "address", "blockquote", "div", "dl", "fieldset", "form",
        "h1", "h2", "h3", "h4", "h5", "h6",
        "hr", "noscript", "ol", "p", "pre", "table", "ul"
    };

    /**
     *  Tells whether the given element name is one of BLOCK_ELEMENTS.
     *
     *  @param name The element name to look up (compared as-is).
     *  @return true, if the name is in the list.
     */
    private static boolean isBlockLevel( final String name )
    {
        return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0;
    }

    /**
     *  This method peeks ahead in the stream until EOL and returns the result.
     *  It will keep the buffers untouched.
     *  <p>
     *  The line is read with readUntilEOL() and then pushed back. When it is longer
     *  than PUSHBACK_BUFFER_SIZE only its first PUSHBACK_BUFFER_SIZE-1 characters are
     *  pushed back, although the complete line is still returned. A failing
     *  push-back of a shorter line is logged and otherwise ignored.
     *
     *  @return The string from the current position to the end of line.
     *  @throws IOException If reading fails, or if pushing back the truncated line fails.
     */

    // FIXME: Always returns an empty line, even if the stream is full.
    private String peekAheadLine()
        throws IOException
    {
        final String s = readUntilEOL().toString();

        if( s.length() > PUSHBACK_BUFFER_SIZE )
        {
            log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters.  Attempting to recover...");
            pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) );
        }
        else
        {
            try
            {
                pushBack( s );
            }
            catch( final IOException e )
            {
                // Deliberate best effort: the caller still gets the line that was read.
                log.warn("Pushback failed: the line is probably too long.  Attempting to recover.");
            }
        }
        return s;
    }

    /**
     *  Moves the text collected in m_plainTextBuf into the current element.
     *  Unless raw HTML is allowed the text is escaped first. When CamelCase links
     *  are enabled (and the parser is not escaping) every CamelCase word or bare
     *  URI found in the text is turned into a link on the way.
     *
     *  @return The number of characters that were in the buffer before it was
     *          flushed (counted before escaping); 0 if there was nothing to flush.
     */
    private int flushPlainText()
    {
        final int numChars = m_plainTextBuf.length();

        if( numChars > 0 )
        {
            String buf;

            if( !m_allowHTML )
            {
                buf = escapeHTMLEntities(m_plainTextBuf.toString());
            }
            else
            {
                buf = m_plainTextBuf.toString();
            }
            //
            //  We must first empty the buffer because the side effect of
            //  calling makeCamelCaseLink() is to call this routine.
            //

            m_plainTextBuf = new StringBuilder(20);

            try
            {
                //
                //  This is the heaviest part of parsing, and therefore we can
                //  do some optimization here.
                //
                //  1) Only when the length of the buffer is big enough, we try to do the match
                //

                if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 )
                {
                    // System.out.println("Buffer="+buf);

                    // Each pass emits the text before the match, handles the match and
                    // then continues with whatever follows it.
                    while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) )
                    {
                        final MatchResult result = m_camelCaseMatcher.getMatch();

                        final String firstPart = buf.substring(0,result.beginOffset(0));
                        String prefix = result.group(1);

                        if( prefix == null ) prefix = "";

                        // Group 2 is the CamelCase word or the whole URI; groups 3 and 4
                        // are only set for URIs, so 'uri' is meaningful only when protocol != null.
                        final String camelCase = result.group(2);
                        final String protocol = result.group(3);
                        String uri = protocol+result.group(4);
                        buf = buf.substring(result.endOffset(0));

                        m_currentElement.addContent( firstPart );

                        //
                        //  Check if the user does not wish to do URL or WikiWord expansion
                        //
                        if( prefix.endsWith("~") || prefix.indexOf('[') != -1 )
                        {
                            if( prefix.endsWith("~") )
                            {
                                // The escaping tilde is only kept in the output in WYSIWYG editor mode.
                                if( m_wysiwygEditorMode )
                                {
                                    m_currentElement.addContent( "~" );
                                }
                                prefix = prefix.substring(0,prefix.length()-1);
                            }
                            if( camelCase != null )
                            {
                                m_currentElement.addContent( prefix+camelCase );
                            }
                            else if( protocol != null )
                            {
                                m_currentElement.addContent( prefix+uri );
                            }
                            continue;
                        }

                        //
                        //  Fine, then let's check what kind of a link this was
                        //  and emit the proper elements
                        //
                        if( protocol != null )
                        {
                            // A trailing '.' or ',' is treated as punctuation: it is cut off the
                            // URI and put back in front of the remaining text.
                            final char c = uri.charAt(uri.length()-1);
                            if( c == '.' || c == ',' )
                            {
                                uri = uri.substring(0,uri.length()-1);
                                buf = c + buf;
                            }
                            // System.out.println("URI match "+uri);
                            m_currentElement.addContent( prefix );
                            makeDirectURILink( uri );
                        }
                        else
                        {
                            // System.out.println("Matched: '"+camelCase+"'");
                            // System.out.println("Split to '"+firstPart+"', and '"+buf+"'");
                            // System.out.println("prefix="+prefix);
                            m_currentElement.addContent( prefix );

                            makeCamelCaseLink( camelCase );
                        }
                    }

                    // Whatever is left after the last match.
                    m_currentElement.addContent( buf );
                }
                else
                {
                    //
                    //  No camelcase asked for, just add the elements
                    //
                    m_currentElement.addContent( buf );
                }
            }
            catch( final IllegalDataException e )
            {
                //
                // Sometimes it's possible that illegal XML chars is added to the data.
                // Here we make sure it does not stop parsing.
                //
                m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) );
            }
        }

        return numChars;
    }

    /**
     *  Escapes XML entities in a HTML-compatible way (i.e. does not escape
     *  entities that are already escaped).
     *
     *  @param buf
     *  @return An escaped string.
585 */ 586 private String escapeHTMLEntities( final String buf) 587 { 588 final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); 589 590 for( int i = 0; i < buf.length(); i++ ) 591 { 592 final char ch = buf.charAt(i); 593 594 if( ch == '<' ) 595 { 596 tmpBuf.append("<"); 597 } 598 else if( ch == '>' ) 599 { 600 tmpBuf.append(">"); 601 } 602 else if( ch == '\"' ) 603 { 604 tmpBuf.append("""); 605 } 606 else if( ch == '&' ) 607 { 608 // 609 // If the following is an XML entity reference (&#.*;) we'll 610 // leave it as it is; otherwise we'll replace it with an & 611 // 612 613 boolean isEntity = false; 614 final StringBuilder entityBuf = new StringBuilder(); 615 616 if( i < buf.length() -1 ) 617 { 618 for( int j = i; j < buf.length(); j++ ) 619 { 620 final char ch2 = buf.charAt(j); 621 622 if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) 623 { 624 entityBuf.append(ch2); 625 626 if( ch2 == ';' ) 627 { 628 isEntity = true; 629 break; 630 } 631 } 632 else 633 { 634 break; 635 } 636 } 637 } 638 639 if( isEntity ) 640 { 641 tmpBuf.append( entityBuf ); 642 i = i + entityBuf.length() - 1; 643 } 644 else 645 { 646 tmpBuf.append("&"); 647 } 648 649 } 650 else 651 { 652 tmpBuf.append( ch ); 653 } 654 } 655 656 return tmpBuf.toString(); 657 } 658 659 private Element pushElement( final Element e ) 660 { 661 flushPlainText(); 662 m_currentElement.addContent( e ); 663 m_currentElement = e; 664 665 return e; 666 } 667 668 private Element addElement( final Content e ) 669 { 670 if( e != null ) 671 { 672 flushPlainText(); 673 m_currentElement.addContent( e ); 674 } 675 return m_currentElement; 676 } 677 678 /** 679 * All elements that can be empty by the HTML DTD. 680 */ 681 // Keep sorted. 
682 private static final String[] EMPTY_ELEMENTS = { 683 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 684 }; 685 686 /** 687 * Goes through the current element stack and pops all elements until this 688 * element is found - this essentially "closes" and element. 689 * 690 * @param s 691 * @return The new current element, or null, if there was no such element in the entire stack. 692 */ 693 private Element popElement( final String s ) 694 { 695 final int flushedBytes = flushPlainText(); 696 697 Element currEl = m_currentElement; 698 699 while( currEl.getParentElement() != null ) 700 { 701 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 702 { 703 m_currentElement = currEl.getParentElement(); 704 705 // 706 // Check if it's okay for this element to be empty. Then we will 707 // trick the JDOM generator into not generating an empty element, 708 // by putting an empty string between the tags. Yes, it's a kludge 709 // but what'cha gonna do about it. :-) 710 // 711 712 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 713 { 714 currEl.addContent(""); 715 } 716 717 return m_currentElement; 718 } 719 720 currEl = currEl.getParentElement(); 721 } 722 723 return null; 724 } 725 726 727 /** 728 * Reads the stream until it meets one of the specified 729 * ending characters, or stream end. The ending character will be left 730 * in the stream. 
731 */ 732 private String readUntil( final String endChars ) 733 throws IOException 734 { 735 final StringBuilder sb = new StringBuilder( 80 ); 736 int ch = nextToken(); 737 738 while( ch != -1 ) 739 { 740 if( ch == '\\' ) 741 { 742 ch = nextToken(); 743 if( ch == -1 ) 744 { 745 break; 746 } 747 } 748 else 749 { 750 if( endChars.indexOf((char)ch) != -1 ) 751 { 752 pushBack( ch ); 753 break; 754 } 755 } 756 sb.append( (char) ch ); 757 ch = nextToken(); 758 } 759 760 return sb.toString(); 761 } 762 763 /** 764 * Reads the stream while the characters that have been specified are 765 * in the stream, returning then the result as a String. 766 */ 767 private String readWhile( final String endChars ) 768 throws IOException 769 { 770 final StringBuilder sb = new StringBuilder( 80 ); 771 int ch = nextToken(); 772 773 while( ch != -1 ) 774 { 775 if( endChars.indexOf((char)ch) == -1 ) 776 { 777 pushBack( ch ); 778 break; 779 } 780 781 sb.append( (char) ch ); 782 ch = nextToken(); 783 } 784 785 return sb.toString(); 786 } 787 788 private JSPWikiMarkupParser m_cleanTranslator; 789 790 /** 791 * Does a lazy init. Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before 792 * the TranslatorReader it is contained by is up. 793 */ 794 private JSPWikiMarkupParser getCleanTranslator() { 795 if( m_cleanTranslator == null ) { 796 final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() ); 797 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 798 m_cleanTranslator.m_allowHTML = true; 799 } 800 801 return m_cleanTranslator; 802 } 803 /** 804 * Modifies the "hd" parameter to contain proper values. Because 805 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 806 * % after url encoding with '_'. 807 * <p> 808 * Counts also duplicate headings (= headings with similar name), and 809 * attaches a counter. 
810 */ 811 private String makeHeadingAnchor( final String baseName, String title, final Heading hd ) { 812 hd.m_titleText = title; 813 title = MarkupParser.wikifyLink( title ); 814 hd.m_titleSection = m_engine.encodeName(title); 815 816 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) { 817 final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1; 818 m_titleSectionCounter.put( hd.m_titleSection, count ); 819 hd.m_titleSection += "-" + count; 820 } else { 821 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 822 } 823 824 hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection; 825 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 826 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 827 828 return hd.m_titleAnchor; 829 } 830 831 private String makeSectionTitle( String title ) { 832 title = title.trim(); 833 try { 834 final JSPWikiMarkupParser dtr = getCleanTranslator(); 835 dtr.setInputReader( new StringReader( title ) ); 836 final WikiDocument doc = dtr.parse(); 837 doc.setContext( m_context ); 838 839 return XmlUtil.extractTextFromDocument( doc ); 840 } catch( final IOException e ) { 841 log.fatal("Title parsing not working", e ); 842 throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e ); 843 } 844 } 845 846 /** 847 * Returns XHTML for the heading. 848 * 849 * @param level The level of the heading. 
@see Heading 850 * @param title the title for the heading 851 * @param hd a List to which heading should be added 852 * @return An Element containing the heading 853 */ 854 public Element makeHeading( final int level, final String title, final Heading hd ) { 855 final Element el; 856 final String pageName = m_context.getPage().getName(); 857 final String outTitle = makeSectionTitle( title ); 858 hd.m_level = level; 859 860 switch( level ) { 861 case Heading.HEADING_SMALL: 862 el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 863 break; 864 865 case Heading.HEADING_MEDIUM: 866 el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 867 break; 868 869 case Heading.HEADING_LARGE: 870 el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 871 break; 872 873 default: 874 throw new InternalWikiException( "Illegal heading type " + level ); 875 } 876 877 return el; 878 } 879 880 /** 881 * When given a link to a WikiName, we just return 882 * a proper HTML link for it. The local link mutator 883 * chain is also called. 884 */ 885 private Element makeCamelCaseLink( final String wikiname ) 886 { 887 final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 888 889 callMutatorChain( m_localLinkMutatorChain, wikiname ); 890 891 if( matchedLink != null ) { 892 makeLink( READ, matchedLink, wikiname, null, null ); 893 } else { 894 makeLink( EDIT, wikiname, wikiname, null, null ); 895 } 896 897 return m_currentElement; 898 } 899 900 /** Holds the image URL for the duration of this parser */ 901 private String m_outlinkImageURL = null; 902 903 /** 904 * Returns an element for the external link image (out.png). However, 905 * this method caches the URL for the lifetime of this MarkupParser, 906 * because it's commonly used, and we'll end up with possibly hundreds 907 * our thousands of references to it... It's a lot faster, too. 
908 * 909 * @return An element containing the HTML for the outlink image. 910 */ 911 private Element outlinkImage() 912 { 913 Element el = null; 914 915 if( m_useOutlinkImage ) 916 { 917 if( m_outlinkImageURL == null ) 918 { 919 m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE ); 920 } 921 922 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 923 el.setAttribute( "src", m_outlinkImageURL ); 924 el.setAttribute( "alt","" ); 925 } 926 927 return el; 928 } 929 930 /** 931 * Takes an URL and turns it into a regular wiki link. Unfortunately, 932 * because of the way that flushPlainText() works, it already encodes 933 * all of the XML entities. But so does WikiContext.getURL(), so we 934 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 935 * <p> 936 * What a crappy problem. 937 * 938 * @param url 939 * @return An anchor Element containing the link. 940 */ 941 private Element makeDirectURILink( String url ) { 942 final Element result; 943 String last = null; 944 945 if( url.endsWith( "," ) || url.endsWith( "." ) ) { 946 last = url.substring( url.length() - 1 ); 947 url = url.substring( 0, url.length() - 1 ); 948 } 949 950 callMutatorChain( m_externalLinkMutatorChain, url ); 951 952 if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) { 953 result = handleImageLink( StringUtils.replace( url, "&", "&" ), url, false ); 954 } else { 955 result = makeLink( EXTERNAL, StringUtils.replace( url, "&", "&" ), url, null, null ); 956 addElement( outlinkImage() ); 957 } 958 959 if( last != null ) { 960 m_plainTextBuf.append( last ); 961 } 962 963 return result; 964 } 965 966 /** 967 * Image links are handled differently: 968 * 1. If the text is a WikiName of an existing page, 969 * it gets linked. 970 * 2. If the text is an external link, then it is inlined. 971 * 3. Otherwise it becomes an ALT text. 972 * 973 * @param reallink The link to the image. 
974 * @param link Link text portion, may be a link to somewhere else. 975 * @param hasLinkText If true, then the defined link had a link text available. 976 * This means that the link text may be a link to a wiki page, 977 * or an external resource. 978 */ 979 980 // FIXME: isExternalLink() is called twice. 981 private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) 982 { 983 final String possiblePage = MarkupParser.cleanLink( link ); 984 985 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) 986 { 987 return makeLink( IMAGELINK, reallink, link, null, null ); 988 } 989 else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) 990 { 991 // System.out.println("Orig="+link+", Matched: "+matchedLink); 992 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 993 994 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 995 } 996 else 997 { 998 return makeLink( IMAGE, reallink, link, null, null ); 999 } 1000 } 1001 1002 private Element handleAccessRule( String ruleLine ) { 1003 if( m_wysiwygEditorMode ) { 1004 m_currentElement.addContent( "[" + ruleLine + "]" ); 1005 } 1006 1007 if( !m_parseAccessRules ) { 1008 return m_currentElement; 1009 } 1010 final Page page = m_context.getRealPage(); 1011 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1012 1013 if( ruleLine.startsWith( "{" ) ) { 1014 ruleLine = ruleLine.substring( 1 ); 1015 } 1016 1017 if( ruleLine.endsWith( "}" ) ) { 1018 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1019 } 1020 1021 if( log.isDebugEnabled() ) { 1022 log.debug("page="+page.getName()+", ACL = "+ruleLine); 1023 } 1024 1025 try { 1026 final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine ); 1027 page.setAcl( acl ); 1028 1029 if( log.isDebugEnabled() ) { 1030 log.debug( acl.toString() ); 1031 } 1032 } catch( final WikiSecurityException wse ) { 1033 return makeError( wse.getMessage() ); 1034 } 1035 1036 
return m_currentElement; 1037 } 1038 1039 /** 1040 * Handles metadata setting [{SET foo=bar}] 1041 */ 1042 private Element handleMetadata( final String link ) { 1043 if( m_wysiwygEditorMode ) { 1044 m_currentElement.addContent( "[" + link + "]" ); 1045 } 1046 1047 try { 1048 final String args = link.substring( link.indexOf(' '), link.length()-1 ); 1049 final String name = args.substring( 0, args.indexOf('=') ).trim(); 1050 String val = args.substring( args.indexOf('=')+1 ).trim(); 1051 1052 if( val.startsWith("'") ) { 1053 val = val.substring( 1 ); 1054 } 1055 if( val.endsWith("'") ) { 1056 val = val.substring( 0, val.length()-1 ); 1057 } 1058 1059 // log.debug("SET name='"+name+"', value='"+val+"'."); 1060 1061 if( name.length() > 0 && val.length() > 0 ) { 1062 val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val ); 1063 m_context.getPage().setAttribute( name, val ); 1064 } 1065 } catch( final Exception e ) { 1066 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1067 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1068 } 1069 1070 return m_currentElement; 1071 } 1072 1073 /** 1074 * Emits a processing instruction that will disable markup escaping. This is 1075 * very useful if you want to emit HTML directly into the stream. 1076 * 1077 */ 1078 private void disableOutputEscaping() { 1079 addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) ); 1080 } 1081 1082 /** 1083 * Gobbles up all hyperlinks that are encased in square brackets. 
1084 */ 1085 private Element handleHyperlinks( String linktext, final int pos ) { 1086 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1087 final StringBuilder sb = new StringBuilder( linktext.length() + 80 ); 1088 1089 if( m_linkParsingOperations.isAccessRule( linktext ) ) { 1090 return handleAccessRule( linktext ); 1091 } 1092 1093 if( m_linkParsingOperations.isMetadata( linktext ) ) { 1094 return handleMetadata( linktext ); 1095 } 1096 1097 if( m_linkParsingOperations.isPluginLink( linktext ) ) { 1098 try { 1099 final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1100 1101 // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not. 1102 if( pluginContent != null ) { 1103 addElement( pluginContent ); 1104 pluginContent.executeParse( m_context ); 1105 } 1106 } catch( final PluginException e ) { 1107 log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 1108 //log.info( "Root cause:",e.getRootThrowable() ); 1109 if( !m_wysiwygEditorMode ) { 1110 final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1111 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 1112 m_context.getRealPage().getWiki(), 1113 m_context.getRealPage().getName(), 1114 e.getMessage() ) ) ); 1115 } 1116 } 1117 1118 return m_currentElement; 1119 } 1120 1121 try { 1122 final LinkParser.Link link = m_linkParser.parse( linktext ); 1123 linktext = link.getText(); 1124 String linkref = link.getReference(); 1125 1126 // 1127 // Yes, we now have the components separated. 1128 // linktext = the text the link should have 1129 // linkref = the url or page name. 1130 // 1131 // In many cases these are the same. [linktext|linkref]. 
1132 // 1133 if( m_linkParsingOperations.isVariableLink( linktext ) ) { 1134 final Content el = new VariableContent( linktext ); 1135 1136 addElement( el ); 1137 } else if( m_linkParsingOperations.isExternalLink( linkref ) ) { 1138 // It's an external link, out of this Wiki 1139 1140 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1141 1142 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 1143 handleImageLink( linkref, linktext, link.hasReference() ); 1144 } else { 1145 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1146 addElement( outlinkImage() ); 1147 } 1148 } else if( link.isInterwikiLink() ) { 1149 // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved. 1150 1151 // FIXME: There is an interesting issue here: We probably should 1152 // URLEncode the wikiPage, but we can't since some of the 1153 // Wikis use slashes (/), which won't survive URLEncoding. 1154 // Besides, we don't know which character set the other Wiki 1155 // is using, so you'll have to write the entire name as it appears 1156 // in the URL. Bugger. 
1157 1158 final String extWiki = link.getExternalWiki(); 1159 final String wikiPage = link.getExternalWikiPage(); 1160 1161 if( m_wysiwygEditorMode ) { 1162 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1163 } else { 1164 String urlReference = m_engine.getInterWikiURL( extWiki ); 1165 1166 if( urlReference != null ) { 1167 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1168 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1169 1170 if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) { 1171 handleImageLink( urlReference, linktext, link.hasReference() ); 1172 } else { 1173 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1174 } 1175 1176 if( m_linkParsingOperations.isExternalLink( urlReference ) ) { 1177 addElement( outlinkImage() ); 1178 } 1179 } else { 1180 final Object[] args = { escapeHTMLEntities( extWiki ) }; 1181 1182 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1183 } 1184 } 1185 } else if( linkref.startsWith( "#" ) ) { 1186 // It defines a local footnote 1187 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1188 } else if( TextUtil.isNumber( linkref ) ) { 1189 // It defines a reference to a local footnote 1190 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1191 } else { 1192 final int hashMark; 1193 1194 // Internal wiki link, but is it an attachment link? 
1195 String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref ); 1196 if( attachment != null ) { 1197 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1198 1199 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 1200 attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment ); 1201 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1202 } else { 1203 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1204 } 1205 } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) { 1206 // It's an internal Wiki link, but to a named section 1207 1208 final String namedSection = linkref.substring( hashMark + 1 ); 1209 linkref = linkref.substring( 0, hashMark ); 1210 1211 linkref = MarkupParser.cleanLink( linkref ); 1212 1213 callMutatorChain( m_localLinkMutatorChain, linkref ); 1214 1215 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1216 if( matchedLink != null ) { 1217 String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) ); 1218 sectref = sectref.replace( '%', '_' ); 1219 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1220 } else { 1221 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1222 } 1223 } else { 1224 // It's an internal Wiki link 1225 linkref = MarkupParser.cleanLink( linkref ); 1226 1227 callMutatorChain( m_localLinkMutatorChain, linkref ); 1228 1229 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1230 if( matchedLink != null ) { 1231 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1232 } else { 1233 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1234 } 1235 } 1236 } 1237 1238 } catch( final ParseException e ) { 1239 log.info( "Parser failure: ", e ); 1240 final Object[] args = { e.getMessage() }; 
1241 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1242 } 1243 return m_currentElement; 1244 } 1245 1246 /** 1247 * Pushes back any string that has been read. It will obviously 1248 * be pushed back in a reverse order. 1249 * 1250 * @since 2.1.77 1251 */ 1252 private void pushBack( final String s ) 1253 throws IOException 1254 { 1255 for( int i = s.length()-1; i >= 0; i-- ) 1256 { 1257 pushBack( s.charAt(i) ); 1258 } 1259 } 1260 1261 private Element handleBackslash() 1262 throws IOException 1263 { 1264 final int ch = nextToken(); 1265 1266 if( ch == '\\' ) 1267 { 1268 final int ch2 = nextToken(); 1269 1270 if( ch2 == '\\' ) 1271 { 1272 pushElement( new Element("br").setAttribute("clear","all")); 1273 return popElement("br"); 1274 } 1275 1276 pushBack( ch2 ); 1277 1278 pushElement( new Element("br") ); 1279 return popElement("br"); 1280 } 1281 1282 pushBack( ch ); 1283 1284 return null; 1285 } 1286 1287 private Element handleUnderscore() 1288 throws IOException 1289 { 1290 final int ch = nextToken(); 1291 Element el = null; 1292 1293 if( ch == '_' ) 1294 { 1295 if( m_isbold ) 1296 { 1297 el = popElement("b"); 1298 } 1299 else 1300 { 1301 el = pushElement( new Element("b") ); 1302 } 1303 m_isbold = !m_isbold; 1304 } 1305 else 1306 { 1307 pushBack( ch ); 1308 } 1309 1310 return el; 1311 } 1312 1313 1314 /** 1315 * For example: italics. 
1316 */ 1317 private Element handleApostrophe() 1318 throws IOException 1319 { 1320 final int ch = nextToken(); 1321 Element el = null; 1322 1323 if( ch == '\'' ) 1324 { 1325 if( m_isitalic ) 1326 { 1327 el = popElement("i"); 1328 } 1329 else 1330 { 1331 el = pushElement( new Element("i") ); 1332 } 1333 m_isitalic = !m_isitalic; 1334 } 1335 else 1336 { 1337 pushBack( ch ); 1338 } 1339 1340 return el; 1341 } 1342 1343 private Element handleOpenbrace( final boolean isBlock ) 1344 throws IOException 1345 { 1346 final int ch = nextToken(); 1347 1348 if( ch == '{' ) 1349 { 1350 final int ch2 = nextToken(); 1351 1352 if( ch2 == '{' ) 1353 { 1354 m_isPre = true; 1355 m_isEscaping = true; 1356 m_isPreBlock = isBlock; 1357 1358 if( isBlock ) 1359 { 1360 startBlockLevel(); 1361 return pushElement( new Element("pre") ); 1362 } 1363 1364 return pushElement( new Element("span").setAttribute("class","inline-code") ); 1365 } 1366 1367 pushBack( ch2 ); 1368 1369 return pushElement( new Element("tt") ); 1370 } 1371 1372 pushBack( ch ); 1373 1374 return null; 1375 } 1376 1377 /** 1378 * Handles both }} and }}} 1379 */ 1380 private Element handleClosebrace() 1381 throws IOException 1382 { 1383 final int ch2 = nextToken(); 1384 1385 if( ch2 == '}' ) 1386 { 1387 final int ch3 = nextToken(); 1388 1389 if( ch3 == '}' ) 1390 { 1391 if( m_isPre ) 1392 { 1393 if( m_isPreBlock ) 1394 { 1395 popElement( "pre" ); 1396 } 1397 else 1398 { 1399 popElement( "span" ); 1400 } 1401 1402 m_isPre = false; 1403 m_isEscaping = false; 1404 return m_currentElement; 1405 } 1406 1407 m_plainTextBuf.append("}}}"); 1408 return m_currentElement; 1409 } 1410 1411 pushBack( ch3 ); 1412 1413 if( !m_isEscaping ) 1414 { 1415 return popElement("tt"); 1416 } 1417 } 1418 1419 pushBack( ch2 ); 1420 1421 return null; 1422 } 1423 1424 private Element handleDash() 1425 throws IOException 1426 { 1427 int ch = nextToken(); 1428 1429 if( ch == '-' ) 1430 { 1431 final int ch2 = nextToken(); 1432 1433 if( ch2 == '-' ) 1434 { 
1435 final int ch3 = nextToken(); 1436 1437 if( ch3 == '-' ) 1438 { 1439 // Empty away all the rest of the dashes. 1440 // Do not forget to return the first non-match back. 1441 do 1442 { 1443 ch = nextToken(); 1444 } 1445 while ( ch == '-' ); 1446 1447 pushBack(ch); 1448 startBlockLevel(); 1449 pushElement( new Element("hr") ); 1450 return popElement( "hr" ); 1451 } 1452 1453 pushBack( ch3 ); 1454 } 1455 pushBack( ch2 ); 1456 } 1457 1458 pushBack( ch ); 1459 1460 return null; 1461 } 1462 1463 private Element handleHeading() 1464 throws IOException 1465 { 1466 Element el = null; 1467 1468 final int ch = nextToken(); 1469 1470 final Heading hd = new Heading(); 1471 1472 if( ch == '!' ) 1473 { 1474 final int ch2 = nextToken(); 1475 1476 if( ch2 == '!' ) 1477 { 1478 final String title = peekAheadLine(); 1479 1480 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1481 } 1482 else 1483 { 1484 pushBack( ch2 ); 1485 final String title = peekAheadLine(); 1486 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1487 } 1488 } 1489 else 1490 { 1491 pushBack( ch ); 1492 final String title = peekAheadLine(); 1493 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1494 } 1495 1496 callHeadingListenerChain( hd ); 1497 1498 m_lastHeading = hd; 1499 1500 if( el != null ) pushElement(el); 1501 1502 return el; 1503 } 1504 1505 /** 1506 * Reads the stream until the next EOL or EOF. Note that it will also read the 1507 * EOL from the stream. 
1508 */ 1509 private StringBuilder readUntilEOL() 1510 throws IOException 1511 { 1512 int ch; 1513 final StringBuilder buf = new StringBuilder( 256 ); 1514 1515 while( true ) 1516 { 1517 ch = nextToken(); 1518 1519 if( ch == -1 ) 1520 break; 1521 1522 buf.append( (char) ch ); 1523 1524 if( ch == '\n' ) 1525 break; 1526 } 1527 return buf; 1528 } 1529 1530 /** Controls whether italic is restarted after a paragraph shift */ 1531 1532 private boolean m_restartitalic = false; 1533 private boolean m_restartbold = false; 1534 1535 private boolean m_newLine; 1536 1537 /** 1538 * Starts a block level element, therefore closing 1539 * a potential open paragraph tag. 1540 */ 1541 private void startBlockLevel() 1542 { 1543 // These may not continue over block level limits in XHTML 1544 1545 popElement("i"); 1546 popElement("b"); 1547 popElement("tt"); 1548 1549 if( m_isOpenParagraph ) 1550 { 1551 m_isOpenParagraph = false; 1552 popElement("p"); 1553 m_plainTextBuf.append("\n"); // Just small beautification 1554 } 1555 1556 m_restartitalic = m_isitalic; 1557 m_restartbold = m_isbold; 1558 1559 m_isitalic = false; 1560 m_isbold = false; 1561 } 1562 1563 private static String getListType( final char c ) 1564 { 1565 if( c == '*' ) 1566 { 1567 return "ul"; 1568 } 1569 else if( c == '#' ) 1570 { 1571 return "ol"; 1572 } 1573 throw new InternalWikiException("Parser got faulty list type: "+c); 1574 } 1575 /** 1576 * Like original handleOrderedList() and handleUnorderedList() 1577 * however handles both ordered ('#') and unordered ('*') mixed together. 1578 */ 1579 1580 // FIXME: Refactor this; it's a bit messy. 
1581 1582 private Element handleGeneralList() 1583 throws IOException 1584 { 1585 startBlockLevel(); 1586 1587 String strBullets = readWhile( "*#" ); 1588 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution 1589 final int numBullets = strBullets.length(); 1590 1591 // override the beginning portion of bullet pattern to be like the previous 1592 // to simulate PHPWiki style lists 1593 1594 if(m_allowPHPWikiStyleLists) 1595 { 1596 // only substitute if different 1597 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1598 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) 1599 { 1600 if(numBullets <= m_genlistlevel) 1601 { 1602 // Substitute all but the last character (keep the expressed bullet preference) 1603 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") 1604 + strBullets.substring(numBullets-1, numBullets); 1605 } 1606 else 1607 { 1608 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); 1609 } 1610 } 1611 } 1612 1613 // 1614 // Check if this is still of the same type 1615 // 1616 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1617 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) 1618 { 1619 if( numBullets > m_genlistlevel ) 1620 { 1621 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); 1622 1623 for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) 1624 { 1625 // bullets are growing, get from new bullet list 1626 pushElement( new Element("li") ); 1627 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); 1628 } 1629 } 1630 else if( numBullets < m_genlistlevel ) 1631 { 1632 // Close the previous list item. 
1633 // buf.append( m_renderer.closeListItem() ); 1634 popElement( "li" ); 1635 1636 for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) 1637 { 1638 // bullets are shrinking, get from old bullet list 1639 1640 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1641 if( m_genlistlevel > 0 ) 1642 { 1643 popElement( "li" ); 1644 } 1645 1646 } 1647 } 1648 else 1649 { 1650 if( m_genlistlevel > 0 ) 1651 { 1652 popElement( "li" ); 1653 } 1654 } 1655 } 1656 else 1657 { 1658 // 1659 // The pattern has changed, unwind and restart 1660 // 1661 int numEqualBullets; 1662 final int numCheckBullets; 1663 1664 // find out how much is the same 1665 numEqualBullets = 0; 1666 numCheckBullets = Math.min(numBullets,m_genlistlevel); 1667 1668 while( numEqualBullets < numCheckBullets ) 1669 { 1670 // if the bullets are equal so far, keep going 1671 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) 1672 numEqualBullets++; 1673 // otherwise giveup, we have found how many are equal 1674 else 1675 break; 1676 } 1677 1678 //unwind 1679 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) 1680 { 1681 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); 1682 if( m_genlistlevel > numBullets ) 1683 { 1684 popElement("li"); 1685 } 1686 } 1687 1688 //rewind 1689 1690 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); 1691 for(int i = numEqualBullets; i < numBullets; i++) 1692 { 1693 pushElement( new Element("li") ); 1694 pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); 1695 } 1696 m_genlistlevel = numBullets; 1697 } 1698 1699 // 1700 // Push a new list item, and eat away any extra whitespace 1701 // 1702 pushElement( new Element("li") ); 1703 readWhile(" "); 1704 1705 // work done, remember the new bullet list (in place of old one) 1706 m_genlistBulletBuffer.setLength(0); 1707 m_genlistBulletBuffer.append(strBullets); 1708 1709 return m_currentElement; 
1710 } 1711 1712 private Element unwindGeneralList() 1713 { 1714 //unwind 1715 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 1716 { 1717 popElement( "li" ); 1718 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1719 } 1720 1721 m_genlistBulletBuffer.setLength(0); 1722 1723 return null; 1724 } 1725 1726 1727 private Element handleDefinitionList() 1728 throws IOException 1729 { 1730 if( !m_isdefinition ) 1731 { 1732 m_isdefinition = true; 1733 1734 startBlockLevel(); 1735 1736 pushElement( new Element("dl") ); 1737 return pushElement( new Element("dt") ); 1738 } 1739 1740 return null; 1741 } 1742 1743 private Element handleOpenbracket() 1744 throws IOException 1745 { 1746 final StringBuilder sb = new StringBuilder(40); 1747 final int pos = getPosition(); 1748 int ch = nextToken(); 1749 boolean isPlugin = false; 1750 1751 if( ch == '[' ) 1752 { 1753 if( m_wysiwygEditorMode ) 1754 { 1755 sb.append( '[' ); 1756 } 1757 1758 sb.append( (char)ch ); 1759 1760 while( (ch = nextToken()) == '[' ) 1761 { 1762 sb.append( (char)ch ); 1763 } 1764 } 1765 1766 1767 if( ch == '{' ) 1768 { 1769 isPlugin = true; 1770 } 1771 1772 pushBack( ch ); 1773 1774 if( sb.length() > 0 ) 1775 { 1776 m_plainTextBuf.append( sb ); 1777 return m_currentElement; 1778 } 1779 1780 // 1781 // Find end of hyperlink 1782 // 1783 1784 ch = nextToken(); 1785 int nesting = 1; // Check for nested plugins 1786 1787 while( ch != -1 ) 1788 { 1789 final int ch2 = nextToken(); pushBack(ch2); 1790 1791 if( isPlugin ) 1792 { 1793 if( ch == '[' && ch2 == '{' ) 1794 { 1795 nesting++; 1796 } 1797 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 1798 { 1799 break; 1800 } 1801 else if( ch == '}' && ch2 == ']' ) 1802 { 1803 // NB: This will be decremented once at the end 1804 nesting--; 1805 } 1806 } 1807 else 1808 { 1809 if( ch == ']' ) 1810 { 1811 break; 1812 } 1813 } 1814 1815 sb.append( (char) ch ); 1816 1817 ch = nextToken(); 1818 } 1819 1820 // 1821 // If the link is 
never finished, do some tricks to display the rest of the line 1822 // unchanged. 1823 // 1824 if( ch == -1 ) 1825 { 1826 log.debug("Warning: unterminated link detected!"); 1827 m_isEscaping = true; 1828 m_plainTextBuf.append( sb ); 1829 flushPlainText(); 1830 m_isEscaping = false; 1831 return m_currentElement; 1832 } 1833 1834 return handleHyperlinks( sb.toString(), pos ); 1835 } 1836 1837 /** 1838 * Reads the stream until the current brace is closed or stream end. 1839 */ 1840 private String readBraceContent( final char opening, final char closing ) 1841 throws IOException 1842 { 1843 final StringBuilder sb = new StringBuilder(40); 1844 int braceLevel = 1; 1845 int ch; 1846 while(( ch = nextToken() ) != -1 ) 1847 { 1848 if( ch == '\\' ) 1849 { 1850 continue; 1851 } 1852 else if ( ch == opening ) 1853 { 1854 braceLevel++; 1855 } 1856 else if ( ch == closing ) 1857 { 1858 braceLevel--; 1859 if (braceLevel==0) 1860 { 1861 break; 1862 } 1863 } 1864 sb.append( (char)ch ); 1865 } 1866 return sb.toString(); 1867 } 1868 1869 1870 /** 1871 * Handles constructs of type %%(style) and %%class 1872 * @param newLine 1873 * @return An Element containing the div or span, depending on the situation. 1874 * @throws IOException 1875 */ 1876 private Element handleDiv( final boolean newLine ) 1877 throws IOException 1878 { 1879 int ch = nextToken(); 1880 Element el = null; 1881 1882 if( ch == '%' ) 1883 { 1884 String style = null; 1885 String clazz = null; 1886 1887 ch = nextToken(); 1888 1889 // 1890 // Style or class? 1891 // 1892 if( ch == '(' ) 1893 { 1894 style = readBraceContent('(',')'); 1895 } 1896 else if( Character.isLetter( (char) ch ) ) 1897 { 1898 pushBack( ch ); 1899 clazz = readUntil( "( \t\n\r" ); 1900 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 1901 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 1902 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 
1903 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 1904 1905 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 1906 //(2) remove any invalid character 1907 if( clazz != null){ 1908 1909 clazz = clazz.replace('.', ' ') 1910 .replaceAll("[^\\s-_\\w\\x200-\\x377]+",""); 1911 1912 } 1913 ch = nextToken(); 1914 1915 //check for %%class1.class2( style information ) 1916 if( ch == '(' ) 1917 { 1918 style = readBraceContent('(',')'); 1919 } 1920 // 1921 // Pop out only spaces, so that the upcoming EOL check does not check the 1922 // next line. 1923 // 1924 else if( ch == '\n' || ch == '\r' ) 1925 { 1926 pushBack(ch); 1927 } 1928 } 1929 else 1930 { 1931 // 1932 // Anything else stops. 1933 // 1934 1935 pushBack(ch); 1936 1937 try 1938 { 1939 final Boolean isSpan = m_styleStack.pop(); 1940 1941 if( isSpan == null ) 1942 { 1943 // Fail quietly 1944 } 1945 else if( isSpan.booleanValue() ) 1946 { 1947 el = popElement( "span" ); 1948 } 1949 else 1950 { 1951 el = popElement( "div" ); 1952 } 1953 } 1954 catch( final EmptyStackException e ) 1955 { 1956 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 1957 return m_currentElement; 1958 } 1959 1960 return el; 1961 } 1962 1963 // 1964 // Check if there is an attempt to do something nasty 1965 // 1966 1967 try 1968 { 1969 style = StringEscapeUtils.unescapeHtml4(style); 1970 if( style != null && style.indexOf("javascript:") != -1 ) 1971 { 1972 log.debug("Attempt to output javascript within CSS:"+style); 1973 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1974 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 1975 } 1976 } 1977 catch( final NumberFormatException e ) 1978 { 1979 // 1980 // If there are unknown entities, we don't want the parser to stop. 
1981 // 1982 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1983 final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 1984 return addElement( makeError( msg ) ); 1985 } 1986 1987 // 1988 // Decide if we should open a div or a span? 1989 // 1990 final String eol = peekAheadLine(); 1991 1992 if( eol.trim().length() > 0 ) 1993 { 1994 // There is stuff after the class 1995 1996 el = new Element("span"); 1997 1998 m_styleStack.push( Boolean.TRUE ); 1999 } 2000 else 2001 { 2002 startBlockLevel(); 2003 el = new Element("div"); 2004 m_styleStack.push( Boolean.FALSE ); 2005 } 2006 2007 if( style != null ) el.setAttribute("style", style); 2008 if( clazz != null ) el.setAttribute("class", clazz); 2009 el = pushElement( el ); 2010 2011 return el; 2012 } 2013 2014 pushBack(ch); 2015 2016 return el; 2017 } 2018 2019 private Element handleSlash( final boolean newLine ) 2020 throws IOException 2021 { 2022 final int ch = nextToken(); 2023 2024 pushBack(ch); 2025 if( ch == '%' && !m_styleStack.isEmpty() ) 2026 { 2027 return handleDiv( newLine ); 2028 } 2029 2030 return null; 2031 } 2032 2033 private Element handleBar( final boolean newLine ) 2034 throws IOException 2035 { 2036 Element el = null; 2037 2038 if( !m_istable && !newLine ) 2039 { 2040 return null; 2041 } 2042 2043 // 2044 // If the bar is in the first column, we will either start 2045 // a new table or continue the old one. 2046 // 2047 2048 if( newLine ) 2049 { 2050 if( !m_istable ) 2051 { 2052 startBlockLevel(); 2053 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2054 m_istable = true; 2055 m_rowNum = 0; 2056 } 2057 2058 m_rowNum++; 2059 final Element tr = ( m_rowNum % 2 != 0 ) 2060 ? 
new Element("tr").setAttribute("class", "odd") 2061 : new Element("tr"); 2062 el = pushElement( tr ); 2063 } 2064 2065 // 2066 // Check out which table cell element to start; 2067 // a header element (th) or a regular element (td). 2068 // 2069 final int ch = nextToken(); 2070 2071 if( ch == '|' ) 2072 { 2073 if( !newLine ) 2074 { 2075 el = popElement("th"); 2076 if( el == null ) popElement("td"); 2077 } 2078 el = pushElement( new Element("th") ); 2079 } 2080 else 2081 { 2082 if( !newLine ) 2083 { 2084 el = popElement("td"); 2085 if( el == null ) popElement("th"); 2086 } 2087 2088 el = pushElement( new Element("td") ); 2089 2090 pushBack( ch ); 2091 } 2092 2093 return el; 2094 } 2095 2096 /** 2097 * Generic escape of next character or entity. 2098 */ 2099 private Element handleTilde() 2100 throws IOException 2101 { 2102 final int ch = nextToken(); 2103 2104 if( ch == ' ' ) 2105 { 2106 if( m_wysiwygEditorMode ) 2107 { 2108 m_plainTextBuf.append( "~ " ); 2109 } 2110 return m_currentElement; 2111 } 2112 2113 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2114 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2115 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2116 { 2117 if( m_wysiwygEditorMode ) 2118 { 2119 m_plainTextBuf.append( '~' ); 2120 } 2121 2122 m_plainTextBuf.append( (char)ch ); 2123 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2124 return m_currentElement; 2125 } 2126 2127 // No escape. 2128 pushBack( ch ); 2129 2130 return null; 2131 } 2132 2133 private void fillBuffer( final Element startElement ) 2134 throws IOException 2135 { 2136 m_currentElement = startElement; 2137 2138 boolean quitReading = false; 2139 m_newLine = true; 2140 disableOutputEscaping(); 2141 2142 while(!quitReading) 2143 { 2144 final int ch = nextToken(); 2145 2146 if( ch == -1 ) break; 2147 2148 // 2149 // Check if we're actually ending the preformatted mode. 2150 // We still must do an entity transformation here. 
2151 // 2152 if( m_isEscaping ) 2153 { 2154 if( ch == '}' ) 2155 { 2156 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2157 } 2158 else if( ch == -1 ) 2159 { 2160 quitReading = true; 2161 } 2162 else if( ch == '\r' ) 2163 { 2164 // DOS line feeds we ignore. 2165 } 2166 else if( ch == '<' ) 2167 { 2168 m_plainTextBuf.append( "<" ); 2169 } 2170 else if( ch == '>' ) 2171 { 2172 m_plainTextBuf.append( ">" ); 2173 } 2174 else if( ch == '&' ) 2175 { 2176 m_plainTextBuf.append( "&" ); 2177 } 2178 else if( ch == '~' ) 2179 { 2180 String braces = readWhile("}"); 2181 if( braces.length() >= 3 ) 2182 { 2183 m_plainTextBuf.append("}}}"); 2184 2185 braces = braces.substring(3); 2186 } 2187 else 2188 { 2189 m_plainTextBuf.append( (char) ch ); 2190 } 2191 2192 for( int i = braces.length()-1; i >= 0; i-- ) 2193 { 2194 pushBack(braces.charAt(i)); 2195 } 2196 } 2197 else 2198 { 2199 m_plainTextBuf.append( (char) ch ); 2200 } 2201 2202 continue; 2203 } 2204 2205 // 2206 // An empty line stops a list 2207 // 2208 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2209 { 2210 m_plainTextBuf.append(unwindGeneralList()); 2211 } 2212 2213 if( m_newLine && ch != '|' && m_istable ) 2214 { 2215 popElement("table"); 2216 m_istable = false; 2217 } 2218 2219 int skip = IGNORE; 2220 2221 // 2222 // Do the actual parsing and catch any errors. 2223 // 2224 try 2225 { 2226 skip = parseToken( ch ); 2227 } 2228 catch( final IllegalDataException e ) 2229 { 2230 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2231 2232 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2233 } 2234 2235 // 2236 // The idea is as follows: If the handler method returns 2237 // an element (el != null), it is assumed that it has been 2238 // added in the stack. Otherwise the character is added 2239 // as is to the plaintext buffer. 
2240 // 2241 // For the transition phase, if s != null, it also gets 2242 // added in the plaintext buffer. 2243 // 2244 2245 switch( skip ) 2246 { 2247 case ELEMENT: 2248 m_newLine = false; 2249 break; 2250 2251 case CHARACTER: 2252 m_plainTextBuf.append( (char) ch ); 2253 m_newLine = false; 2254 break; 2255 2256 case IGNORE: 2257 default: 2258 break; 2259 } 2260 } 2261 2262 closeHeadings(); 2263 popElement("domroot"); 2264 } 2265 2266 private String cleanupSuspectData( final String s ) 2267 { 2268 final StringBuilder sb = new StringBuilder( s.length() ); 2269 2270 for( int i = 0; i < s.length(); i++ ) 2271 { 2272 final char c = s.charAt(i); 2273 2274 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2275 else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); 2276 } 2277 2278 return sb.toString(); 2279 } 2280 2281 /** The token is a plain character. */ 2282 protected static final int CHARACTER = 0; 2283 2284 /** The token is a wikimarkup element. */ 2285 protected static final int ELEMENT = 1; 2286 2287 /** The token is to be ignored. */ 2288 protected static final int IGNORE = 2; 2289 2290 /** 2291 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2292 * you think this was a wiki markup element, and IGNORE, if you think 2293 * we should ignore this altogether. 2294 * <p> 2295 * To add your own MarkupParser, you can override this method, but it 2296 * is recommended that you call super.parseToken() as well to gain advantage 2297 * of JSPWiki's own markup. You can call it at the start of your own 2298 * parseToken() or end - it does not matter. 2299 * 2300 * @param ch The character under investigation 2301 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2302 * @throws IOException If parsing fails. 2303 */ 2304 protected int parseToken( final int ch ) 2305 throws IOException 2306 { 2307 Element el = null; 2308 2309 // 2310 // Now, check the incoming token. 
2311 // 2312 switch( ch ) 2313 { 2314 case '\r': 2315 // DOS linefeeds we forget 2316 return IGNORE; 2317 2318 case '\n': 2319 // 2320 // Close things like headings, etc. 2321 // 2322 2323 // FIXME: This is not really very fast 2324 2325 closeHeadings(); 2326 2327 popElement("dl"); // Close definition lists. 2328 if( m_istable ) 2329 { 2330 popElement("tr"); 2331 } 2332 2333 m_isdefinition = false; 2334 2335 if( m_newLine ) 2336 { 2337 // Paragraph change. 2338 startBlockLevel(); 2339 2340 // 2341 // Figure out which elements cannot be enclosed inside 2342 // a <p></p> pair according to XHTML rules. 2343 // 2344 final String nextLine = peekAheadLine(); 2345 if( nextLine.length() == 0 || 2346 (nextLine.length() > 0 && 2347 !nextLine.startsWith("{{{") && 2348 !nextLine.startsWith("----") && 2349 !nextLine.startsWith("%%") && 2350 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2351 { 2352 pushElement( new Element("p") ); 2353 m_isOpenParagraph = true; 2354 2355 if( m_restartitalic ) 2356 { 2357 pushElement( new Element("i") ); 2358 m_isitalic = true; 2359 m_restartitalic = false; 2360 } 2361 if( m_restartbold ) 2362 { 2363 pushElement( new Element("b") ); 2364 m_isbold = true; 2365 m_restartbold = false; 2366 } 2367 } 2368 } 2369 else 2370 { 2371 m_plainTextBuf.append("\n"); 2372 m_newLine = true; 2373 } 2374 return IGNORE; 2375 2376 2377 case '\\': 2378 el = handleBackslash(); 2379 break; 2380 2381 case '_': 2382 el = handleUnderscore(); 2383 break; 2384 2385 case '\'': 2386 el = handleApostrophe(); 2387 break; 2388 2389 case '{': 2390 el = handleOpenbrace( m_newLine ); 2391 break; 2392 2393 case '}': 2394 el = handleClosebrace(); 2395 break; 2396 2397 case '-': 2398 if( m_newLine ) 2399 el = handleDash(); 2400 2401 break; 2402 2403 case '!': 2404 if( m_newLine ) 2405 { 2406 el = handleHeading(); 2407 } 2408 break; 2409 2410 case ';': 2411 if( m_newLine ) 2412 { 2413 el = handleDefinitionList(); 2414 } 2415 break; 2416 2417 case ':': 2418 if( m_isdefinition ) 2419 { 
2420 popElement("dt"); 2421 el = pushElement( new Element("dd") ); 2422 m_isdefinition = false; 2423 } 2424 break; 2425 2426 case '[': 2427 el = handleOpenbracket(); 2428 break; 2429 2430 case '*': 2431 if( m_newLine ) 2432 { 2433 pushBack('*'); 2434 el = handleGeneralList(); 2435 } 2436 break; 2437 2438 case '#': 2439 if( m_newLine ) 2440 { 2441 pushBack('#'); 2442 el = handleGeneralList(); 2443 } 2444 break; 2445 2446 case '|': 2447 el = handleBar( m_newLine ); 2448 break; 2449 2450 case '~': 2451 el = handleTilde(); 2452 break; 2453 2454 case '%': 2455 el = handleDiv( m_newLine ); 2456 break; 2457 2458 case '/': 2459 el = handleSlash( m_newLine ); 2460 break; 2461 2462 default: 2463 break; 2464 } 2465 2466 return el != null ? ELEMENT : CHARACTER; 2467 } 2468 2469 private void closeHeadings() 2470 { 2471 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2472 { 2473 // Add the hash anchor element at the end of the heading 2474 addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2475 m_lastHeading = null; 2476 } 2477 popElement("h2"); 2478 popElement("h3"); 2479 popElement("h4"); 2480 } 2481 2482 /** 2483 * Parses the entire document from the Reader given in the constructor or 2484 * set by {@link #setInputReader(Reader)}. 2485 * 2486 * @return A WikiDocument, ready to be passed to the renderer. 2487 * @throws IOException If parsing cannot be accomplished. 2488 */ 2489 @Override 2490 public WikiDocument parse() 2491 throws IOException 2492 { 2493 final WikiDocument d = new WikiDocument( m_context.getPage() ); 2494 d.setContext( m_context ); 2495 2496 final Element rootElement = new Element("domroot"); 2497 2498 d.setRootElement( rootElement ); 2499 2500 fillBuffer( rootElement ); 2501 2502 paragraphify(rootElement); 2503 2504 return d; 2505 } 2506 2507 /** 2508 * Checks out that the first paragraph is correctly installed. 
2509 * 2510 * @param rootElement 2511 */ 2512 private void paragraphify( final Element rootElement) 2513 { 2514 // 2515 // Add the paragraph tag to the first paragraph 2516 // 2517 final List< Content > kids = rootElement.getContent(); 2518 2519 if( rootElement.getChild("p") != null ) 2520 { 2521 final ArrayList<Content> ls = new ArrayList<>(); 2522 int idxOfFirstContent = 0; 2523 int count = 0; 2524 2525 for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) 2526 { 2527 final Content c = i.next(); 2528 if( c instanceof Element ) 2529 { 2530 final String name = ( ( Element )c ).getName(); 2531 if( isBlockLevel( name ) ) break; 2532 } 2533 2534 if( !(c instanceof ProcessingInstruction) ) 2535 { 2536 ls.add( c ); 2537 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2538 } 2539 } 2540 2541 // 2542 // If there were any elements, then add a new <p> (unless it would 2543 // be an empty one) 2544 // 2545 if( ls.size() > 0 ) 2546 { 2547 final Element newel = new Element("p"); 2548 2549 for( final Iterator< Content > i = ls.iterator(); i.hasNext(); ) 2550 { 2551 final Content c = i.next(); 2552 2553 c.detach(); 2554 newel.addContent(c); 2555 } 2556 2557 // 2558 // Make sure there are no empty <p/> tags added. 2559 // 2560 if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) 2561 rootElement.addContent(idxOfFirstContent, newel); 2562 } 2563 } 2564 } 2565 2566}