001/* 002 Licensed to the Apache Software Foundation (ASF) under one 003 or more contributor license agreements. See the NOTICE file 004 distributed with this work for additional information 005 regarding copyright ownership. The ASF licenses this file 006 to you under the Apache License, Version 2.0 (the 007 "License"); you may not use this file except in compliance 008 with the License. You may obtain a copy of the License at 009 010 http://www.apache.org/licenses/LICENSE-2.0 011 012 Unless required by applicable law or agreed to in writing, 013 software distributed under the License is distributed on an 014 "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 015 KIND, either express or implied. See the License for the 016 specific language governing permissions and limitations 017 under the License. 018 */ 019package org.apache.wiki.parser; 020 021import org.apache.commons.lang3.StringUtils; 022import org.apache.commons.text.StringEscapeUtils; 023import org.apache.logging.log4j.LogManager; 024import org.apache.logging.log4j.Logger; 025import org.apache.oro.text.regex.MalformedPatternException; 026import org.apache.oro.text.regex.MatchResult; 027import org.apache.oro.text.regex.Pattern; 028import org.apache.oro.text.regex.PatternCompiler; 029import org.apache.oro.text.regex.PatternMatcher; 030import org.apache.oro.text.regex.Perl5Compiler; 031import org.apache.oro.text.regex.Perl5Matcher; 032import org.apache.wiki.InternalWikiException; 033import org.apache.wiki.StringTransmutator; 034import org.apache.wiki.api.core.Acl; 035import org.apache.wiki.api.core.Context; 036import org.apache.wiki.api.core.ContextEnum; 037import org.apache.wiki.api.core.Page; 038import org.apache.wiki.api.exceptions.PluginException; 039import org.apache.wiki.api.plugin.Plugin; 040import org.apache.wiki.api.spi.Wiki; 041import org.apache.wiki.attachment.AttachmentManager; 042import org.apache.wiki.auth.AuthorizationManager; 043import org.apache.wiki.auth.UserManager; 044import 
org.apache.wiki.auth.WikiSecurityException; 045import org.apache.wiki.auth.acl.AclManager; 046import org.apache.wiki.i18n.InternationalizationManager; 047import org.apache.wiki.preferences.Preferences; 048import org.apache.wiki.util.TextUtil; 049import org.apache.wiki.util.XmlUtil; 050import org.apache.wiki.variables.VariableManager; 051import org.jdom2.Attribute; 052import org.jdom2.Content; 053import org.jdom2.Element; 054import org.jdom2.IllegalDataException; 055import org.jdom2.ProcessingInstruction; 056import org.jdom2.Verifier; 057 058import javax.xml.transform.Result; 059import java.io.IOException; 060import java.io.Reader; 061import java.io.StringReader; 062import java.text.MessageFormat; 063import java.util.ArrayList; 064import java.util.Arrays; 065import java.util.Collection; 066import java.util.EmptyStackException; 067import java.util.HashMap; 068import java.util.Iterator; 069import java.util.List; 070import java.util.Map; 071import java.util.Properties; 072import java.util.ResourceBundle; 073import java.util.Stack; 074 075/** 076 * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the 077 * heart and soul of JSPWiki : make sure you test properly anything that is added, 078 * or else it breaks down horribly. 
079 * 080 * @since 2.4 081 */ 082public class JSPWikiMarkupParser extends MarkupParser { 083 084 protected static final int READ = 0; 085 protected static final int EDIT = 1; 086 protected static final int EMPTY = 2; // Empty message 087 protected static final int LOCAL = 3; 088 protected static final int LOCALREF = 4; 089 protected static final int IMAGE = 5; 090 protected static final int EXTERNAL = 6; 091 protected static final int INTERWIKI = 7; 092 protected static final int IMAGELINK = 8; 093 protected static final int IMAGEWIKILINK = 9; 094 protected static final int ATTACHMENT = 10; 095 096 private static final Logger log = LogManager.getLogger( JSPWikiMarkupParser.class ); 097 098 private boolean m_isbold; 099 private boolean m_isitalic; 100 private boolean m_istable; 101 private boolean m_isPre; 102 private boolean m_isEscaping; 103 private boolean m_isdefinition; 104 private boolean m_isPreBlock; 105 106 /** Contains style information, in multiple forms. */ 107 private final Stack<Boolean> m_styleStack = new Stack<>(); 108 109 // general list handling 110 private int m_genlistlevel; 111 private final StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern 112 private final boolean m_allowPHPWikiStyleLists = true; 113 114 private boolean m_isOpenParagraph; 115 116 /** Parser for extended link functionality. */ 117 private final LinkParser m_linkParser = new LinkParser(); 118 119 /** Keeps track of any plain text that gets put in the Text nodes */ 120 private StringBuilder m_plainTextBuf = new StringBuilder(20); 121 122 private Element m_currentElement; 123 124 /** Keep track of duplicate header names. */ 125 private final Map<String, Integer> m_titleSectionCounter = new HashMap<>(); 126 127 /** If true, then considers CamelCase links as well. 
*/ 128 private boolean m_camelCaseLinks; 129 130 /** If true, then generate special output for wysiwyg editing in certain cases */ 131 private boolean m_wysiwygEditorMode; 132 133 /** If true, consider URIs that have no brackets as well. */ 134 // FIXME: Currently reserved, but not used. 135 private boolean m_plainUris; 136 137 /** If true, all outward links use a small link image. */ 138 private boolean m_useOutlinkImage = true; 139 140 private boolean m_useAttachmentImage = true; 141 142 /** If true, allows raw HTML. */ 143 private boolean m_allowHTML; 144 145 private boolean m_useRelNofollow; 146 147 private final PatternCompiler m_compiler = new Perl5Compiler(); 148 149 static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; 150 151 private final PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); 152 private Pattern m_camelCasePattern; 153 154 private int m_rowNum = 1; 155 156 private Heading m_lastHeading; 157 158 private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; 159 160 /** 161 * Creates a markup parser. 162 * 163 * @param context The WikiContext which controls the parsing 164 * @param in Where the data is read from. 165 */ 166 public JSPWikiMarkupParser( final Context context, final Reader in ) 167 { 168 super( context, in ); 169 initialize(); 170 } 171 172 // FIXME: parsers should be pooled for better performance. 
173 private void initialize() { 174 initInlineImagePatterns(); 175 176 m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN ); 177 if( m_camelCasePattern == null ) { 178 try { 179 m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); 180 } catch( final MalformedPatternException e ) { 181 log.fatal("Internal error: Someone put in a faulty pattern.",e); 182 throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); 183 } 184 m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); 185 } 186 187 // Set the properties. 188 final Properties props = m_engine.getWikiProperties(); 189 final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); 190 191 if( cclinks != null ) { 192 m_camelCaseLinks = TextUtil.isPositive( cclinks ); 193 } else { 194 m_camelCaseLinks = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks ); 195 } 196 197 final Boolean wysiwygVariable = m_context.getVariable( Context.VAR_WYSIWYG_EDITOR_MODE ); 198 if( wysiwygVariable != null ) { 199 m_wysiwygEditorMode = wysiwygVariable; 200 } 201 202 m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); 203 m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); 204 m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); 205 m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); 206 m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); 207 208 if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) { 209 disableAccessRules(); 210 } 211 212 m_context.getPage().setHasMetadata(); 213 } 214 215 /** 216 * Calls a transmutator chain. 
217 * 218 * @param list Chain to call 219 * @param text Text that should be passed to the mutate() method of each of the mutators in the chain. 220 * @return The result of the mutation. 221 */ 222 protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) { 223 if( list == null || list.size() == 0 ) { 224 return text; 225 } 226 227 for( final StringTransmutator m : list ) { 228 text = m.mutate( m_context, text ); 229 } 230 231 return text; 232 } 233 234 /** 235 * Calls the heading listeners. 236 * 237 * @param param A Heading object. 238 */ 239 protected void callHeadingListenerChain( final Heading param ) { 240 for( final HeadingListener h : m_headingListenerChain ) { 241 h.headingAdded( m_context, param ); 242 } 243 } 244 245 /** 246 * Creates a JDOM anchor element. Can be overridden to change the URL creation, 247 * if you really know what you are doing. 248 * 249 * @param type One of the types above 250 * @param link URL to which to link to 251 * @param text Link text 252 * @param section If a particular section identifier is required. 253 * @return An A element. 254 * @since 2.4.78 255 */ 256 protected Element createAnchor( final int type, final String link, String text, String section) 257 { 258 text = escapeHTMLEntities( text ); 259 section = escapeHTMLEntities( section ); 260 final Element el = new Element("a"); 261 el.setAttribute("class",CLASS_TYPES[type]); 262 el.setAttribute("href",link+section); 263 el.addContent(text); 264 return el; 265 } 266 267 private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) 268 { 269 Element el = null; 270 271 if( text == null ) text = link; 272 273 text = callMutatorChain( m_linkMutators, text ); 274 275 section = (section != null) ? ("#"+section) : ""; 276 277 // Make sure we make a link name that can be accepted 278 // as a valid URL. 
279 280 if( link.isEmpty() ) 281 { 282 type = EMPTY; 283 } 284 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 285 286 switch(type) 287 { 288 case READ: 289 el = createAnchor( READ, m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), link), text, section ); 290 break; 291 292 case EDIT: 293 el = createAnchor( EDIT, m_context.getURL( ContextEnum.PAGE_EDIT.getRequestContext(),link), text, "" ); 294 el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); 295 296 break; 297 298 case EMPTY: 299 el = new Element("u").addContent(text); 300 break; 301 302 // 303 // These two are for local references - footnotes and 304 // references to footnotes. 305 // We embed the page name (or whatever WikiContext gives us) 306 // to make sure the links are unique across Wiki. 307 // 308 case LOCALREF: 309 el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); 310 break; 311 312 case LOCAL: 313 el = new Element("a").setAttribute("class",CLASS_FOOTNOTE); 314 el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1)); 315 el.addContent("["+text+"]"); 316 break; 317 318 // 319 // With the image, external and interwiki types we need to 320 // make sure nobody can put in Javascript or something else 321 // annoying into the links themselves. We do this by preventing 322 // a haxor from stopping the link name short with quotes in 323 // fillBuffer(). 
324 // 325 case IMAGE: 326 el = new Element("img").setAttribute("class","inline"); 327 el.setAttribute("src",link); 328 el.setAttribute("alt",text); 329 break; 330 331 case IMAGELINK: 332 el = new Element("img").setAttribute("class","inline"); 333 el.setAttribute("src",link); 334 el.setAttribute("alt",text); 335 el = createAnchor(IMAGELINK,text,"","").addContent(el); 336 break; 337 338 case IMAGEWIKILINK: 339 final String pagelink = m_context.getURL( ContextEnum.PAGE_VIEW.getRequestContext(), text ); 340 el = new Element("img").setAttribute("class","inline"); 341 el.setAttribute("src",link); 342 el.setAttribute("alt",text); 343 el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el); 344 break; 345 346 case EXTERNAL: 347 el = createAnchor( EXTERNAL, link, text, section ); 348 if( m_useRelNofollow ) el.setAttribute("rel","nofollow"); 349 break; 350 351 case INTERWIKI: 352 el = createAnchor( INTERWIKI, link, text, section ); 353 break; 354 355 case ATTACHMENT: 356 final String attlink = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), link ); 357 final String infolink = m_context.getURL( ContextEnum.PAGE_INFO.getRequestContext(), link ); 358 final String imglink = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), "images/attachment_small.png" ); 359 el = createAnchor( ATTACHMENT, attlink, text, "" ); 360 361 if( m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) { 362 el.setAttribute("download", ""); 363 } 364 365 pushElement(el); 366 popElement(el.getName()); 367 368 if( m_useAttachmentImage ) 369 { 370 el = new Element("img").setAttribute("src",imglink); 371 el.setAttribute("border","0"); 372 el.setAttribute("alt","(info)"); 373 374 el = new Element("a").setAttribute("href",infolink).addContent(el); 375 el.setAttribute("class","infolink"); 376 } 377 else 378 { 379 el = null; 380 } 381 break; 382 383 default: 384 break; 385 } 386 387 if( el != null && attributes != null ) 388 { 389 while( attributes.hasNext() ) 
390 { 391 final Attribute attr = attributes.next(); 392 if( attr != null ) 393 { 394 el.setAttribute(attr); 395 } 396 } 397 } 398 399 if( el != null ) 400 { 401 flushPlainText(); 402 m_currentElement.addContent( el ); 403 } 404 return el; 405 } 406 407 /** 408 * These are all of the HTML 4.01 block-level elements. 409 */ 410 private static final String[] BLOCK_ELEMENTS = { 411 "address", "blockquote", "div", "dl", "fieldset", "form", 412 "h1", "h2", "h3", "h4", "h5", "h6", 413 "hr", "noscript", "ol", "p", "pre", "table", "ul" 414 }; 415 416 private static boolean isBlockLevel( final String name ) 417 { 418 return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; 419 } 420 421 /** 422 * This method peeks ahead in the stream until EOL and returns the result. 423 * It will keep the buffers untouched. 424 * 425 * @return The string from the current position to the end of line. 426 */ 427 428 // FIXME: Always returns an empty line, even if the stream is full. 429 private String peekAheadLine() 430 throws IOException 431 { 432 final String s = readUntilEOL().toString(); 433 434 if( s.length() > PUSHBACK_BUFFER_SIZE ) 435 { 436 log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover..."); 437 pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) ); 438 } 439 else 440 { 441 try 442 { 443 pushBack( s ); 444 } 445 catch( final IOException e ) 446 { 447 log.warn("Pushback failed: the line is probably too long. Attempting to recover."); 448 } 449 } 450 return s; 451 } 452 453 private int flushPlainText() 454 { 455 final int numChars = m_plainTextBuf.length(); 456 457 if( numChars > 0 ) 458 { 459 String buf; 460 461 if( !m_allowHTML ) 462 { 463 buf = escapeHTMLEntities(m_plainTextBuf.toString()); 464 } 465 else 466 { 467 buf = m_plainTextBuf.toString(); 468 } 469 // 470 // We must first empty the buffer because the side effect of 471 // calling makeCamelCaseLink() is to call this routine. 
472 // 473 474 m_plainTextBuf = new StringBuilder(20); 475 476 try 477 { 478 // 479 // This is the heaviest part of parsing, and therefore we can 480 // do some optimization here. 481 // 482 // 1) Only when the length of the buffer is big enough, we try to do the match 483 // 484 485 if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) 486 { 487 // System.out.println("Buffer="+buf); 488 489 while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) 490 { 491 final MatchResult result = m_camelCaseMatcher.getMatch(); 492 493 final String firstPart = buf.substring(0,result.beginOffset(0)); 494 String prefix = result.group(1); 495 496 if( prefix == null ) prefix = ""; 497 498 final String camelCase = result.group(2); 499 final String protocol = result.group(3); 500 String uri = protocol+result.group(4); 501 buf = buf.substring(result.endOffset(0)); 502 503 m_currentElement.addContent( firstPart ); 504 505 // 506 // Check if the user does not wish to do URL or WikiWord expansion 507 // 508 if( prefix.endsWith("~") || prefix.indexOf('[') != -1 ) 509 { 510 if( prefix.endsWith("~") ) 511 { 512 if( m_wysiwygEditorMode ) 513 { 514 m_currentElement.addContent( "~" ); 515 } 516 prefix = prefix.substring(0,prefix.length()-1); 517 } 518 if( camelCase != null ) 519 { 520 m_currentElement.addContent( prefix+camelCase ); 521 } 522 else if( protocol != null ) 523 { 524 m_currentElement.addContent( prefix+uri ); 525 } 526 continue; 527 } 528 529 // 530 // Fine, then let's check what kind of a link this was 531 // and emit the proper elements 532 // 533 if( protocol != null ) 534 { 535 final char c = uri.charAt(uri.length()-1); 536 if( c == '.' 
|| c == ',' ) 537 { 538 uri = uri.substring(0,uri.length()-1); 539 buf = c + buf; 540 } 541 // System.out.println("URI match "+uri); 542 m_currentElement.addContent( prefix ); 543 makeDirectURILink( uri ); 544 } 545 else 546 { 547 // System.out.println("Matched: '"+camelCase+"'"); 548 // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); 549 // System.out.println("prefix="+prefix); 550 m_currentElement.addContent( prefix ); 551 552 makeCamelCaseLink( camelCase ); 553 } 554 } 555 556 m_currentElement.addContent( buf ); 557 } 558 else 559 { 560 // 561 // No camelcase asked for, just add the elements 562 // 563 m_currentElement.addContent( buf ); 564 } 565 } 566 catch( final IllegalDataException e ) 567 { 568 // 569 // Sometimes it's possible that illegal XML chars is added to the data. 570 // Here we make sure it does not stop parsing. 571 // 572 m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); 573 } 574 } 575 576 return numChars; 577 } 578 579 /** 580 * Escapes XML entities in a HTML-compatible way (i.e. does not escape 581 * entities that are already escaped). 582 * 583 * @param buf 584 * @return An escaped string. 
*/
private String escapeHTMLEntities( final String buf ) {
    // '<', '>' and '"' are always replaced by their entity. An '&' is left alone
    // only when it starts something that looks like an entity reference; otherwise
    // it becomes "&amp;".
    final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 );

    for( int i = 0; i < buf.length(); i++ ) {
        final char ch = buf.charAt( i );

        switch( ch ) {
            case '<':
                tmpBuf.append( "&lt;" );
                break;

            case '>':
                tmpBuf.append( "&gt;" );
                break;

            case '"':
                tmpBuf.append( "&quot;" );
                break;

            case '&': {
                final int entityEnd = findEntityEnd( buf, i );
                if( entityEnd >= 0 ) {
                    // Copy the existing reference verbatim and continue after its ';'.
                    tmpBuf.append( buf, i, entityEnd + 1 );
                    i = entityEnd;
                } else {
                    tmpBuf.append( "&amp;" );
                }
                break;
            }

            default:
                tmpBuf.append( ch );
                break;
        }
    }

    return tmpBuf.toString();
}

/**
 * Checks whether the '&amp;' at {@code start} begins something shaped like an
 * entity reference: letters or digits, optionally introduced by '#', closed by ';'.
 * A further '&amp;' ends the candidate, so a bare ampersand that merely precedes
 * a real reference is still escaped by the caller.
 *
 * @param buf The text being escaped.
 * @param start Index of the '&amp;' character.
 * @return The index of the terminating ';', or -1 if this is not an entity reference.
 */
private static int findEntityEnd( final String buf, final int start ) {
    for( int j = start + 1; j < buf.length(); j++ ) {
        final char c = buf.charAt( j );
        if( c == ';' ) {
            return j;
        }
        final boolean allowed = Character.isLetterOrDigit( c ) || ( c == '#' && j == start + 1 );
        if( !allowed ) {
            return -1;
        }
    }
    return -1;
}

/**
 * Appends the element to the current element and makes it the new current element.
 * Pending plain text is flushed first.
 *
 * @param e The element to open.
 * @return The element that was passed in.
 */
private Element pushElement( final Element e ) {
    flushPlainText();
    m_currentElement.addContent( e );
    m_currentElement = e;

    return e;
}

/**
 * Appends content to the current element without changing the current element.
 * Pending plain text is flushed first. A null argument is ignored.
 *
 * @param e The content to append; may be null.
 * @return The current element.
 */
private Element addElement( final Content e ) {
    if( e != null ) {
        flushPlainText();
        m_currentElement.addContent( e );
    }
    return m_currentElement;
}

/**
 * All elements that can be empty by the HTML DTD.
 */
// Keep sorted.
682 private static final String[] EMPTY_ELEMENTS = { 683 "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" 684 }; 685 686 /** 687 * Goes through the current element stack and pops all elements until this 688 * element is found - this essentially "closes" and element. 689 * 690 * @param s 691 * @return The new current element, or null, if there was no such element in the entire stack. 692 */ 693 private Element popElement( final String s ) 694 { 695 final int flushedBytes = flushPlainText(); 696 697 Element currEl = m_currentElement; 698 699 while( currEl.getParentElement() != null ) 700 { 701 if( currEl.getName().equals(s) && !currEl.isRootElement() ) 702 { 703 m_currentElement = currEl.getParentElement(); 704 705 // 706 // Check if it's okay for this element to be empty. Then we will 707 // trick the JDOM generator into not generating an empty element, 708 // by putting an empty string between the tags. Yes, it's a kludge 709 // but what'cha gonna do about it. :-) 710 // 711 712 if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) 713 { 714 currEl.addContent(""); 715 } 716 717 return m_currentElement; 718 } 719 720 currEl = currEl.getParentElement(); 721 } 722 723 return null; 724 } 725 726 727 /** 728 * Reads the stream until it meets one of the specified 729 * ending characters, or stream end. The ending character will be left 730 * in the stream. 
731 */ 732 private String readUntil( final String endChars ) 733 throws IOException 734 { 735 final StringBuilder sb = new StringBuilder( 80 ); 736 int ch = nextToken(); 737 738 while( ch != -1 ) 739 { 740 if( ch == '\\' ) 741 { 742 ch = nextToken(); 743 if( ch == -1 ) 744 { 745 break; 746 } 747 } 748 else 749 { 750 if( endChars.indexOf((char)ch) != -1 ) 751 { 752 pushBack( ch ); 753 break; 754 } 755 } 756 sb.append( (char) ch ); 757 ch = nextToken(); 758 } 759 760 return sb.toString(); 761 } 762 763 /** 764 * Reads the stream while the characters that have been specified are 765 * in the stream, returning then the result as a String. 766 */ 767 private String readWhile( final String endChars ) 768 throws IOException 769 { 770 final StringBuilder sb = new StringBuilder( 80 ); 771 int ch = nextToken(); 772 773 while( ch != -1 ) 774 { 775 if( endChars.indexOf((char)ch) == -1 ) 776 { 777 pushBack( ch ); 778 break; 779 } 780 781 sb.append( (char) ch ); 782 ch = nextToken(); 783 } 784 785 return sb.toString(); 786 } 787 788 private JSPWikiMarkupParser m_cleanTranslator; 789 790 /** 791 * Does a lazy init. Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before 792 * the TranslatorReader it is contained by is up. 793 */ 794 private JSPWikiMarkupParser getCleanTranslator() { 795 if( m_cleanTranslator == null ) { 796 final Context dummyContext = Wiki.context().create( m_engine, m_context.getHttpRequest(), m_context.getPage() ); 797 m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); 798 m_cleanTranslator.m_allowHTML = true; 799 } 800 801 return m_cleanTranslator; 802 } 803 /** 804 * Modifies the "hd" parameter to contain proper values. Because 805 * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the 806 * % after url encoding with '_'. 807 * <p> 808 * Counts also duplicate headings (= headings with similar name), and 809 * attaches a counter. 
810 */ 811 private String makeHeadingAnchor( final String baseName, String title, final Heading hd ) { 812 hd.m_titleText = title; 813 title = MarkupParser.wikifyLink( title ); 814 hd.m_titleSection = m_engine.encodeName(title); 815 816 if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) { 817 final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1; 818 m_titleSectionCounter.put( hd.m_titleSection, count ); 819 hd.m_titleSection += "-" + count; 820 } else { 821 m_titleSectionCounter.put( hd.m_titleSection, 1 ); 822 } 823 824 hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection; 825 hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); 826 hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); 827 828 return hd.m_titleAnchor; 829 } 830 831 private String makeSectionTitle( String title ) { 832 title = title.trim(); 833 try { 834 final JSPWikiMarkupParser dtr = getCleanTranslator(); 835 dtr.setInputReader( new StringReader( title ) ); 836 final WikiDocument doc = dtr.parse(); 837 doc.setContext( m_context ); 838 839 return XmlUtil.extractTextFromDocument( doc ); 840 } catch( final IOException e ) { 841 log.fatal("Title parsing not working", e ); 842 throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e ); 843 } 844 } 845 846 /** 847 * Returns XHTML for the heading. 848 * 849 * @param level The level of the heading. 
@see Heading 850 * @param title the title for the heading 851 * @param hd a List to which heading should be added 852 * @return An Element containing the heading 853 */ 854 public Element makeHeading( final int level, final String title, final Heading hd ) { 855 final Element el; 856 final String pageName = m_context.getPage().getName(); 857 final String outTitle = makeSectionTitle( title ); 858 hd.m_level = level; 859 860 switch( level ) { 861 case Heading.HEADING_SMALL: 862 el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 863 break; 864 865 case Heading.HEADING_MEDIUM: 866 el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 867 break; 868 869 case Heading.HEADING_LARGE: 870 el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); 871 break; 872 873 default: 874 throw new InternalWikiException( "Illegal heading type " + level ); 875 } 876 877 return el; 878 } 879 880 /** 881 * When given a link to a WikiName, we just return 882 * a proper HTML link for it. The local link mutator 883 * chain is also called. 884 */ 885 private Element makeCamelCaseLink( final String wikiname ) 886 { 887 final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); 888 889 callMutatorChain( m_localLinkMutatorChain, wikiname ); 890 891 if( matchedLink != null ) { 892 makeLink( READ, matchedLink, wikiname, null, null ); 893 } else { 894 makeLink( EDIT, wikiname, wikiname, null, null ); 895 } 896 897 return m_currentElement; 898 } 899 900 /** Holds the image URL for the duration of this parser */ 901 private String m_outlinkImageURL; 902 903 /** 904 * Returns an element for the external link image (out.png). However, 905 * this method caches the URL for the lifetime of this MarkupParser, 906 * because it's commonly used, and we'll end up with possibly hundreds 907 * our thousands of references to it... It's a lot faster, too. 
908 * 909 * @return An element containing the HTML for the outlink image. 910 */ 911 private Element outlinkImage() 912 { 913 Element el = null; 914 915 if( m_useOutlinkImage ) 916 { 917 if( m_outlinkImageURL == null ) 918 { 919 m_outlinkImageURL = m_context.getURL( ContextEnum.PAGE_NONE.getRequestContext(), OUTLINK_IMAGE ); 920 } 921 922 el = new Element( "img" ).setAttribute( "class", OUTLINK ); 923 el.setAttribute( "src", m_outlinkImageURL ); 924 el.setAttribute( "alt","" ); 925 } 926 927 return el; 928 } 929 930 /** 931 * Takes an URL and turns it into a regular wiki link. Unfortunately, 932 * because of the way that flushPlainText() works, it already encodes 933 * all of the XML entities. But so does WikiContext.getURL(), so we 934 * have to do a reverse-replace here, so that it can again be replaced in makeLink. 935 * <p> 936 * What a crappy problem. 937 * 938 * @param url 939 * @return An anchor Element containing the link. 940 */ 941 private Element makeDirectURILink( String url ) { 942 final Element result; 943 String last = null; 944 945 if( url.endsWith( "," ) || url.endsWith( "." ) ) { 946 last = url.substring( url.length() - 1 ); 947 url = url.substring( 0, url.length() - 1 ); 948 } 949 950 callMutatorChain( m_externalLinkMutatorChain, url ); 951 952 if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) { 953 result = handleImageLink( StringUtils.replace( url, "&", "&" ), url, false ); 954 } else { 955 result = makeLink( EXTERNAL, StringUtils.replace( url, "&", "&" ), url, null, null ); 956 addElement( outlinkImage() ); 957 } 958 959 if( last != null ) { 960 m_plainTextBuf.append( last ); 961 } 962 963 return result; 964 } 965 966 /** 967 * Image links are handled differently: 968 * 1. If the text is a WikiName of an existing page, 969 * it gets linked. 970 * 2. If the text is an external link, then it is inlined. 971 * 3. Otherwise it becomes an ALT text. 972 * 973 * @param reallink The link to the image. 
974 * @param link Link text portion, may be a link to somewhere else. 975 * @param hasLinkText If true, then the defined link had a link text available. 976 * This means that the link text may be a link to a wiki page, 977 * or an external resource. 978 */ 979 980 // FIXME: isExternalLink() is called twice. 981 private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) 982 { 983 final String possiblePage = MarkupParser.cleanLink( link ); 984 985 if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) 986 { 987 return makeLink( IMAGELINK, reallink, link, null, null ); 988 } 989 else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) 990 { 991 // System.out.println("Orig="+link+", Matched: "+matchedLink); 992 callMutatorChain( m_localLinkMutatorChain, possiblePage ); 993 994 return makeLink( IMAGEWIKILINK, reallink, link, null, null ); 995 } 996 else 997 { 998 return makeLink( IMAGE, reallink, link, null, null ); 999 } 1000 } 1001 1002 private Element handleAccessRule( String ruleLine ) { 1003 if( m_wysiwygEditorMode ) { 1004 m_currentElement.addContent( "[" + ruleLine + "]" ); 1005 } 1006 1007 if( !m_parseAccessRules ) { 1008 return m_currentElement; 1009 } 1010 final Page page = m_context.getRealPage(); 1011 // UserDatabase db = m_context.getEngine().getUserDatabase(); 1012 1013 if( ruleLine.startsWith( "{" ) ) { 1014 ruleLine = ruleLine.substring( 1 ); 1015 } 1016 1017 if( ruleLine.endsWith( "}" ) ) { 1018 ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); 1019 } 1020 1021 log.debug("page={}, ACL = {}", page.getName(), ruleLine); 1022 1023 try { 1024 final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine ); 1025 page.setAcl( acl ); 1026 log.debug( acl.toString() ); 1027 } catch( final WikiSecurityException wse ) { 1028 return makeError( wse.getMessage() ); 1029 } 1030 1031 return m_currentElement; 1032 } 1033 1034 /** 1035 * Handles metadata setting [{SET 
foo=bar}] 1036 */ 1037 private Element handleMetadata( final String link ) { 1038 if( m_wysiwygEditorMode ) { 1039 m_currentElement.addContent( "[" + link + "]" ); 1040 } 1041 1042 try { 1043 final String args = link.substring( link.indexOf(' '), link.length()-1 ); 1044 final String name = args.substring( 0, args.indexOf('=') ).trim(); 1045 String val = args.substring( args.indexOf('=')+1 ).trim(); 1046 1047 if( val.startsWith("'") ) { 1048 val = val.substring( 1 ); 1049 } 1050 if( val.endsWith("'") ) { 1051 val = val.substring( 0, val.length()-1 ); 1052 } 1053 1054 // log.debug("SET name='"+name+"', value='"+val+"'."); 1055 1056 if( !name.isEmpty() && !val.isEmpty() ) { 1057 val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val ); 1058 m_context.getPage().setAttribute( name, val ); 1059 } 1060 } catch( final Exception e ) { 1061 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1062 return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); 1063 } 1064 1065 return m_currentElement; 1066 } 1067 1068 /** 1069 * Emits a processing instruction that will disable markup escaping. This is 1070 * very useful if you want to emit HTML directly into the stream. 1071 * 1072 */ 1073 private void disableOutputEscaping() { 1074 addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) ); 1075 } 1076 1077 /** 1078 * Gobbles up all hyperlinks that are encased in square brackets. 
1079 */ 1080 private Element handleHyperlinks( String linktext, final int pos ) { 1081 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1082 final StringBuilder sb = new StringBuilder( linktext.length() + 80 ); 1083 1084 if( m_linkParsingOperations.isAccessRule( linktext ) ) { 1085 return handleAccessRule( linktext ); 1086 } 1087 1088 if( m_linkParsingOperations.isMetadata( linktext ) ) { 1089 return handleMetadata( linktext ); 1090 } 1091 1092 if( m_linkParsingOperations.isPluginLink( linktext ) ) { 1093 try { 1094 final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); 1095 1096 // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not. 1097 if( pluginContent != null ) { 1098 addElement( pluginContent ); 1099 pluginContent.executeParse( m_context ); 1100 } 1101 } catch( final PluginException e ) { 1102 log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); 1103 //log.info( "Root cause:",e.getRootThrowable() ); 1104 if( !m_wysiwygEditorMode ) { 1105 final ResourceBundle rbPlugin = Preferences.getBundle( m_context, Plugin.CORE_PLUGINS_RESOURCEBUNDLE ); 1106 return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), 1107 m_context.getRealPage().getWiki(), 1108 m_context.getRealPage().getName(), 1109 e.getMessage() ) ) ); 1110 } 1111 } 1112 1113 return m_currentElement; 1114 } 1115 1116 try { 1117 final LinkParser.Link link = m_linkParser.parse( linktext ); 1118 linktext = link.getText(); 1119 String linkref = link.getReference(); 1120 1121 // 1122 // Yes, we now have the components separated. 1123 // linktext = the text the link should have 1124 // linkref = the url or page name. 1125 // 1126 // In many cases these are the same. [linktext|linkref]. 
1127 // 1128 if( m_linkParsingOperations.isVariableLink( linktext ) ) { 1129 final Content el = new VariableContent( linktext ); 1130 1131 addElement( el ); 1132 } else if( m_linkParsingOperations.isExternalLink( linkref ) ) { 1133 // It's an external link, out of this Wiki 1134 1135 callMutatorChain( m_externalLinkMutatorChain, linkref ); 1136 1137 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 1138 handleImageLink( linkref, linktext, link.hasReference() ); 1139 } else { 1140 makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); 1141 addElement( outlinkImage() ); 1142 } 1143 } else if( link.isInterwikiLink() ) { 1144 // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved. 1145 1146 // FIXME: There is an interesting issue here: We probably should 1147 // URLEncode the wikiPage, but we can't since some of the 1148 // Wikis use slashes (/), which won't survive URLEncoding. 1149 // Besides, we don't know which character set the other Wiki 1150 // is using, so you'll have to write the entire name as it appears 1151 // in the URL. Bugger. 
1152 1153 final String extWiki = link.getExternalWiki(); 1154 final String wikiPage = link.getExternalWikiPage(); 1155 1156 if( m_wysiwygEditorMode ) { 1157 makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); 1158 } else { 1159 String urlReference = m_engine.getInterWikiURL( extWiki ); 1160 1161 if( urlReference != null ) { 1162 urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); 1163 urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); 1164 1165 if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) { 1166 handleImageLink( urlReference, linktext, link.hasReference() ); 1167 } else { 1168 makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); 1169 } 1170 1171 if( m_linkParsingOperations.isExternalLink( urlReference ) ) { 1172 addElement( outlinkImage() ); 1173 } 1174 } else { 1175 final Object[] args = { escapeHTMLEntities( extWiki ) }; 1176 1177 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); 1178 } 1179 } 1180 } else if( linkref.startsWith( "#" ) ) { 1181 // It defines a local footnote 1182 makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); 1183 } else if( TextUtil.isNumber( linkref ) ) { 1184 // It defines a reference to a local footnote 1185 makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); 1186 } else { 1187 final int hashMark; 1188 1189 // Internal wiki link, but is it an attachment link? 
1190 String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref ); 1191 if( attachment != null ) { 1192 callMutatorChain( m_attachmentLinkMutatorChain, attachment ); 1193 1194 if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { 1195 attachment = m_context.getURL( ContextEnum.PAGE_ATTACH.getRequestContext(), attachment ); 1196 sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); 1197 } else { 1198 makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); 1199 } 1200 } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) { 1201 // It's an internal Wiki link, but to a named section 1202 1203 final String namedSection = linkref.substring( hashMark + 1 ); 1204 linkref = linkref.substring( 0, hashMark ); 1205 1206 linkref = MarkupParser.cleanLink( linkref ); 1207 1208 callMutatorChain( m_localLinkMutatorChain, linkref ); 1209 1210 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1211 if( matchedLink != null ) { 1212 String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) ); 1213 sectref = sectref.replace( '%', '_' ); 1214 makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); 1215 } else { 1216 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1217 } 1218 } else { 1219 // It's an internal Wiki link 1220 linkref = MarkupParser.cleanLink( linkref ); 1221 1222 callMutatorChain( m_localLinkMutatorChain, linkref ); 1223 1224 final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); 1225 if( matchedLink != null ) { 1226 makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); 1227 } else { 1228 makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); 1229 } 1230 } 1231 } 1232 1233 } catch( final ParseException e ) { 1234 log.info( "Parser failure: ", e ); 1235 final Object[] args = { e.getMessage() }; 
1236 addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); 1237 } 1238 return m_currentElement; 1239 } 1240 1241 /** 1242 * Pushes back any string that has been read. It will obviously 1243 * be pushed back in a reverse order. 1244 * 1245 * @since 2.1.77 1246 */ 1247 private void pushBack( final String s ) 1248 throws IOException 1249 { 1250 for( int i = s.length()-1; i >= 0; i-- ) 1251 { 1252 pushBack( s.charAt(i) ); 1253 } 1254 } 1255 1256 private Element handleBackslash() 1257 throws IOException 1258 { 1259 final int ch = nextToken(); 1260 1261 if( ch == '\\' ) 1262 { 1263 final int ch2 = nextToken(); 1264 1265 if( ch2 == '\\' ) 1266 { 1267 pushElement( new Element("br").setAttribute("clear","all")); 1268 return popElement("br"); 1269 } 1270 1271 pushBack( ch2 ); 1272 1273 pushElement( new Element("br") ); 1274 return popElement("br"); 1275 } 1276 1277 pushBack( ch ); 1278 1279 return null; 1280 } 1281 1282 private Element handleUnderscore() 1283 throws IOException 1284 { 1285 final int ch = nextToken(); 1286 Element el = null; 1287 1288 if( ch == '_' ) 1289 { 1290 if( m_isbold ) 1291 { 1292 el = popElement("b"); 1293 } 1294 else 1295 { 1296 el = pushElement( new Element("b") ); 1297 } 1298 m_isbold = !m_isbold; 1299 } 1300 else 1301 { 1302 pushBack( ch ); 1303 } 1304 1305 return el; 1306 } 1307 1308 1309 /** 1310 * For example: italics. 
1311 */ 1312 private Element handleApostrophe() 1313 throws IOException 1314 { 1315 final int ch = nextToken(); 1316 Element el = null; 1317 1318 if( ch == '\'' ) 1319 { 1320 if( m_isitalic ) 1321 { 1322 el = popElement("i"); 1323 } 1324 else 1325 { 1326 el = pushElement( new Element("i") ); 1327 } 1328 m_isitalic = !m_isitalic; 1329 } 1330 else 1331 { 1332 pushBack( ch ); 1333 } 1334 1335 return el; 1336 } 1337 1338 private Element handleOpenbrace( final boolean isBlock ) 1339 throws IOException 1340 { 1341 final int ch = nextToken(); 1342 1343 if( ch == '{' ) 1344 { 1345 final int ch2 = nextToken(); 1346 1347 if( ch2 == '{' ) 1348 { 1349 m_isPre = true; 1350 m_isEscaping = true; 1351 m_isPreBlock = isBlock; 1352 1353 if( isBlock ) 1354 { 1355 startBlockLevel(); 1356 return pushElement( new Element("pre") ); 1357 } 1358 1359 return pushElement( new Element("span").setAttribute("class","inline-code") ); 1360 } 1361 1362 pushBack( ch2 ); 1363 1364 return pushElement( new Element("tt") ); 1365 } 1366 1367 pushBack( ch ); 1368 1369 return null; 1370 } 1371 1372 /** 1373 * Handles both }} and }}} 1374 */ 1375 private Element handleClosebrace() 1376 throws IOException 1377 { 1378 final int ch2 = nextToken(); 1379 1380 if( ch2 == '}' ) 1381 { 1382 final int ch3 = nextToken(); 1383 1384 if( ch3 == '}' ) 1385 { 1386 if( m_isPre ) 1387 { 1388 if( m_isPreBlock ) 1389 { 1390 popElement( "pre" ); 1391 } 1392 else 1393 { 1394 popElement( "span" ); 1395 } 1396 1397 m_isPre = false; 1398 m_isEscaping = false; 1399 return m_currentElement; 1400 } 1401 1402 m_plainTextBuf.append("}}}"); 1403 return m_currentElement; 1404 } 1405 1406 pushBack( ch3 ); 1407 1408 if( !m_isEscaping ) 1409 { 1410 return popElement("tt"); 1411 } 1412 } 1413 1414 pushBack( ch2 ); 1415 1416 return null; 1417 } 1418 1419 private Element handleDash() 1420 throws IOException 1421 { 1422 int ch = nextToken(); 1423 1424 if( ch == '-' ) 1425 { 1426 final int ch2 = nextToken(); 1427 1428 if( ch2 == '-' ) 1429 { 
1430 final int ch3 = nextToken(); 1431 1432 if( ch3 == '-' ) 1433 { 1434 // Empty away all the rest of the dashes. 1435 // Do not forget to return the first non-match back. 1436 do 1437 { 1438 ch = nextToken(); 1439 } 1440 while ( ch == '-' ); 1441 1442 pushBack(ch); 1443 startBlockLevel(); 1444 pushElement( new Element("hr") ); 1445 return popElement( "hr" ); 1446 } 1447 1448 pushBack( ch3 ); 1449 } 1450 pushBack( ch2 ); 1451 } 1452 1453 pushBack( ch ); 1454 1455 return null; 1456 } 1457 1458 private Element handleHeading() 1459 throws IOException 1460 { 1461 Element el = null; 1462 1463 final int ch = nextToken(); 1464 1465 final Heading hd = new Heading(); 1466 1467 if( ch == '!' ) 1468 { 1469 final int ch2 = nextToken(); 1470 1471 if( ch2 == '!' ) 1472 { 1473 final String title = peekAheadLine(); 1474 1475 el = makeHeading( Heading.HEADING_LARGE, title, hd); 1476 } 1477 else 1478 { 1479 pushBack( ch2 ); 1480 final String title = peekAheadLine(); 1481 el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); 1482 } 1483 } 1484 else 1485 { 1486 pushBack( ch ); 1487 final String title = peekAheadLine(); 1488 el = makeHeading( Heading.HEADING_SMALL, title, hd ); 1489 } 1490 1491 callHeadingListenerChain( hd ); 1492 1493 m_lastHeading = hd; 1494 1495 if( el != null ) pushElement(el); 1496 1497 return el; 1498 } 1499 1500 /** 1501 * Reads the stream until the next EOL or EOF. Note that it will also read the 1502 * EOL from the stream. 
*/
private StringBuilder readUntilEOL() throws IOException {
    final StringBuilder line = new StringBuilder( 256 );

    for( int ch = nextToken(); ch != -1; ch = nextToken() ) {
        line.append( (char) ch );

        // The newline itself is kept in the returned buffer.
        if( ch == '\n' ) {
            break;
        }
    }

    return line;
}

/** Controls whether italic is restarted after a paragraph shift */

private boolean m_restartitalic;
private boolean m_restartbold;

private boolean m_newLine;

/**
 * Starts a block level element: closes any open inline formatting and an open
 * paragraph, and remembers the italic/bold state in the restart flags.
 */
private void startBlockLevel() {
    // Inline formatting may not continue over block level limits in XHTML.
    popElement("i");
    popElement("b");
    popElement("tt");

    if( m_isOpenParagraph ) {
        m_isOpenParagraph = false;
        popElement("p");
        m_plainTextBuf.append("\n"); // Just small beautification
    }

    // Remember what was active, then reset the current state.
    m_restartitalic = m_isitalic;
    m_restartbold = m_isbold;

    m_isitalic = false;
    m_isbold = false;
}

/**
 * Maps a bullet character to its list element name.
 *
 * @param c '*' for an unordered list, '#' for an ordered list
 * @return "ul" or "ol"
 * @throws InternalWikiException for any other character
 */
private static String getListType( final char c ) {
    switch( c ) {
        case '*':
            return "ul";
        case '#':
            return "ol";
        default:
            throw new InternalWikiException("Parser got faulty list type: "+c);
    }
}

/**
 * Like original handleOrderedList() and handleUnorderedList()
 * however handles both ordered ('#') and unordered ('*') mixed together.
 */

// FIXME: Refactor this; it's a bit messy.
1576 1577 private Element handleGeneralList() 1578 throws IOException 1579 { 1580 startBlockLevel(); 1581 1582 String strBullets = readWhile( "*#" ); 1583 // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution 1584 final int numBullets = strBullets.length(); 1585 1586 // override the beginning portion of bullet pattern to be like the previous 1587 // to simulate PHPWiki style lists 1588 1589 if(m_allowPHPWikiStyleLists) 1590 { 1591 // only substitute if different 1592 if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1593 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) 1594 { 1595 if(numBullets <= m_genlistlevel) 1596 { 1597 // Substitute all but the last character (keep the expressed bullet preference) 1598 strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") 1599 + strBullets.charAt( numBullets-1 ); 1600 } 1601 else 1602 { 1603 strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); 1604 } 1605 } 1606 } 1607 1608 // 1609 // Check if this is still of the same type 1610 // 1611 if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals 1612 (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) 1613 { 1614 if( numBullets > m_genlistlevel ) 1615 { 1616 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); 1617 1618 for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) 1619 { 1620 // bullets are growing, get from new bullet list 1621 pushElement( new Element("li") ); 1622 pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); 1623 } 1624 } 1625 else if( numBullets < m_genlistlevel ) 1626 { 1627 // Close the previous list item. 
1628 // buf.append( m_renderer.closeListItem() ); 1629 popElement( "li" ); 1630 1631 for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) 1632 { 1633 // bullets are shrinking, get from old bullet list 1634 1635 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1636 if( m_genlistlevel > 0 ) 1637 { 1638 popElement( "li" ); 1639 } 1640 1641 } 1642 } 1643 else 1644 { 1645 if( m_genlistlevel > 0 ) 1646 { 1647 popElement( "li" ); 1648 } 1649 } 1650 } 1651 else 1652 { 1653 // 1654 // The pattern has changed, unwind and restart 1655 // 1656 int numEqualBullets; 1657 final int numCheckBullets; 1658 1659 // find out how much is the same 1660 numEqualBullets = 0; 1661 numCheckBullets = Math.min(numBullets,m_genlistlevel); 1662 1663 while( numEqualBullets < numCheckBullets ) 1664 { 1665 // if the bullets are equal so far, keep going 1666 if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) 1667 numEqualBullets++; 1668 // otherwise giveup, we have found how many are equal 1669 else 1670 break; 1671 } 1672 1673 //unwind 1674 for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) 1675 { 1676 popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); 1677 if( m_genlistlevel > numBullets ) 1678 { 1679 popElement("li"); 1680 } 1681 } 1682 1683 //rewind 1684 1685 pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); 1686 for(int i = numEqualBullets; i < numBullets; i++) 1687 { 1688 pushElement( new Element("li") ); 1689 pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); 1690 } 1691 m_genlistlevel = numBullets; 1692 } 1693 1694 // 1695 // Push a new list item, and eat away any extra whitespace 1696 // 1697 pushElement( new Element("li") ); 1698 readWhile(" "); 1699 1700 // work done, remember the new bullet list (in place of old one) 1701 m_genlistBulletBuffer.setLength(0); 1702 m_genlistBulletBuffer.append(strBullets); 1703 1704 return m_currentElement; 
1705 } 1706 1707 private Element unwindGeneralList() 1708 { 1709 //unwind 1710 for( ; m_genlistlevel > 0; m_genlistlevel-- ) 1711 { 1712 popElement( "li" ); 1713 popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); 1714 } 1715 1716 m_genlistBulletBuffer.setLength(0); 1717 1718 return null; 1719 } 1720 1721 1722 private Element handleDefinitionList() 1723 throws IOException 1724 { 1725 if( !m_isdefinition ) 1726 { 1727 m_isdefinition = true; 1728 1729 startBlockLevel(); 1730 1731 pushElement( new Element("dl") ); 1732 return pushElement( new Element("dt") ); 1733 } 1734 1735 return null; 1736 } 1737 1738 private Element handleOpenbracket() 1739 throws IOException 1740 { 1741 final StringBuilder sb = new StringBuilder(40); 1742 final int pos = getPosition(); 1743 int ch = nextToken(); 1744 boolean isPlugin = false; 1745 1746 if( ch == '[' ) 1747 { 1748 if( m_wysiwygEditorMode ) 1749 { 1750 sb.append( '[' ); 1751 } 1752 1753 sb.append( (char)ch ); 1754 1755 while( (ch = nextToken()) == '[' ) 1756 { 1757 sb.append( (char)ch ); 1758 } 1759 } 1760 1761 1762 if( ch == '{' ) 1763 { 1764 isPlugin = true; 1765 } 1766 1767 pushBack( ch ); 1768 1769 if( sb.length() > 0 ) 1770 { 1771 m_plainTextBuf.append( sb ); 1772 return m_currentElement; 1773 } 1774 1775 // 1776 // Find end of hyperlink 1777 // 1778 1779 ch = nextToken(); 1780 int nesting = 1; // Check for nested plugins 1781 1782 while( ch != -1 ) 1783 { 1784 final int ch2 = nextToken(); pushBack(ch2); 1785 1786 if( isPlugin ) 1787 { 1788 if( ch == '[' && ch2 == '{' ) 1789 { 1790 nesting++; 1791 } 1792 else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) 1793 { 1794 break; 1795 } 1796 else if( ch == '}' && ch2 == ']' ) 1797 { 1798 // NB: This will be decremented once at the end 1799 nesting--; 1800 } 1801 } 1802 else 1803 { 1804 if( ch == ']' ) 1805 { 1806 break; 1807 } 1808 } 1809 1810 sb.append( (char) ch ); 1811 1812 ch = nextToken(); 1813 } 1814 1815 // 1816 // If the link is 
never finished, do some tricks to display the rest of the line 1817 // unchanged. 1818 // 1819 if( ch == -1 ) 1820 { 1821 log.debug("Warning: unterminated link detected!"); 1822 m_isEscaping = true; 1823 m_plainTextBuf.append( sb ); 1824 flushPlainText(); 1825 m_isEscaping = false; 1826 return m_currentElement; 1827 } 1828 1829 return handleHyperlinks( sb.toString(), pos ); 1830 } 1831 1832 /** 1833 * Reads the stream until the current brace is closed or stream end. 1834 */ 1835 private String readBraceContent( final char opening, final char closing ) 1836 throws IOException 1837 { 1838 final StringBuilder sb = new StringBuilder(40); 1839 int braceLevel = 1; 1840 int ch; 1841 while(( ch = nextToken() ) != -1 ) 1842 { 1843 if( ch == '\\' ) 1844 { 1845 continue; 1846 } 1847 else if ( ch == opening ) 1848 { 1849 braceLevel++; 1850 } 1851 else if ( ch == closing ) 1852 { 1853 braceLevel--; 1854 if (braceLevel==0) 1855 { 1856 break; 1857 } 1858 } 1859 sb.append( (char)ch ); 1860 } 1861 return sb.toString(); 1862 } 1863 1864 1865 /** 1866 * Handles constructs of type %%(style) and %%class 1867 * @param newLine 1868 * @return An Element containing the div or span, depending on the situation. 1869 * @throws IOException 1870 */ 1871 private Element handleDiv( final boolean newLine ) 1872 throws IOException 1873 { 1874 int ch = nextToken(); 1875 Element el = null; 1876 1877 if( ch == '%' ) 1878 { 1879 String style = null; 1880 String clazz = null; 1881 1882 ch = nextToken(); 1883 1884 // 1885 // Style or class? 1886 // 1887 if( ch == '(' ) 1888 { 1889 style = readBraceContent('(',')'); 1890 } 1891 else if( Character.isLetter( (char) ch ) ) 1892 { 1893 pushBack( ch ); 1894 clazz = readUntil( "( \t\n\r" ); 1895 //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters 1896 //CSS Classnames can contain only the characters [a-zA-Z0-9] and 1897 //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". 
1898 //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. 1899 1900 //(1) replace '.' by spaces, allowing multiple classnames on a div or span 1901 //(2) remove any invalid character 1902 if( clazz != null){ 1903 1904 clazz = clazz.replace('.', ' ') 1905 .replaceAll("[^\\s-_\\w\\x200-\\x377]+",""); 1906 1907 } 1908 ch = nextToken(); 1909 1910 //check for %%class1.class2( style information ) 1911 if( ch == '(' ) 1912 { 1913 style = readBraceContent('(',')'); 1914 } 1915 // 1916 // Pop out only spaces, so that the upcoming EOL check does not check the 1917 // next line. 1918 // 1919 else if( ch == '\n' || ch == '\r' ) 1920 { 1921 pushBack(ch); 1922 } 1923 } 1924 else 1925 { 1926 // 1927 // Anything else stops. 1928 // 1929 1930 pushBack(ch); 1931 1932 try 1933 { 1934 final Boolean isSpan = m_styleStack.pop(); 1935 1936 if( isSpan == null ) 1937 { 1938 // Fail quietly 1939 } 1940 else if( isSpan.booleanValue() ) 1941 { 1942 el = popElement( "span" ); 1943 } 1944 else 1945 { 1946 el = popElement( "div" ); 1947 } 1948 } 1949 catch( final EmptyStackException e ) 1950 { 1951 log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); 1952 return m_currentElement; 1953 } 1954 1955 return el; 1956 } 1957 1958 // 1959 // Check if there is an attempt to do something nasty 1960 // 1961 1962 try 1963 { 1964 style = StringEscapeUtils.unescapeHtml4(style); 1965 if( style != null && style.indexOf("javascript:") != -1 ) 1966 { 1967 log.debug("Attempt to output javascript within CSS:"+style); 1968 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1969 return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); 1970 } 1971 } 1972 catch( final NumberFormatException e ) 1973 { 1974 // 1975 // If there are unknown entities, we don't want the parser to stop. 
1976 // 1977 final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); 1978 final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); 1979 return addElement( makeError( msg ) ); 1980 } 1981 1982 // 1983 // Decide if we should open a div or a span? 1984 // 1985 final String eol = peekAheadLine(); 1986 1987 if( !eol.trim().isEmpty() ) 1988 { 1989 // There is stuff after the class 1990 1991 el = new Element("span"); 1992 1993 m_styleStack.push( Boolean.TRUE ); 1994 } 1995 else 1996 { 1997 startBlockLevel(); 1998 el = new Element("div"); 1999 m_styleStack.push( Boolean.FALSE ); 2000 } 2001 2002 if( style != null ) el.setAttribute("style", style); 2003 if( clazz != null ) el.setAttribute("class", clazz); 2004 el = pushElement( el ); 2005 2006 return el; 2007 } 2008 2009 pushBack(ch); 2010 2011 return el; 2012 } 2013 2014 private Element handleSlash( final boolean newLine ) 2015 throws IOException 2016 { 2017 final int ch = nextToken(); 2018 2019 pushBack(ch); 2020 if( ch == '%' && !m_styleStack.isEmpty() ) 2021 { 2022 return handleDiv( newLine ); 2023 } 2024 2025 return null; 2026 } 2027 2028 private Element handleBar( final boolean newLine ) 2029 throws IOException 2030 { 2031 Element el = null; 2032 2033 if( !m_istable && !newLine ) 2034 { 2035 return null; 2036 } 2037 2038 // 2039 // If the bar is in the first column, we will either start 2040 // a new table or continue the old one. 2041 // 2042 2043 if( newLine ) 2044 { 2045 if( !m_istable ) 2046 { 2047 startBlockLevel(); 2048 el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); 2049 m_istable = true; 2050 m_rowNum = 0; 2051 } 2052 2053 m_rowNum++; 2054 final Element tr = ( m_rowNum % 2 != 0 ) 2055 ? 
new Element("tr").setAttribute("class", "odd") 2056 : new Element("tr"); 2057 el = pushElement( tr ); 2058 } 2059 2060 // 2061 // Check out which table cell element to start; 2062 // a header element (th) or a regular element (td). 2063 // 2064 final int ch = nextToken(); 2065 2066 if( ch == '|' ) 2067 { 2068 if( !newLine ) 2069 { 2070 el = popElement("th"); 2071 if( el == null ) popElement("td"); 2072 } 2073 el = pushElement( new Element("th") ); 2074 } 2075 else 2076 { 2077 if( !newLine ) 2078 { 2079 el = popElement("td"); 2080 if( el == null ) popElement("th"); 2081 } 2082 2083 el = pushElement( new Element("td") ); 2084 2085 pushBack( ch ); 2086 } 2087 2088 return el; 2089 } 2090 2091 /** 2092 * Generic escape of next character or entity. 2093 */ 2094 private Element handleTilde() 2095 throws IOException 2096 { 2097 final int ch = nextToken(); 2098 2099 if( ch == ' ' ) 2100 { 2101 if( m_wysiwygEditorMode ) 2102 { 2103 m_plainTextBuf.append( "~ " ); 2104 } 2105 return m_currentElement; 2106 } 2107 2108 if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || 2109 ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || 2110 ch == '{' || ch == ']' || ch == '}' || ch == '%' ) 2111 { 2112 if( m_wysiwygEditorMode ) 2113 { 2114 m_plainTextBuf.append( '~' ); 2115 } 2116 2117 m_plainTextBuf.append( (char)ch ); 2118 m_plainTextBuf.append(readWhile( ""+(char)ch )); 2119 return m_currentElement; 2120 } 2121 2122 // No escape. 2123 pushBack( ch ); 2124 2125 return null; 2126 } 2127 2128 private void fillBuffer( final Element startElement ) 2129 throws IOException 2130 { 2131 m_currentElement = startElement; 2132 2133 boolean quitReading = false; 2134 m_newLine = true; 2135 disableOutputEscaping(); 2136 2137 while(!quitReading) 2138 { 2139 final int ch = nextToken(); 2140 2141 if( ch == -1 ) break; 2142 2143 // 2144 // Check if we're actually ending the preformatted mode. 2145 // We still must do an entity transformation here. 
2146 // 2147 if( m_isEscaping ) 2148 { 2149 if( ch == '}' ) 2150 { 2151 if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); 2152 } 2153 else if( ch == -1 ) 2154 { 2155 quitReading = true; 2156 } 2157 else if( ch == '\r' ) 2158 { 2159 // DOS line feeds we ignore. 2160 } 2161 else if( ch == '<' ) 2162 { 2163 m_plainTextBuf.append( "<" ); 2164 } 2165 else if( ch == '>' ) 2166 { 2167 m_plainTextBuf.append( ">" ); 2168 } 2169 else if( ch == '&' ) 2170 { 2171 m_plainTextBuf.append( "&" ); 2172 } 2173 else if( ch == '~' ) 2174 { 2175 String braces = readWhile("}"); 2176 if( braces.length() >= 3 ) 2177 { 2178 m_plainTextBuf.append("}}}"); 2179 2180 braces = braces.substring(3); 2181 } 2182 else 2183 { 2184 m_plainTextBuf.append( (char) ch ); 2185 } 2186 2187 for( int i = braces.length()-1; i >= 0; i-- ) 2188 { 2189 pushBack(braces.charAt(i)); 2190 } 2191 } 2192 else 2193 { 2194 m_plainTextBuf.append( (char) ch ); 2195 } 2196 2197 continue; 2198 } 2199 2200 // 2201 // An empty line stops a list 2202 // 2203 if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) 2204 { 2205 m_plainTextBuf.append(unwindGeneralList()); 2206 } 2207 2208 if( m_newLine && ch != '|' && m_istable ) 2209 { 2210 popElement("table"); 2211 m_istable = false; 2212 } 2213 2214 int skip = IGNORE; 2215 2216 // 2217 // Do the actual parsing and catch any errors. 2218 // 2219 try 2220 { 2221 skip = parseToken( ch ); 2222 } 2223 catch( final IllegalDataException e ) 2224 { 2225 log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); 2226 2227 makeError("Error: "+cleanupSuspectData(e.getMessage()) ); 2228 } 2229 2230 // 2231 // The idea is as follows: If the handler method returns 2232 // an element (el != null), it is assumed that it has been 2233 // added in the stack. Otherwise the character is added 2234 // as is to the plaintext buffer. 
2235 // 2236 // For the transition phase, if s != null, it also gets 2237 // added in the plaintext buffer. 2238 // 2239 2240 switch( skip ) 2241 { 2242 case ELEMENT: 2243 m_newLine = false; 2244 break; 2245 2246 case CHARACTER: 2247 m_plainTextBuf.append( (char) ch ); 2248 m_newLine = false; 2249 break; 2250 2251 case IGNORE: 2252 default: 2253 break; 2254 } 2255 } 2256 2257 closeHeadings(); 2258 popElement("domroot"); 2259 } 2260 2261 private String cleanupSuspectData( final String s ) 2262 { 2263 final StringBuilder sb = new StringBuilder( s.length() ); 2264 2265 for( int i = 0; i < s.length(); i++ ) 2266 { 2267 final char c = s.charAt(i); 2268 2269 if( Verifier.isXMLCharacter( c ) ) sb.append( c ); 2270 else sb.append( "0x" ).append( Integer.toString( c, 16 ).toUpperCase() ); 2271 } 2272 2273 return sb.toString(); 2274 } 2275 2276 /** The token is a plain character. */ 2277 protected static final int CHARACTER = 0; 2278 2279 /** The token is a wikimarkup element. */ 2280 protected static final int ELEMENT = 1; 2281 2282 /** The token is to be ignored. */ 2283 protected static final int IGNORE = 2; 2284 2285 /** 2286 * Return CHARACTER, if you think this was a plain character; ELEMENT, if 2287 * you think this was a wiki markup element, and IGNORE, if you think 2288 * we should ignore this altogether. 2289 * <p> 2290 * To add your own MarkupParser, you can override this method, but it 2291 * is recommended that you call super.parseToken() as well to gain advantage 2292 * of JSPWiki's own markup. You can call it at the start of your own 2293 * parseToken() or end - it does not matter. 2294 * 2295 * @param ch The character under investigation 2296 * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. 2297 * @throws IOException If parsing fails. 2298 */ 2299 protected int parseToken( final int ch ) 2300 throws IOException 2301 { 2302 Element el = null; 2303 2304 // 2305 // Now, check the incoming token. 
2306 // 2307 switch( ch ) 2308 { 2309 case '\r': 2310 // DOS linefeeds we forget 2311 return IGNORE; 2312 2313 case '\n': 2314 // 2315 // Close things like headings, etc. 2316 // 2317 2318 // FIXME: This is not really very fast 2319 2320 closeHeadings(); 2321 2322 popElement("dl"); // Close definition lists. 2323 if( m_istable ) 2324 { 2325 popElement("tr"); 2326 } 2327 2328 m_isdefinition = false; 2329 2330 if( m_newLine ) 2331 { 2332 // Paragraph change. 2333 startBlockLevel(); 2334 2335 // 2336 // Figure out which elements cannot be enclosed inside 2337 // a <p></p> pair according to XHTML rules. 2338 // 2339 final String nextLine = peekAheadLine(); 2340 if( nextLine.isEmpty() || 2341 (!nextLine.isEmpty() && 2342 !nextLine.startsWith("{{{") && 2343 !nextLine.startsWith("----") && 2344 !nextLine.startsWith("%%") && 2345 "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) 2346 { 2347 pushElement( new Element("p") ); 2348 m_isOpenParagraph = true; 2349 2350 if( m_restartitalic ) 2351 { 2352 pushElement( new Element("i") ); 2353 m_isitalic = true; 2354 m_restartitalic = false; 2355 } 2356 if( m_restartbold ) 2357 { 2358 pushElement( new Element("b") ); 2359 m_isbold = true; 2360 m_restartbold = false; 2361 } 2362 } 2363 } 2364 else 2365 { 2366 m_plainTextBuf.append("\n"); 2367 m_newLine = true; 2368 } 2369 return IGNORE; 2370 2371 2372 case '\\': 2373 el = handleBackslash(); 2374 break; 2375 2376 case '_': 2377 el = handleUnderscore(); 2378 break; 2379 2380 case '\'': 2381 el = handleApostrophe(); 2382 break; 2383 2384 case '{': 2385 el = handleOpenbrace( m_newLine ); 2386 break; 2387 2388 case '}': 2389 el = handleClosebrace(); 2390 break; 2391 2392 case '-': 2393 if( m_newLine ) 2394 el = handleDash(); 2395 2396 break; 2397 2398 case '!': 2399 if( m_newLine ) 2400 { 2401 el = handleHeading(); 2402 } 2403 break; 2404 2405 case ';': 2406 if( m_newLine ) 2407 { 2408 el = handleDefinitionList(); 2409 } 2410 break; 2411 2412 case ':': 2413 if( m_isdefinition ) 2414 { 2415 
popElement("dt"); 2416 el = pushElement( new Element("dd") ); 2417 m_isdefinition = false; 2418 } 2419 break; 2420 2421 case '[': 2422 el = handleOpenbracket(); 2423 break; 2424 2425 case '*': 2426 if( m_newLine ) 2427 { 2428 pushBack('*'); 2429 el = handleGeneralList(); 2430 } 2431 break; 2432 2433 case '#': 2434 if( m_newLine ) 2435 { 2436 pushBack('#'); 2437 el = handleGeneralList(); 2438 } 2439 break; 2440 2441 case '|': 2442 el = handleBar( m_newLine ); 2443 break; 2444 2445 case '~': 2446 el = handleTilde(); 2447 break; 2448 2449 case '%': 2450 el = handleDiv( m_newLine ); 2451 break; 2452 2453 case '/': 2454 el = handleSlash( m_newLine ); 2455 break; 2456 2457 default: 2458 break; 2459 } 2460 2461 return el != null ? ELEMENT : CHARACTER; 2462 } 2463 2464 private void closeHeadings() 2465 { 2466 if( m_lastHeading != null && !m_wysiwygEditorMode ) 2467 { 2468 // Add the hash anchor element at the end of the heading 2469 addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); 2470 m_lastHeading = null; 2471 } 2472 popElement("h2"); 2473 popElement("h3"); 2474 popElement("h4"); 2475 } 2476 2477 /** 2478 * Parses the entire document from the Reader given in the constructor or 2479 * set by {@link #setInputReader(Reader)}. 2480 * 2481 * @return A WikiDocument, ready to be passed to the renderer. 2482 * @throws IOException If parsing cannot be accomplished. 2483 */ 2484 @Override 2485 public WikiDocument parse() 2486 throws IOException 2487 { 2488 final WikiDocument d = new WikiDocument( m_context.getPage() ); 2489 d.setContext( m_context ); 2490 2491 final Element rootElement = new Element("domroot"); 2492 2493 d.setRootElement( rootElement ); 2494 2495 fillBuffer( rootElement ); 2496 2497 paragraphify(rootElement); 2498 2499 return d; 2500 } 2501 2502 /** 2503 * Checks out that the first paragraph is correctly installed. 
2504 * 2505 * @param rootElement 2506 */ 2507 private void paragraphify( final Element rootElement) 2508 { 2509 // 2510 // Add the paragraph tag to the first paragraph 2511 // 2512 final List< Content > kids = rootElement.getContent(); 2513 2514 if( rootElement.getChild("p") != null ) 2515 { 2516 final ArrayList<Content> ls = new ArrayList<>(); 2517 int idxOfFirstContent = 0; 2518 int count = 0; 2519 2520 for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) 2521 { 2522 final Content c = i.next(); 2523 if( c instanceof Element ) 2524 { 2525 final String name = ( ( Element )c ).getName(); 2526 if( isBlockLevel( name ) ) break; 2527 } 2528 2529 if( !(c instanceof ProcessingInstruction) ) 2530 { 2531 ls.add( c ); 2532 if( idxOfFirstContent == 0 ) idxOfFirstContent = count; 2533 } 2534 } 2535 2536 // 2537 // If there were any elements, then add a new <p> (unless it would 2538 // be an empty one) 2539 // 2540 if( ls.size() > 0 ) 2541 { 2542 final Element newel = new Element("p"); 2543 2544 for( final Iterator< Content > i = ls.iterator(); i.hasNext(); ) 2545 { 2546 final Content c = i.next(); 2547 2548 c.detach(); 2549 newel.addContent(c); 2550 } 2551 2552 // 2553 // Make sure there are no empty <p/> tags added. 2554 // 2555 if( !newel.getTextTrim().isEmpty() || !newel.getChildren().isEmpty() ) 2556 rootElement.addContent(idxOfFirstContent, newel); 2557 } 2558 } 2559 } 2560 2561}